In [None]:
import os
import zipfile

In [None]:
# Unpack every dataset archive: '/workspace/datasets/<name>.zip' is extracted
# into the sibling directory '/workspace/datasets/<name>'.
dataset_names = [
    'isic-2024-challenge',
    'isic-2019-dataset-resized-256',
    'isic-2020-dataset-resized-256',
]
for dataset_name in dataset_names:
    zip_file_path_dataset = f'/workspace/datasets/{dataset_name}.zip'
    extract_to_dir_dataset = f'/workspace/datasets/{dataset_name}'
    # The target directory may already exist from an earlier run.
    os.makedirs(extract_to_dir_dataset, exist_ok=True)
    with zipfile.ZipFile(zip_file_path_dataset, 'r') as zip_ref:
        zip_ref.extractall(extract_to_dir_dataset)
    print(f'Files extracted to {extract_to_dir_dataset}')

In [None]:
# Unpack the helper-code archive into /workspace/utils.
# NOTE(review): the later `from utils... import` lines presumably rely on this
# directory being importable from the notebook's working directory - confirm.
zip_file_path_dataset = '/workspace/utils.zip'
extract_to_dir_dataset = '/workspace/utils'
os.makedirs(extract_to_dir_dataset, exist_ok=True)
with zipfile.ZipFile(zip_file_path_dataset) as utils_archive:
    utils_archive.extractall(path=extract_to_dir_dataset)
print(f'Files extracted to {extract_to_dir_dataset}')

In [None]:
!pip install pandas matplotlib wandb timm scikit-learn -q

# TRAIN CONFIGURATION

In [None]:
import os
import random
import torch
import numpy as np
from collections import Counter
from torchvision.datasets import ImageFolder
from utils.data_utils_custom import create_train_valid_folders_custom, get_datasets_based_on_split_option
from utils.data_utils_standard import split_dataset_standard, process_standard_datasets
from utils.classification_model_utils import train_model, create_model
from utils.classification_evaluation import evaluate_model, visualize_dataloader_batch  # Updated import

# Training configuration: read by main() and handed on to the utils helpers.
CONFIG = dict(
    # --- model / run identity ---
    model_name='efficientnet_b3.ra2_in1k',
    project_name='ISIC_2024_Competition',
    artifact_name='isic2024-simplehead-model',
    learning_rate=0.0003,
    batch_size=64,
    seed=42,
    wandb_log=False,

    # --- data options ---
    # dataset_mode: '2024', '2024prac', '2024_custom', '2024prac_custom',
    # 'combined', 'combinedprac', 'combined_custom' or 'combinedprac_custom'
    dataset_mode='combined_custom',
    split_ratio=0.2,  # ratio for validation split
    augmentation='default',  # options: 'default', 'strong', etc.
    sampling='default',  # options: 'default', 'weighted'

    # --- train options ---
    pretrained=True,
    num_epochs=20,
    split_option='trainprac',  # 'train', 'trainprac', or 'valid'
    head_type='simpleheadv2',  # 'simplehead', 'scsa', 'simpleheadv2'
    valid_model_path=[],  # checkpoints main() evaluates when split_option is 'valid'
    input_size=(128, 128),  # change input size here
    loss_function='bce',  # 'bce' or 'focal'

    # --- visualization ---
    print_backbone=False,
    monte_carlo_visual=False,
    number_of_samples=5000,

    # --- sampler ---
    use_weighted_sampler=False,  # True: weighted sampler, False: normal distribution
    scaling_factor=1,  # adjust this scaling factor as needed
    freeze='unfreeze',  # 'freezeall', 'unfreeze', 'freezecustom1'
)


# VALID CONFIGURATION

In [None]:
# VALID CONFIG
# Evaluation configuration. Assigning CONFIG here replaces the value set by the
# training-configuration cell, so main() uses whichever of the two cells ran last.
CONFIG = dict(
    # --- model / run identity ---
    model_name='efficientnet_b3.ra2_in1k',
    project_name='ISIC_2024_Competition',
    artifact_name='isic2024-simplehead-model',
    learning_rate=0.0003,
    batch_size=64,
    seed=42,
    wandb_log=False,

    # --- data options ---
    # dataset_mode: '2024', '2024prac', '2024_custom', '2024prac_custom',
    # 'combined', 'combinedprac', 'combined_custom' or 'combinedprac_custom'
    dataset_mode='combined_custom',
    split_ratio=0.2,  # ratio for validation split
    augmentation='default',  # options: 'default', 'strong', etc.
    sampling='default',  # options: 'default', 'weighted'

    # --- train options ---
    pretrained=False,
    num_epochs=20,
    split_option='valid',  # 'train', 'trainprac', or 'valid'
    head_type='simpleheadv2',  # 'simplehead', 'scsa', 'simpleheadv2'
    valid_model_path=[],  # checkpoints main() loads and evaluates in 'valid' mode
    input_size=(128, 128),  # change input size here
    loss_function='bce',  # 'bce' or 'focal'

    # --- visualization ---
    print_backbone=False,
    monte_carlo_visual=False,
    number_of_samples=5000,

    # --- sampler ---
    use_weighted_sampler=False,  # True: weighted sampler, False: normal distribution
    scaling_factor=1,  # adjust this scaling factor as needed
    freeze='unfreeze',
)

# RUN IN ONE

In [None]:
def seeding(seed, deterministic=False):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator.
        deterministic: Controls the cuDNN flags when CUDA is available.
            False (the default, identical to the previous behaviour) sets
            ``cudnn.deterministic = False`` and ``cudnn.benchmark = True``,
            which favours speed but can make GPU results differ between
            runs despite seeding. True sets ``cudnn.deterministic = True``
            and ``cudnn.benchmark = False`` for reproducible runs.
    """
    deterministic = bool(deterministic)

    np.random.seed(seed)
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # processes launched after this point (not the hash seed of this kernel).
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = deterministic
        # Benchmark mode autotunes the convolution algorithm and is therefore
        # switched off whenever deterministic behaviour is requested.
        torch.backends.cudnn.benchmark = not deterministic
    print("Seeding done ...")

In [None]:
def print_final_class_distribution(train_dir, valid_dir):
    """Print the per-class sample counts of the training and validation folders.

    Both directories are opened with ``ImageFolder`` and the label stored at
    index 1 of every entry in ``.samples`` is tallied.

    Args:
        train_dir: Root directory of the training images.
        valid_dir: Root directory of the validation images.
    """
    def _label_counts(image_root):
        # Tally the label (second field) of every sample found under image_root.
        folder = ImageFolder(root=image_root)
        return Counter(entry[1] for entry in folder.samples)

    # Scan both folders before printing so a bad path fails before any output.
    train_counts = _label_counts(train_dir)
    valid_counts = _label_counts(valid_dir)

    print(f"Final training dataset class distribution: {dict(train_counts)}")
    print(f"Final validation dataset class distribution: {dict(valid_counts)}")


def main():
    """Run training or evaluation as selected by the module-level ``CONFIG``.

    Steps:
      1. Seed the random number generators and pick the CUDA device if one
         is available, otherwise the CPU.
      2. Create ``./logs/<model_save_prefix>/plots`` for evaluation output.
      3. Build the train/validation loaders, using the custom-folder helpers
         for the ``*_custom`` dataset modes and the standard helpers otherwise.
      4. For ``split_option`` ``'train'`` or ``'trainprac'``: show one
         training batch and train a model. For ``'valid'``: show one
         validation batch and evaluate every checkpoint listed in
         ``CONFIG['valid_model_path']``.

    Raises:
        ValueError: If ``CONFIG['split_option']`` is not ``'train'``,
            ``'trainprac'`` or ``'valid'``.
    """
    seeding(CONFIG['seed'])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Prefix that identifies this experiment in log and checkpoint names.
    model_save_prefix = (
        f"{CONFIG['model_name'].replace('.', '')}_{CONFIG['head_type']}"
        f"_{CONFIG['dataset_mode']}_{CONFIG['augmentation']}_{CONFIG['sampling']}"
        f"_loss{CONFIG['loss_function']}_imgsz{CONFIG['input_size'][0]}"
    )

    experiment_dir = os.path.join("./logs", model_save_prefix)
    plots_dir = os.path.join(experiment_dir, "plots")
    os.makedirs(plots_dir, exist_ok=True)

    custom_modes = ('2024_custom', '2024prac_custom', 'combined_custom', 'combinedprac_custom')
    if CONFIG['dataset_mode'] in custom_modes:
        train_dir, valid_dir = create_train_valid_folders_custom(CONFIG)
        train_loader, valid_loader = get_datasets_based_on_split_option(train_dir, valid_dir, CONFIG)
    else:
        # The returned frames are not used here; the call is kept because it
        # presumably prepares the split that process_standard_datasets reads
        # (TODO confirm against utils.data_utils_standard).
        split_dataset_standard(CONFIG)
        train_loader, valid_loader = process_standard_datasets(CONFIG)

    split_option = CONFIG['split_option']

    if split_option in ('train', 'trainprac'):
        # 'train' is listed as a valid option in CONFIG but previously fell
        # through without doing anything; it now trains like 'trainprac'.
        if split_option == 'trainprac':
            batch_title = "Training Practice Data Batch"
        else:
            batch_title = "Training Data Batch"
        visualize_dataloader_batch(train_loader, batch_title)
        train_model(train_loader, valid_loader, CONFIG, device, model_save_prefix)

    elif split_option == 'valid':
        model_paths = CONFIG['valid_model_path']
        # Accept a single path as well as a list of paths.
        if isinstance(model_paths, (str, os.PathLike)):
            model_paths = [model_paths]
        if not model_paths:
            print("No checkpoints listed in CONFIG['valid_model_path']; nothing to evaluate.")

        visualize_dataloader_batch(valid_loader, "Validation Data Batch")
        for model_path in model_paths:
            # Weights come from the checkpoint, so skip the pretrained download.
            model = create_model(CONFIG, device, pretrained=False)
            # map_location lets a checkpoint saved on a GPU load on a CPU-only
            # machine. torch.load unpickles the file: only load checkpoints
            # from trusted sources.
            state_dict = torch.load(model_path, map_location=device)
            model.load_state_dict(state_dict)
            model = model.to(device)
            print(f"Evaluating model: {model_path}")

            # Single evaluation pass; results are saved under plots_dir.
            evaluate_model(model, valid_loader, device, epoch=None, save_dir=plots_dir)

    else:
        raise ValueError(
            f"Unsupported CONFIG['split_option']: {split_option!r} "
            "(expected 'train', 'trainprac' or 'valid')"
        )


In [None]:
# Run the pipeline; main() reads the module-level CONFIG, i.e. the value set by
# whichever configuration cell was executed most recently.
main()

# Prepare the Results for download

In [None]:
import os
import shutil
import zipfile

In [None]:
def compress_folder_and_copy_notebook(folder_path, notebook_path, suffix):
    """Zip a folder under a suffixed name and copy a notebook alongside it.

    The folder's files are written to ``<parent>/<folder>_<suffix>.zip`` with
    every entry stored under the top-level directory ``<folder>_<suffix>/``.
    The notebook is copied next to the original as
    ``<notebook>_<suffix><ext>``. After the archive has been written
    successfully the source folder is deleted.

    Inputs are validated before anything on disk is changed, so a missing
    folder or notebook leaves the file system untouched.

    Args:
        folder_path: Directory to compress (str or ``os.PathLike``); a
            trailing path separator is tolerated.
        notebook_path: Notebook file to copy.
        suffix: Text appended (after an underscore) to both new names.

    Raises:
        FileNotFoundError: If the folder or the notebook does not exist.
        NotADirectoryError: If ``folder_path`` exists but is not a directory.
    """
    # Strip trailing separators: otherwise basename() is '' and the archive
    # would be named '_<suffix>' inside the folder itself.
    folder_root = os.fspath(folder_path)
    separators = os.sep + (os.altsep or '')
    folder_root = folder_root.rstrip(separators) or folder_root
    notebook_file = os.fspath(notebook_path)

    # Validate everything up front so a failure cannot leave a half-done state.
    if not os.path.exists(folder_root):
        raise FileNotFoundError(f"Folder not found: {folder_root}")
    if not os.path.isdir(folder_root):
        raise NotADirectoryError(f"Not a directory: {folder_root}")
    if not os.path.isfile(notebook_file):
        raise FileNotFoundError(f"Notebook not found: {notebook_file}")

    # Derive the suffixed names and their locations.
    folder_parent_dir = os.path.dirname(folder_root)
    new_folder_name = f"{os.path.basename(folder_root)}_{suffix}"
    notebook_name, notebook_ext = os.path.splitext(os.path.basename(notebook_file))
    new_notebook_path = os.path.join(
        os.path.dirname(notebook_file), f"{notebook_name}_{suffix}{notebook_ext}"
    )
    output_zip_file = os.path.join(folder_parent_dir, f"{new_folder_name}.zip")

    # Copy the notebook under its new name (metadata preserved by copy2).
    shutil.copy2(notebook_file, new_notebook_path)

    # Archive straight from the source folder, renaming only inside the zip.
    # This avoids moving the folder on disk, which used to nest it inside a
    # pre-existing '<folder>_<suffix>' directory.
    with zipfile.ZipFile(output_zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_root):
            dirs.sort()  # deterministic traversal order
            for file_name in sorted(files):
                full_path = os.path.join(root, file_name)
                relative_path = os.path.join(
                    new_folder_name, os.path.relpath(full_path, folder_root)
                )
                zipf.write(full_path, relative_path)

    # Remove the source folder only after the archive was written successfully.
    shutil.rmtree(folder_root)

    print(f'Folder {folder_path} renamed to {new_folder_name} and compressed into {output_zip_file}')
    print(f'Notebook {notebook_path} copied to {new_notebook_path}')

In [None]:
# Example usage: archive the logs folder and keep a suffixed copy of this notebook.
# Note that the call deletes the original folder once the zip has been written.
folder_to_compress = '/workspace/logs'
notebook_to_copy = '/workspace/train_vastai.ipynb'
suffix = 'simplev2_classification'
compress_folder_and_copy_notebook(
    folder_path=folder_to_compress,
    notebook_path=notebook_to_copy,
    suffix=suffix,
)