In [None]:
import os
import zipfile

In [None]:
# Location of the dataset archive and of the directory it is unpacked into.
zip_file_path_dataset = '/workspace/datasets/isic-2024-challenge.zip'
extract_to_dir_dataset = '/workspace/datasets/isic-2024-challenge'

# Make sure the target directory exists, then unpack every archive member into it.
os.makedirs(extract_to_dir_dataset, exist_ok=True)
with zipfile.ZipFile(zip_file_path_dataset, mode='r') as dataset_archive:
    dataset_archive.extractall(path=extract_to_dir_dataset)

print(f'Files extracted to {extract_to_dir_dataset}')

In [None]:
!pip install pandas wandb timm scikit-learn

# TRAIN CONFIGURATION

In [None]:
import os
import random
import numpy as np
import pandas as pd
import timm
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from torch.optim import AdamW
from torch.nn import BCELoss
from tqdm import tqdm
import torch.nn as nn

# Training hyperparameters and the list of timm model names to train.
config = dict(
    batch_size=64,
    num_classes=1,  # one output unit for the binary target
    learning_rate=1e-4,
    num_epochs=35,
    seed=42,
    # Every active entry is trained in turn; the commented-out names are alternatives.
    model_names=[
        'selecsls42b.in1k',
        # 'nextvit_small.bd_in1k_384',
        # 'efficientnet_b3.ra2_in1k'
    ],
)

# Seeding function
def seeding(seed, deterministic=True):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to NumPy, Python's ``random`` module and
            PyTorch (CPU and, when CUDA is available, all CUDA devices).
        deterministic: When True (default), cuDNN is asked for deterministic
            algorithms and its benchmark auto-tuner is switched off, so that
            repeated runs with the same seed can match. Pass False to restore
            the faster but non-reproducible settings
            (``deterministic=False``, ``benchmark=True``).
    """
    np.random.seed(seed)
    random.seed(seed)
    # Only affects processes started later: the running interpreter read
    # PYTHONHASHSEED at startup.
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Benchmark mode lets cuDNN pick kernels by timing them, which can vary
    # between runs, so it must be off whenever determinism is requested.
    torch.backends.cudnn.deterministic = bool(deterministic)
    torch.backends.cudnn.benchmark = not deterministic
    print("Seeding done ...")

# Dataset Preprocessing (2024 Only)
def load_and_preprocess_data(data_dir='./datasets/isic-2024-challenge'):
    """Load the 2024 training metadata and attach per-image file paths.

    Args:
        data_dir: Root directory of the extracted dataset. It must contain
            ``train-metadata.csv``; image paths are built under
            ``train-image/image/``. The default is relative to the current
            working directory.

    Returns:
        The metadata DataFrame with two added columns: ``image_path``
        (``<data_dir>/train-image/image/<isic_id>.jpg``) and ``year``
        (always 2024).
    """
    data_dir = os.fspath(data_dir)

    # Load 2024 metadata
    df_2024 = pd.read_csv(f'{data_dir}/train-metadata.csv')

    # Derive the image location from the 'isic_id' column
    df_2024['image_path'] = f'{data_dir}/train-image/image/' + df_2024['isic_id'] + '.jpg'

    # Tag the rows with their source year (for reference or potential further use)
    df_2024['year'] = 2024

    return df_2024

# Custom Dataset Class
class CustomDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Each row supplies the image file location in its ``image_path`` column
    and the label in its ``target`` column.
    """

    def __init__(self, dataframe, transform=None):
        # Optional callable applied to the loaded image before it is returned.
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the RGB image and the label stored at positional index ``idx``."""
        record = self.dataframe.iloc[idx]
        image = Image.open(record['image_path']).convert("RGB")
        label = record['target']
        if not self.transform:
            return image, label
        return self.transform(image), label

# DataLoader Function with Model-Specific Transforms
def get_dataloader(config, dataframe, model_name):
    """Build a shuffled DataLoader whose preprocessing matches ``model_name``.

    Args:
        config: Dict providing ``batch_size`` and ``num_classes``; an optional
            ``num_workers`` entry sets the number of loader worker processes
            (defaults to 0, i.e. loading in the main process).
        dataframe: DataFrame with ``image_path`` and ``target`` columns, as
            consumed by ``CustomDataset``.
        model_name: timm model name used to resolve the input preprocessing.

    Returns:
        A ``DataLoader`` over ``CustomDataset(dataframe)`` with shuffling enabled.
    """
    # This model is created only to read its preprocessing settings and is
    # discarded afterwards, so there is no need to load pretrained weights here.
    config_model = timm.create_model(model_name, pretrained=False, num_classes=config['num_classes'])
    data_config = timm.data.resolve_model_data_config(config_model)
    del config_model

    # is_training=False requests timm's evaluation preprocessing
    # (no training-time augmentation is applied).
    transform = timm.data.create_transform(**data_config, is_training=False)

    dataset = CustomDataset(dataframe=dataframe, transform=transform)
    return DataLoader(
        dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=config.get('num_workers', 0),
    )

# Training Loop
def train_model(model_name, config, train_dataloader):
    """Fine-tune a pretrained timm model on a binary target and checkpoint every epoch.

    Args:
        model_name: timm model name; also used as the sub-directory of
            ``./logs`` that receives the checkpoints.
        config: Dict providing ``num_classes``, ``learning_rate`` and
            ``num_epochs``.
        train_dataloader: Iterable of ``(inputs, labels)`` batches; labels are
            cast to float and reshaped to ``(batch, 1)`` before the loss.

    Returns:
        The trained ``nn.Sequential(base_model, nn.Sigmoid())`` module, whose
        forward pass therefore outputs probabilities.
    """
    print(f"Training model: {model_name}")

    # Create a directory to save model weights for each epoch
    model_dir = os.path.join('./logs', model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Load the model
    base_model = timm.create_model(model_name, pretrained=True, num_classes=config['num_classes'])  # Binary classification

    # The sigmoid wrapper is kept so the returned module and the keys of the
    # saved state_dict stay exactly as before.
    model = nn.Sequential(
        base_model,
        nn.Sigmoid()
    )

    # Move the model to the appropriate device (moves base_model as well)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # The loss is computed on the backbone's raw logits: BCEWithLogitsLoss
    # fuses the sigmoid with the cross-entropy and is more numerically stable
    # than applying BCELoss to already-squashed probabilities.
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdamW(model.parameters(), lr=config['learning_rate'])

    # Training loop
    for epoch in range(config['num_epochs']):
        model.train()
        running_loss = 0.0
        for inputs, labels in tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{config['num_epochs']}"):
            inputs = inputs.to(device)
            # Convert labels to float and reshape to match the (batch, 1) logits
            labels = labels.to(device).float().unsqueeze(1)

            optimizer.zero_grad()
            logits = base_model(inputs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Print loss for this epoch; max() avoids dividing by zero on an empty loader
        avg_loss = running_loss / max(len(train_dataloader), 1)
        print(f"Epoch [{epoch + 1}/{config['num_epochs']}], Loss: {avg_loss:.4f}")

        # Save model weights after every epoch
        model_path = os.path.join(model_dir, f'epoch_{epoch + 1}.pth')
        torch.save(model.state_dict(), model_path)
        print(f"Saved model weights to {model_path}")

    print(f"Finished training model: {model_name}\n")

    return model

# Main Function to Combine Everything
def main():
    """Seed the RNGs, load the training metadata and train each configured model in turn."""
    seeding(config['seed'])
    df_train = load_and_preprocess_data()

    for model_name in config['model_names']:
        # The loader is rebuilt for every model because its transforms are model-specific.
        loader = get_dataloader(config, df_train, model_name)
        trained_model = train_model(model_name, config, loader)


if __name__ == "__main__":
    main()


# Results Saving

In [None]:
import shutil

def compress_folder_and_copy_notebook(folder_path, notebook_path, suffix):
    """Archive a folder under a suffixed name and keep a suffixed copy of a notebook.

    The folder is renamed to ``<name>_<suffix>`` inside its parent directory,
    zipped to ``<name>_<suffix>.zip`` in that same parent, and the renamed
    folder is then deleted so only the archive remains. The notebook is copied
    (not moved) to ``<stem>_<suffix><ext>`` next to the original.

    Args:
        folder_path: Directory to archive; a trailing path separator is accepted.
        notebook_path: Notebook file to copy.
        suffix: Text appended (after an underscore) to both new names.

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory or
            ``notebook_path`` is not an existing file. Both are checked before
            anything on disk is changed.
        FileExistsError: If the renamed folder path already exists; moving
            onto it would nest the source inside it and the final cleanup
            would delete its previous contents.
    """
    # Validate up front so a bad argument cannot leave the folder half-renamed.
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f'Folder not found: {folder_path}')
    if not os.path.isfile(notebook_path):
        raise FileNotFoundError(f'Notebook not found: {notebook_path}')

    # Without normalisation, basename('/a/logs/') is '' and the folder would
    # be moved into itself.
    source_dir = os.path.normpath(folder_path)

    # Determine the parent directory of the folder and notebook
    folder_parent_dir = os.path.dirname(source_dir)
    notebook_parent_dir = os.path.dirname(notebook_path)

    # Extract the original names of the folder and notebook
    folder_name = os.path.basename(source_dir)
    notebook_name, notebook_ext = os.path.splitext(os.path.basename(notebook_path))

    # Create new names with the given suffix
    new_folder_name = f"{folder_name}_{suffix}"
    new_notebook_name = f"{notebook_name}_{suffix}{notebook_ext}"

    # Create new paths for the folder and notebook
    new_folder_path = os.path.join(folder_parent_dir, new_folder_name)
    new_notebook_path = os.path.join(notebook_parent_dir, new_notebook_name)

    if os.path.exists(new_folder_path):
        raise FileExistsError(f'Destination already exists: {new_folder_path}')

    # Rename the folder by moving it to the new path with the new name
    shutil.move(source_dir, new_folder_path)

    # Copy the notebook file with the new name
    shutil.copy2(notebook_path, new_notebook_path)

    # Set the output zip file name based on the new folder name
    output_zip_file = os.path.join(folder_parent_dir, f"{new_folder_name}.zip")

    # Compress the renamed folder; archive names start with the renamed folder
    with zipfile.ZipFile(output_zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(new_folder_path):
            for file in files:
                full_path = os.path.join(root, file)
                relative_path = os.path.join(new_folder_name, os.path.relpath(full_path, new_folder_path))
                zipf.write(full_path, relative_path)

    # Remove the renamed folder after compression; its contents now live in the zip
    shutil.rmtree(new_folder_path)

    print(f'Folder {folder_path} renamed to {new_folder_name} and compressed into {output_zip_file}')
    print(f'Notebook {notebook_path} copied to {new_notebook_path}')

In [None]:
# Archive the logs folder and keep a copy of the notebook, both tagged with the suffix.
folder_to_compress = '/workspace/logs'
notebook_to_copy = '/workspace/train_vastai.ipynb'
suffix = 'effnet3'
compress_folder_and_copy_notebook(
    folder_path=folder_to_compress,
    notebook_path=notebook_to_copy,
    suffix=suffix,
)