# Setup and Installations

In [None]:
# Use %pip (not !pip) so packages are installed into the environment of the running kernel.
# transformers is installed exactly once, from the GitHub main branch; the former second
# `pip install -U transformers` from PyPI was redundant and made it unclear which build wins.
%pip install --upgrade git+https://github.com/huggingface/transformers.git
%pip install -q datasets
%pip install matplotlib
%pip install scikit-learn pillow torchvision opencv-python
%pip install tensorboardX
%pip install torch
%pip install numpy
%pip install pandas
%pip install -U accelerate

# In Google Colab, upload lump.zip to the /content folder

In [None]:
# -o overwrites already-extracted files without asking, so re-running this cell does not
# stall on unzip's interactive "replace?" prompt.
!unzip -o lump.zip

# Data Loading and Preparation

In [None]:

from datasets import load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
from PIL import Image
import torchvision.transforms as transforms
import pandas as pd
import numpy as np

# Load and prepare dataset
csv_file = 'lump/merged.csv'  # Modify this path as needed
df = pd.read_csv(csv_file)
# Strip surrounding whitespace from the header names so columns can be addressed by clean names
df.columns = df.columns.str.strip()
print("Columns in the DataFrame:", df.columns)

# Define classes for acne classification
class_labels = ['blackheads', 'dark_spot', 'nodules', 'papules', 'pustules', 'whiteheads']

# Convert DataFrame to Dataset
dataset = Dataset.from_pandas(df)

# Fixed seed so that "Restart Kernel -> Run All" reproduces exactly the same splits
SPLIT_SEED = 42

# Split the dataset into train, validation, and test sets
ds = dataset.train_test_split(test_size=0.3, seed=SPLIT_SEED)  # 70% train, 30% held out
ds_test = ds['test'].train_test_split(test_size=0.5, seed=SPLIT_SEED)  # 30% held out --> 15% valid, 15% test

prepared_ds = DatasetDict({
    'train': ds['train'],
    'test': ds_test['test'],
    'valid': ds_test['train']
})

# The intermediate 15/15 split is no longer needed once it has been copied into prepared_ds
del ds_test

print(prepared_ds)


# Data Transformation

You must run this step, which provides the image processor, before proceeding with Model Training and Saving!

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import torch
from transformers import AutoImageProcessor, ViTForImageClassification
import transformers

# List of possible acne-related skin conditions
class_labels = ['blackheads', 'dark spot', 'nodules', 'papules', 'pustules', 'whiteheads']


def _normalize_column_name(name):
    """Return `name` stripped of surrounding whitespace, with underscores turned into spaces.

    Used so that a label spelled 'dark spot' also matches a column spelled 'dark_spot'
    (both spellings appear in this notebook's label lists).
    """
    return name.strip().replace('_', ' ')


def transform(example_batch):
    """Turn a batch of dataset rows into model inputs (image tensors plus a label matrix).

    Args:
        example_batch: Mapping of column name -> list of per-row values. It must contain a
            'filename' column (file names relative to 'lump/images') and one column per
            entry of `class_labels` (surrounding whitespace and '_' vs ' ' are ignored
            when matching column names).

    Returns:
        Whatever the module-level `processor` returns for the loaded images
        (requested as PyTorch tensors), with an added 'labels' entry: a float tensor of
        shape (number_of_images, len(class_labels)) filled column by column from the
        label columns of the batch.

    Raises:
        KeyError: if 'filename' or the column for any of `class_labels` is missing.

    NOTE(review): preprocessing always uses the module-level `processor`, regardless of which
    checkpoint is trained later in the notebook - confirm this is intended.
    """
    desired_size = (224, 224)
    root_dir = 'lump/images'

    # Resolve label columns first so a schema problem fails fast, before any image is read.
    # The value stored is the *actual* key of the batch, so lookups work even when a key
    # carries stray whitespace (indexing with the stripped name would raise KeyError).
    columns = {_normalize_column_name(key): key for key in example_batch.keys()}
    missing = [label for label in class_labels if _normalize_column_name(label) not in columns]
    if missing:
        raise KeyError(
            f"Label column(s) {missing} not found in batch; available columns: {list(example_batch.keys())}"
        )

    # Build the resize op once instead of once per image
    resize = transforms.Resize(desired_size)

    # Load every image, force 3 colour channels, and resize it
    images = []
    for img_path in example_batch['filename']:
        # The context manager closes the underlying file; convert() returns an independent copy
        with Image.open(f"{root_dir}/{img_path}") as img:
            images.append(resize(img.convert("RGB")))

    # Let the image processor turn the PIL images into PyTorch tensors
    inputs = processor(images, return_tensors='pt')

    # Multi-label target matrix: one row per image, one column per class.
    # Values are copied as-is from the label columns (presumably 0/1 indicators - verify
    # against the CSV), cast to the float dtype of the zero matrix.
    labels_matrix = torch.zeros((len(images), len(class_labels)))
    for idx, label in enumerate(class_labels):
        column = columns[_normalize_column_name(label)]
        labels_matrix[:, idx] = torch.tensor(example_batch[column])

    # Attach the labels next to the image tensors so one dict carries everything for training
    inputs['labels'] = labels_matrix

    return inputs

# Load the image processor (preprocessing configuration, not model weights) published with
# the "facebook/convnextv2-tiny-1k-224" checkpoint. transform() looks this global up each
# time it is called, so it must exist before the dataset is accessed.
processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")

# Attach transform() to every split so that batches are processed on the fly on access.
# The transform is applied to prepared_ds (train / test / valid) rather than to the
# intermediate 70/30 split `ds`: using `ds` would silently drop the validation split and
# leave a 'test' split that still contains the validation rows.
prepared_ds = prepared_ds.with_transform(transform)

# Print the transformed dataset to check the output
print(prepared_ds)


In [None]:
def collate_fn(batch):
    """Merge a list of per-sample dicts into one batched dict.

    Args:
        batch: Sequence of samples, each a mapping with tensor entries
            'pixel_values' and 'labels'.

    Returns:
        Dict with the same two keys, where each value is the samples' tensors
        stacked along a new leading (batch) dimension.
    """
    # Same stacking rule for both fields; dict order is pixel_values first, then labels
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in ('pixel_values', 'labels')
    }


# Model Training and Saving
Increase `num_train_epochs` in the training arguments to train for longer and obtain a better fit.
Note: save_model_to_bestmodels does not evaluate accuracy, it only saves the trained model.

In [None]:

from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer
import os

cl = ['blackheads', 'dark_spot', 'nodules', 'papules', 'pustules', 'whiteheads']

# Function to train a model and save it to the 'bestmodels' directory
def save_model_to_bestmodels(model_id, model_name):
    """Fine-tune a pre-trained image classifier on prepared_ds["train"] and save it.

    The function only trains; it performs no evaluation.

    Args:
        model_id: Hugging Face Hub id of the checkpoint to start from.
        model_name: Name of the sub-directory of 'bestmodels/' that receives the
            checkpoints, the final model and the image processor configuration.

    Returns:
        None. The trained model and the checkpoint's image processor are written to
        'bestmodels/<model_name>'.
    """
    # Local import: keeps this cell independent of imports made in earlier cells
    import torch

    print(f"Training model: {model_id}")

    output_dir = f"bestmodels/{model_name}"

    # Image processor that belongs to this checkpoint; it is saved next to the model so the
    # output directory is self-contained.
    processor = AutoImageProcessor.from_pretrained(model_id)

    # Load the pre-trained backbone with a fresh multi-label classification head
    model = AutoModelForImageClassification.from_pretrained(
        model_id,
        num_labels=len(cl),  # One output per class in `cl`
        id2label={i: c for i, c in enumerate(cl)},  # Integer index -> class label
        label2id={c: i for i, c in enumerate(cl)},  # Class label -> integer index
        ignore_mismatched_sizes=True,  # The checkpoint's head has a different number of classes; re-initialise it
        problem_type="multi_label_classification",  # Specify that this is a multi-label problem
    )

    # Training configuration. Evaluation is left at its default ("no"): passing the old
    # `evaluation_strategy` keyword fails on recent transformers releases.
    training_args = TrainingArguments(
        output_dir=output_dir,  # Directory to save the model
        per_device_train_batch_size=16,  # Batch size for training
        save_strategy="epoch",  # Save the model at the end of each epoch
        fp16=torch.cuda.is_available(),  # Mixed precision needs a CUDA GPU; fall back to fp32 otherwise
        num_train_epochs=1,  # Train for 1 epoch
        logging_steps=500,  # Log progress every 500 steps
        learning_rate=2e-4,  # Set learning rate for optimization
        save_total_limit=1,  # Keep only the latest checkpoint
        remove_unused_columns=False,  # Keep 'filename' and label columns: the on-the-fly transform reads them
        push_to_hub=False,  # Do not push the model to Hugging Face Hub
        report_to='tensorboard',  # Send training logs to TensorBoard
    )

    # The processor is deliberately not passed to Trainer: the keyword for it changed
    # (`tokenizer` -> `processing_class`) between transformers versions. It is saved
    # explicitly below instead, which works on every version.
    trainer = Trainer(
        model=model,  # The model to train
        args=training_args,  # Training configuration
        data_collator=collate_fn,  # Stack samples into batches
        train_dataset=prepared_ds["train"],  # The training dataset
    )

    # Start training the model
    trainer.train()

    # Save the trained model together with its preprocessing configuration
    trainer.save_model(output_dir)
    processor.save_pretrained(output_dir)

    # Print a message confirming the model has been saved
    print(f"Model saved to 'bestmodels/{model_name}'")


# Checkpoints to fine-tune, as (Hugging Face model id, folder name under 'bestmodels') pairs.
# Dict insertion order is preserved, so the list keeps the order written here.
model_list = list({
    "google/vit-base-patch16-224": "vit",
    "openai/clip-vit-base-patch32": "clip",
    "google/siglip-base-patch16-224": "siglip",
    "facebook/convnextv2-tiny-1k-224": "convnext",
    "apple/mobilevitv2-1.0-imagenet1k-256": "mobilevit",
    "google/mobilenet_v1_1.0_224": "mobilenet",
}.items())

# Ensure the parent output folder is present before any training run starts
os.makedirs("bestmodels", exist_ok=True)

# Run the train-and-save routine once per checkpoint, in the order listed above
for model_id, model_name in model_list:
    save_model_to_bestmodels(model_id=model_id, model_name=model_name)

