<a href="https://colab.research.google.com/github/heyronith/youtube/blob/main/mPox_DL_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import and Set Up

In [1]:
#packages needed

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from PIL import Image
import pandas as pd
import numpy as np
import os
import zipfile
from sklearn.model_selection import train_test_split
import albumentations as A


In [2]:
# Unpack the uploaded dataset archive into the Colab working directory.
zip_path = '/content/Monkeypox Skin Image Dataset.zip'  # Adjust this path if necessary
extract_path = '/content'

with zipfile.ZipFile(zip_path, mode='r') as archive:
    # Every member of the archive is written below extract_path.
    archive.extractall(path=extract_path)

print("Dataset extracted successfully.")

Dataset extracted successfully.


In [3]:


def create_dataset_df(data_dir):
    """Index every image below ``data_dir`` into a DataFrame.

    ``data_dir`` is scanned one level deep: each sub-directory is treated as a
    class, and each file inside it whose name ends in ``.png``, ``.jpg`` or
    ``.jpeg`` (case-insensitive) becomes one row. Non-directory entries in
    ``data_dir`` and non-image files are ignored.

    Args:
        data_dir: Path of the dataset root directory.

    Returns:
        pandas.DataFrame with two columns: ``image_path`` (path relative to
        ``data_dir``, i.e. ``<class folder>/<file name>``) and ``label`` (the
        class folder name). Rows are ordered by class folder, then file name.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    image_paths = []
    labels = []

    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the row order deterministic, so any seeded split of the
    # result is reproducible across machines and runs.
    for class_folder in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_folder)
        if not os.path.isdir(class_path):
            continue
        for image_file in sorted(os.listdir(class_path)):
            if image_file.lower().endswith(image_exts):
                image_paths.append(os.path.join(class_folder, image_file))
                labels.append(class_folder)

    return pd.DataFrame({'image_path': image_paths, 'label': labels})

# Index the extracted images: one row per file, labelled by its class folder.
extract_path = '/content/Monkeypox Skin Image Dataset'
df_original = create_dataset_df(extract_path)

print("Original dataset created.")
print(f"Dataset size: {len(df_original)}")
print("\nClass distribution:")
print(df_original['label'].value_counts())

# Stratified 80/20 hold-out split with a fixed seed.
train_df, test_df = train_test_split(
    df_original,
    test_size=0.2,
    random_state=42,
    stratify=df_original['label'],
)

print(f"\nTraining set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")

# Persist both splits as CSV files (index column omitted).
train_df.to_csv('train_original.csv', index=False)
test_df.to_csv('test_original.csv', index=False)

print("\nCSV files created.")

Original dataset created.
Dataset size: 770

Class distribution:
label
Normal        293
Monkeypox     279
Chickenpox    107
Measles        91
Name: count, dtype: int64

Training set size: 616
Test set size: 154

CSV files created.


# Training on Original Dataset

In [None]:

# Custom Model Definition
class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel attention block.

    Each channel is global-average-pooled to a scalar, the resulting vector is
    passed through a two-layer bias-free bottleneck MLP ending in a sigmoid,
    and the input is rescaled channel-wise by the resulting gates.

    Args:
        channel: Number of channels of the input feature map.
        reduction: Bottleneck ratio. The hidden width is
            ``channel // reduction``, clamped to a minimum of 1.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        # Without the clamp, channel < reduction yields a zero-width
        # bottleneck: the second Linear then outputs all zeros and every gate
        # collapses to the constant sigmoid(0) = 0.5.
        hidden = max(1, channel // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` scaled per channel by its learned gate.

        Args:
            x: 4-D tensor; the first two dimensions are read as (batch, channel).

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()
        # Squeeze: (b, c, H, W) -> (b, c)
        y = self.avg_pool(x).view(b, c)
        # Excite: per-channel gate in (0, 1), reshaped for broadcasting.
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

class FPN(nn.Module):
    """Feature pyramid with a top-down pathway.

    One 1x1 "lateral" convolution and one 3x3 output convolution are created
    per entry of ``in_channels_list``; every level is projected to
    ``out_channels`` channels.
    """

    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        # 1x1 convolutions mapping each input level to the shared width.
        laterals = [
            nn.Conv2d(level_channels, out_channels, kernel_size=1)
            for level_channels in in_channels_list
        ]
        self.lateral_convs = nn.ModuleList(laterals)
        # 3x3 convolutions applied to each merged map.
        smoothers = [
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
            for _ in in_channels_list
        ]
        self.fpn_convs = nn.ModuleList(smoothers)

    def forward(self, features):
        """Merge ``features`` from the last level down to the first.

        Args:
            features: Sequence of feature maps, indexed in the same order as
                ``in_channels_list``.

        Returns:
            List of output maps in the same order as ``features``.
        """
        # Walk all three sequences from the last level to the first.
        feats_desc = features[::-1]
        laterals_desc = list(self.lateral_convs)[::-1]
        smoothers_desc = list(self.fpn_convs)[::-1]

        merged = laterals_desc[0](feats_desc[0])
        outputs_desc = [smoothers_desc[0](merged)]

        for feat, lateral_conv, smooth_conv in zip(
            feats_desc[1:], laterals_desc[1:], smoothers_desc[1:]
        ):
            skip = lateral_conv(feat)
            # Resize the running map to this level's spatial size, then add.
            upsampled = F.interpolate(merged, size=skip.shape[-2:], mode='nearest')
            merged = skip + upsampled
            outputs_desc.append(smooth_conv(merged))

        outputs_desc.reverse()
        return outputs_desc

class CustomModel(nn.Module):
    """EfficientNetV2-S backbone followed by a three-layer MLP classifier.

    Args:
        num_classes: Size of the output logit vector.
        pretrained: When True (default) the backbone is initialised with the
            ImageNet weights shipped by torchvision. Pass False to skip the
            download, e.g. when a saved ``state_dict`` is loaded right after
            construction.
    """

    def __init__(self, num_classes=4, pretrained=True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the weights enum is
        # the supported spelling and selects the same checkpoint.
        weights = models.EfficientNet_V2_S_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = models.efficientnet_v2_s(weights=weights)

        # Input width of the stock classifier's Linear layer = backbone feature size.
        num_features = backbone.classifier[1].in_features

        # Keep every child module except the last one (the classifier).
        self.efficientnet = nn.Sequential(*list(backbone.children())[:-1])

        # NOTE(review): the truncated backbone presumably already ends in an
        # adaptive average pool, which would make `gap` a no-op; it is kept so
        # the forward pass is unchanged.
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(num_features, 512)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.efficientnet(x)
        x = self.gap(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

# Instantiate the network and report its total parameter count.
model = CustomModel()
total_params = sum(param.numel() for param in model.parameters())
print(f"Model initialized with {total_params} parameters")


# Custom Dataset
class MonkeypoxDataset(Dataset):
    """Image dataset driven by a CSV index.

    The CSV's first column holds image paths relative to ``root_dir`` and its
    second column holds the class name.

    Args:
        csv_file: Path of the CSV index file.
        root_dir: Directory that the CSV's image paths are relative to.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Fixed class-name -> integer-label mapping.
        self.class_to_idx = {'Normal': 0, 'Monkeypox': 1, 'Chickenpox': 2, 'Measles': 3}

    def __len__(self):
        """Number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` when one was given) and ``label`` is the integer
            class index.

        Raises:
            KeyError: If the row's class name is not in ``class_to_idx``.
        """
        img_path = os.path.join(self.root_dir, self.data.iloc[idx, 0])
        # Open via a context manager so the file handle is released right
        # away instead of lingering until garbage collection; convert()
        # returns a fully loaded image that outlives the handle.
        with Image.open(img_path) as handle:
            image = handle.convert('RGB')
        label = self.class_to_idx[self.data.iloc[idx, 1]]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing shared by both splits: resize to 224x224, convert to a tensor,
# then normalise each channel with the standard ImageNet mean/std.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Datasets read the CSV splits; image paths are relative to dataset_root.
dataset_root = '/content/Monkeypox Skin Image Dataset'
train_dataset = MonkeypoxDataset('train_original.csv', dataset_root, transform=transform)
test_dataset = MonkeypoxDataset('test_original.csv', dataset_root, transform=transform)

# Only the training loader shuffles.
loader_options = dict(batch_size=32, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Fresh model instance for this training run.
model = CustomModel()

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function (Focal Loss)
class FocalLoss(nn.Module):
    """Multi-class focal loss built on top of cross-entropy.

    Each sample's cross-entropy is scaled by ``alpha * (1 - p_t) ** gamma``,
    where ``p_t = exp(-cross_entropy)``; the scaled values are averaged.

    Args:
        alpha: Constant multiplier applied to every sample's loss.
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
    """

    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha, self.gamma = alpha, gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss for logits ``inputs`` and class indices ``targets``."""
        per_sample_ce = F.cross_entropy(inputs, targets, reduction='none')
        # p_t recovered from the per-sample cross-entropy.
        prob_true = (-per_sample_ce).exp()
        modulator = (1 - prob_true).pow(self.gamma)
        weighted = self.alpha * modulator * per_sample_ce
        return weighted.mean()

criterion = FocalLoss()

# AdamW optimiser plus a cosine-annealing schedule with warm restarts: the
# first cycle lasts T_0 = 10 scheduler steps and each later cycle is twice as
# long (T_mult = 2).
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=10,
    T_mult=2,
)

# Training function
def train(model, train_loader, criterion, optimizer, scheduler, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute whose length is the sample count.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimiser stepped once per batch.
        scheduler: LR scheduler stepped once, after the last batch.
        device: Device the batches are moved to.

    Returns:
        Sum of ``batch loss * batch size`` divided by ``len(train_loader.dataset)``.
    """
    model.train()
    loss_sum = 0.0

    for batch_idx, (images, targets) in enumerate(train_loader):
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        # Cap the global gradient norm at 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        batch_loss = loss.item()
        # Weight by batch size so a smaller final batch is averaged correctly.
        loss_sum += batch_loss * images.size(0)

        if batch_idx % 10 == 0:  # Print every 10 mini-batches
            print(f'Batch {batch_idx}, Loss: {batch_loss:.4f}')

    scheduler.step()
    return loss_sum / len(train_loader.dataset)

# Evaluation function
def evaluate(model, test_loader, device):
    """Compute classification metrics for ``model`` over ``test_loader``.

    Predictions, labels and class probabilities are gathered in a single pass
    over the loader. This halves inference cost compared with iterating twice
    and guarantees that labels and probabilities stay aligned even when the
    loader yields batches in a different order on each iteration.

    Args:
        model: Network to evaluate; switched to eval mode.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, auc)``. Precision, recall and
        F1 are support-weighted averages; AUC is one-vs-rest, weighted.
    """
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            # Softmax probabilities are needed for AUC; the arg-max of the
            # logits gives the hard prediction.
            probs = F.softmax(outputs, dim=1)
            all_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    # zero_division=0 keeps the value scikit-learn already substitutes for a
    # class with no predicted samples, but without emitting a warning.
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    all_probs = np.array(all_probs)
    auc = roc_auc_score(all_labels, all_probs, multi_class='ovr', average='weighted')

    return accuracy, precision, recall, f1, auc

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 100
# Start below any attainable accuracy so the first epoch always writes a
# checkpoint. With 0.0 and a strict `>`, a run that never exceeds 0 accuracy
# would save nothing, and the load below would pick up a stale
# 'best_model.pth' from an earlier run (or fail if none exists).
best_accuracy = float('-inf')

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss = train(model, train_loader, criterion, optimizer, scheduler, device)
    accuracy, precision, recall, f1, auc = evaluate(model, test_loader, device)

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}")

    # Keep the weights of the epoch with the highest accuracy seen so far.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(model.state_dict(), 'best_model.pth')

print("Training completed.")

# Load the best model and evaluate
# NOTE(review): the checkpoint is selected using accuracy on test_loader and
# the final metrics are computed on the same data, so they are optimistic.
# A separate validation split would give an unbiased estimate.
# map_location lets a checkpoint written on a GPU load on a CPU-only runtime.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
final_accuracy, final_precision, final_recall, final_f1, final_auc = evaluate(model, test_loader, device)

print("Final Results on Original Dataset:")
print(f"Accuracy: {final_accuracy:.4f}")
print(f"Precision: {final_precision:.4f}")
print(f"Recall: {final_recall:.4f}")
print(f"F1-score: {final_f1:.4f}")
print(f"AUC: {final_auc:.4f}")

Model initialized with 20965716 parameters




Epoch 1/100
Batch 0, Loss: 0.7889
Batch 10, Loss: 0.6377


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4892
Test Accuracy: 0.6494, Precision: 0.6016, Recall: 0.6494, F1: 0.6204, AUC: 0.8902
Epoch 2/100
Batch 0, Loss: 0.3949
Batch 10, Loss: 0.2018
Train Loss: 0.3488
Test Accuracy: 0.7597, Precision: 0.8563, Recall: 0.7597, F1: 0.7730, AUC: 0.9299
Epoch 3/100
Batch 0, Loss: 0.1677
Batch 10, Loss: 0.4253
Train Loss: 0.2362
Test Accuracy: 0.7792, Precision: 0.8257, Recall: 0.7792, F1: 0.7903, AUC: 0.9530
Epoch 4/100
Batch 0, Loss: 0.1241
Batch 10, Loss: 0.2182
Train Loss: 0.1441
Test Accuracy: 0.8117, Precision: 0.8549, Recall: 0.8117, F1: 0.8206, AUC: 0.9590
Epoch 5/100
Batch 0, Loss: 0.1078
Batch 10, Loss: 0.0418
Train Loss: 0.1417
Test Accuracy: 0.8831, Precision: 0.8820, Recall: 0.8831, F1: 0.8783, AUC: 0.9816
Epoch 6/100
Batch 0, Loss: 0.0376
Batch 10, Loss: 0.0372
Train Loss: 0.0635
Test Accuracy: 0.9026, Precision: 0.9202, Recall: 0.9026, F1: 0.9038, AUC: 0.9796
Epoch 7/100
Batch 0, Loss: 0.0589
Batch 10, Loss: 0.0352
Train Loss: 0.0402
Test Accuracy: 0.8766, Precision:


# Try the deployed model's inference demo here: https://huggingface.co/spaces/ronithsharmila/PoxNet

The model was deployed to Hugging Face Spaces by following the instructions at this link:

