In [None]:
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
import torch
import os
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader, WeightedRandomSampler

In [3]:
class MultiPartDataset(Dataset):
    def __init__(self, path_to_folders, label_csv, transform):
        self.meta_data = pd.read_csv(label_csv)
        self.path_to_folders = path_to_folders
        self.data = []
        
        dx, self.class_labels = pd.factorize(self.meta_data['dx'], sort=True)
        dx_type, _ = pd.factorize(self.meta_data['dx_type'], sort=True)
        age, _ = pd.factorize(self.meta_data['age'], sort=True)
        sex, _ = pd.factorize(self.meta_data['sex'], sort=True)
        for path in self.path_to_folders:
            files = os.listdir(path)

            localization, _ = pd.factorize(self.meta_data['localization'], sort=True)
            for file in sorted(files):
                idx = self.meta_data['image_id'] == file.split('.')[0]
                self.data.append((path, file, [dx_type[idx], age[idx], sex[idx], localization[idx]], dx[idx]))
        
        self.transform = transform
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data[idx][0], self.data[idx][1])
        image = Image.open(img_name)
        label = self.data[idx][3]
        attributes = np.array(self.data[idx][2])
        attributes = torch.from_numpy(attributes.flatten())

        if self.transform:
            image = self.transform(image)
        
        
        return image, attributes, label[0]

In [4]:
folders = ["/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1", "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2"]
meta_csv_file_path = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"
transform = transforms.Compose([
    transforms.Resize((255, 255)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
dataset = MultiPartDataset(folders, meta_csv_file_path, transform)

In [5]:
data_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

# Initialize the VGG model with pre-trained weights
model = models.resnet50(pretrained=True)
num_features = model.fc.in_features
num_classes = len(torch.unique(torch.tensor(dataset.meta_data['dx'].factorize()[0])))
model.fc = nn.Linear(num_features, num_classes)
print(num_classes)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:03<00:00, 31.2MB/s]


7


In [7]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [22]:
def make_weights_for_balanced_classes(dataset):
    unique_labels = np.unique(dataset.meta_data["dx"])
    class_counts = {label: 0 for label in unique_labels}
    
    for _, _, _, label in dataset.data:
        class_counts[dataset.class_labels[label][0]] += 1
        
    
    total_samples = len(dataset)
    weights = [total_samples / class_counts[label] for label in unique_labels.tolist()]
    
    return weights

weights = make_weights_for_balanced_classes(dataset)

In [24]:
from sklearn.model_selection import train_test_split

# Assuming 'dataset' is already defined
train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=42)


#weights = make_weights_for_balanced_classes(dataset)
sampler = WeightedRandomSampler(weights, len(weights))

train_loader = DataLoader(train_data, batch_size=32, sampler=sampler)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

In [25]:
def compute_accuracy(model, test_loader, loss_fn):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    correct, total = 0, 0
    class_correct = [0] * 7
    class_total = [0] * 7
    loss_total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss_total += torch.sum(loss) / 8
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            for i in range(len(predicted)):
                class_correct[labels[i]] += (predicted[i] == labels[i]).item()
                class_total[labels[i]] += 1
            correct += (predicted == labels).sum().item()
    class_accuracy = [100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0 for i in range(7)]
    print(class_accuracy)

    return correct / total * 100, loss_total / len(test_loader)

In [26]:
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for images, attributes, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_accuracy = 100 * train_correct / train_total

    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, attributes, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = 100 * val_correct / val_total
    print(f'Epoch {epoch+1}, Training Loss: {train_loss/train_total:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss/val_total:.4f}, Validation Accuracy: {val_accuracy:.2f}%')


Epoch 1, Training Loss: 0.2912, Training Accuracy: 0.00%, Validation Loss: 0.0613, Validation Accuracy: 12.53%
Epoch 2, Training Loss: 0.2384, Training Accuracy: 71.43%, Validation Loss: 0.0578, Validation Accuracy: 30.15%
Epoch 3, Training Loss: 0.2062, Training Accuracy: 100.00%, Validation Loss: 0.0546, Validation Accuracy: 37.09%
Epoch 4, Training Loss: 0.1788, Training Accuracy: 100.00%, Validation Loss: 0.0521, Validation Accuracy: 31.20%
Epoch 5, Training Loss: 0.1783, Training Accuracy: 42.86%, Validation Loss: 0.0516, Validation Accuracy: 22.72%
Epoch 6, Training Loss: 0.1244, Training Accuracy: 57.14%, Validation Loss: 0.0517, Validation Accuracy: 19.87%
Epoch 7, Training Loss: 0.1206, Training Accuracy: 85.71%, Validation Loss: 0.0543, Validation Accuracy: 11.78%
Epoch 8, Training Loss: 0.0698, Training Accuracy: 100.00%, Validation Loss: 0.0573, Validation Accuracy: 9.14%
Epoch 9, Training Loss: 0.0558, Training Accuracy: 100.00%, Validation Loss: 0.0598, Validation Accurac

In [27]:
# Save the model
model_path = 'resnet50_model_weighted_random_sampling.h5'
torch.save(model.state_dict(), model_path)
print(f'Model saved to {model_path}')

Model saved to resnet50_model_weighted_random_sampling.h5


In [55]:
# Load model from vbefore
# model_path = '/kaggle/working/resnet50_model_skin_cancer_unweighted.h5'
# model_path = 'resnet50_model_weighted_random_sampling.h5'
model = "/kaggle/input/resnet_50_skin_chkpt3/pytorch/checkpoint3_resnet_skin/1/ResNet50_checkpoint_3.pth"
model = models.resnet50(pretrained=False)  # Instantiate the model with the same architecture
num_features = model.fc.in_features
num_classes = len(torch.unique(torch.tensor(dataset.meta_data['dx'].factorize()[0])))
model.fc = nn.Linear(num_features, num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.load_state_dict(torch.load(model_path))  # Load the saved weights



<All keys matched successfully>

In [41]:
class_labels = list(dataset.class_labels)

class TestDataset(Dataset):
    def __init__(self, path_to_img_folder, path_to_csv, transform):
        self.labels = pd.read_csv(path_to_csv)
        self.path_to_img_folder = path_to_img_folder
        self.data = []
        
        i = 0
        for file in sorted(os.listdir(self.path_to_img_folder)):
            if file[-4:] == ".jpg":
                idx = self.labels['image'] == file.split('.')[0]
                class_name = self.labels[idx].eq(1.0).idxmax(axis=1).values[0].lower()
                self.data.append((file, class_labels.index(class_name)))
    
        self.data = np.array(self.data)
        self.transform = transform
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.path_to_img_folder, self.data[idx][0])
        image = Image.open(img_name)
        label = self.data[idx][1]

        if self.transform:
            image = self.transform(image)
        
        return image, torch.tensor(int(label))

In [42]:
path_to_csv = "/kaggle/input/isic-2018-task-3/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth.csv"
path_to_test_img = "/kaggle/input/isic-2018-task-3/ISIC2018_Task3_Test_Input/ISIC2018_Task3_Test_Input"
transform = transforms.Compose([
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
test_dataset = TestDataset(path_to_test_img, path_to_csv, transform)
test_loader = DataLoader(test_dataset, batch_size=8)

In [46]:
compute_accuracy(model, test_loader, criterion)

[2.3255813953488373, 49.46236559139785, 39.63133640552996, 0.0, 0.0, 7.040704070407041, 0.0]


(13.029100529100528, tensor(0.2352, device='cuda:0'))

In [44]:
df = pd.read_csv(path_to_csv)
class_counts = df.iloc[:, 1:].sum()

print(f"Class counts {class_counts}")

print(class_labels)

Class counts MEL      171.0
NV       909.0
BCC       93.0
AKIEC     43.0
BKL      217.0
DF        44.0
VASC      35.0
dtype: float64
['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']


In [56]:
from sklearn.metrics import balanced_accuracy_score
def computeaccuracy(model, test_loader, loss_fn, class_labels):
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    correct, total = 0, 0
    class_correct = [0] * 7
    class_total = [0] * 7
    loss_total = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss_total += torch.sum(loss) / 8 
            _ , predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        


            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

            for i in range(len(labels)):
                label = labels[i].item()
                pred = predicted[i].item()
                class_correct[label] += (pred == label)
                class_total[label] += 1

    label_accuracy = {}
    class_accuracy = [100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0 for i in range(7)]
    print("Class-specific Accuracies:", class_accuracy)
    for i, acc in enumerate(class_accuracy):
        label_accuracy[class_labels[i]] = acc

    standard_accuracy = correct / total * 100

    balanced_acc = balanced_accuracy_score(all_labels, all_preds)


    return standard_accuracy, balanced_acc, loss_total / len(test_loader), label_accuracy

computeaccuracy(model, test_loader, criterion, class_labels)

Class-specific Accuracies: [65.11627906976744, 75.26881720430107, 61.29032258064516, 29.545454545454547, 40.35087719298246, 78.98789878987898, 68.57142857142857]


(69.77513227513228,
 0.5987586827920832,
 tensor(0.1079, device='cuda:0'),
 {'akiec': 65.11627906976744,
  'bcc': 75.26881720430107,
  'bkl': 61.29032258064516,
  'df': 29.545454545454547,
  'mel': 40.35087719298246,
  'nv': 78.98789878987898,
  'vasc': 68.57142857142857})