
# Installing Dependencies and Packages

In [None]:
!pip install data_reader
from data_reader import *
from torchvision import transforms
import csv
import torch as torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
from tqdm import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting data_reader
  Downloading data_reader-0.0.8-py3-none-any.whl (6.0 kB)
Collecting ijson
  Downloading ijson-3.2.0.post0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.5/112.5 KB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ijson, data_reader
Successfully installed data_reader-0.0.8 ijson-3.2.0.post0


# Loading Data, Labels

In [None]:
import os
# Base directory of the workspace; every project asset lives in <base>/FinalProject.
CS188_path = './'
root_dir = os.path.join(CS188_path, 'FinalProject')
# Create the project folder up front (no error when it already exists).
os.makedirs(root_dir, exist_ok=True)

In [None]:
import shutil
from google.colab import files
from zipfile import ZipFile
# Download the ground-truth CSV files: per the recorded output below, the first id is
# ISIC2018_Task3_Training_GroundTruth.csv and the second is the Validation counterpart.
!gdown 1xUeYl1odxxlgiCo9OGimvB3L6k4_uN9N
!gdown 1o62AjBbs-itqAHULcZeiEGRGRudMRVom
# Download the zipped training and validation images
# (original note: 10015 images, taking roughly 10-30 seconds).
!gdown 1fZGdows23YXLq-XiJgBVbLqyD_KlIUnV
!gdown 1sciwDsAlNWkVg_qIK-nRijeNMlqEAroH
# Move the two CSV files into ./FinalProject; the zip archives are not moved here.
shutil.move("./ISIC2018_Task3_Training_GroundTruth.csv", "./FinalProject/ISIC2018_Task3_Training_GroundTruth.csv")
shutil.move("./ISIC2018_Task3_Validation_GroundTruth.csv", "./FinalProject/ISIC2018_Task3_Validation_GroundTruth.csv")


Downloading...
From: https://drive.google.com/uc?id=1xUeYl1odxxlgiCo9OGimvB3L6k4_uN9N
To: /content/ISIC2018_Task3_Training_GroundTruth.csv
100% 421k/421k [00:00<00:00, 147MB/s]
Downloading...
From: https://drive.google.com/uc?id=1o62AjBbs-itqAHULcZeiEGRGRudMRVom
To: /content/ISIC2018_Task3_Validation_GroundTruth.csv
100% 8.14k/8.14k [00:00<00:00, 13.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1fZGdows23YXLq-XiJgBVbLqyD_KlIUnV
To: /content/ISIC2018_Task3_Training_Input.zip
100% 2.78G/2.78G [00:32<00:00, 85.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1sciwDsAlNWkVg_qIK-nRijeNMlqEAroH
To: /content/ISIC2018_Task3_Validation_Input.zip
100% 53.4M/53.4M [00:01<00:00, 28.7MB/s]


'./FinalProject/ISIC2018_Task3_Validation_GroundTruth.csv'

In [None]:
from zipfile import ZipFile
train_file_name = "ISIC2018_Task3_Training_Input.zip"
val_file_name = "ISIC2018_Task3_Validation_Input.zip"
def unzip(source_filename, dest_dir):
    """Extract every member of the zip archive ``source_filename`` into ``dest_dir``.

    Args:
        source_filename: Path of the ``.zip`` archive to read.
        dest_dir: Directory that receives the extracted files.
    """
    with ZipFile(source_filename) as archive:
        archive.extractall(path=dest_dir)

# Extract both image archives into ./FinalProject/, producing
# "./FinalProject/ISIC2018_Task3_Training_Input" and "./FinalProject/ISIC2018_Task3_Validation_Input".
# (Original note: 10,015 images, roughly 13 seconds.)
for archive_name in (train_file_name, val_file_name):
    unzip(archive_name, "./FinalProject/")


In [None]:
# Class index -> diagnosis abbreviation.
# NOTE(review): presumably listed in the same order as the score columns of the
# ground-truth CSV, since labels are the argmax over those columns - confirm.
label_directory = dict(enumerate(['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']))

In [None]:
class SkinLesions(Dataset):
    """Image-classification dataset driven by a ground-truth CSV file.

    The first CSV row is treated as a header. Every following row is read as
    ``<image id>, <score for class 0>, <score for class 1>, ...`` and the label
    is the index of the largest score. Images are looked up as
    ``<root_dir>/<image folder>/<image id>.jpg`` where the image folder is
    ``ISIC2018_Task3_Validation_Input`` for the validation CSV and
    ``ISIC2018_Task3_Training_Input`` for anything else.

    Args:
        root_dir: Directory holding the CSV file and the image folders.
        foldername: File name of the ground-truth CSV inside ``root_dir``
            (despite the parameter name it is a file, not a folder).
        label_dict: Stored but otherwise unused. For backward compatibility a
            callable passed here while ``transform`` is ``None`` is treated as
            the transform, because existing callers pass the transform as the
            third positional argument.
        transform: Callable applied to each PIL image in ``__getitem__``. When
            no transform is supplied the module-level ``data_transform`` is
            looked up at access time (legacy behaviour).
        size: Optional cap on the number of samples; the first ``size`` data
            rows of the CSV are used. ``None`` or ``0`` means "all rows".

    Raises:
        FileNotFoundError: If an image referenced by the CSV does not exist.
    """

    def __init__(self, root_dir, foldername, label_dict = None, transform=None, size = None):
        self.root_dir = root_dir
        self.foldername = foldername
        # Callers historically wrote SkinLesions(root, csv, data_transform), which binds
        # the transform to `label_dict`; honour that intent instead of dropping it.
        if transform is None and callable(label_dict):
            transform, label_dict = label_dict, None
        self.label_dict = label_dict
        self.transform = transform
        self.filenames = []
        self.labels = []
        csv_path = os.path.join(root_dir, foldername)

        if foldername == 'ISIC2018_Task3_Validation_GroundTruth.csv':
            image_folder = 'ISIC2018_Task3_Validation_Input'
        else:
            image_folder = 'ISIC2018_Task3_Training_Input'

        with open(csv_path, 'r', newline='') as handle:
            reader = csv.reader(handle, delimiter = ',')
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                img_path = os.path.join(root_dir, image_folder, row[0] + ".jpg")
                if not os.path.exists(img_path):
                    # Fail loudly; exit(0) would stop the process with a success status.
                    raise FileNotFoundError("{} does not exists".format(img_path))
                self.filenames.append(img_path)
                # Compare the scores numerically rather than as strings.
                scores = np.asarray(row[1:], dtype=float)
                self.labels.append(int(np.argmax(scores)))
                if size and len(self.labels) == size:
                    break

    def __len__(self):
        """Return the number of samples that were loaded from the CSV."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(transformed image, label index)`` for sample ``idx``."""
        img_path = self.filenames[idx]
        label = self.labels[idx]

        # Decode inside a context manager so the file handle is released, and
        # force three channels so a 3-channel Normalize always applies.
        with Image.open(img_path) as handle:
            image = handle.convert('RGB')

        # Prefer the transform bound to this dataset; fall back to the global
        # only when none was supplied (legacy behaviour).
        transform = self.transform if self.transform is not None else data_transform
        image = transform(image)
        return image, label


## Transforms for the data

In [None]:
# Baseline preprocessing: resize to 64x64, convert to a tensor, then normalise
# each channel with the given mean / standard deviation.
data_transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])


In [None]:
def tensor_to_image(image, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Undo channel normalisation and return an image array ready for plotting.

    Inverts ``(x - mean) / std`` per channel, moves the channel axis last and
    clips the result to ``[0, 1]``.

    Args:
        image: Normalised float tensor laid out as ``(channels, height, width)``.
        mean: Per-channel means that were used for normalisation. Defaults to
            the values used by the 64x64 ``data_transform`` in this notebook.
        std: Per-channel standard deviations that were used for normalisation.

    Returns:
        A NumPy array of shape ``(height, width, channels)`` with values in ``[0, 1]``.
    """
    # Work on a detached CPU copy so the caller's tensor (and its graph) is untouched.
    tensor_image = image.clone().detach().cpu()
    mean_t = torch.as_tensor(mean, dtype=tensor_image.dtype).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=tensor_image.dtype).view(-1, 1, 1)
    # Inverse of (x - mean) / std, broadcast over the spatial dimensions.
    restored = tensor_image * std_t + mean_t
    # De-normalised pixel values live in [0, 1], so clip to that range.
    return restored.permute(1, 2, 0).clamp(min=0, max=1).numpy()

# Model Constructions

In this section, we initialize the various models used for the project, along with the training and evaluation functions.



In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
if device.type == 'cuda':
    print(f'Using device: {device}. Good to go!')
else:
    print('Please set GPU via Edit -> Notebook Settings.')

Using device: cuda. Good to go!


In [None]:
def train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and validate after each epoch.

    Args:
        model: Network to optimise; it is moved to ``device`` first.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` batches passed to ``evaluate``.
        optimizer: Optimiser that updates the model parameters.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over ``train_loader``.

    Prints the validation loss and accuracy after every epoch; returns nothing.
    """
    model = model.to(device)

    for epoch_idx in range(num_epochs):
        # Switch back to training mode each epoch because evaluate() sets eval mode.
        model.train()
        with tqdm(total=len(train_loader), desc=f'Epoch {epoch_idx + 1}/{num_epochs}') as progress:
            for batch_inputs, batch_labels in train_loader:
                batch_inputs = batch_inputs.to(device)
                batch_labels = batch_labels.to(device)

                # Standard optimisation step: reset grads, forward, backward, update.
                optimizer.zero_grad()
                batch_loss = criterion(model(batch_inputs), batch_labels)
                batch_loss.backward()
                optimizer.step()

                # Advance the bar and show the loss of the batch just processed.
                progress.update(1)
                progress.set_postfix(loss=batch_loss.item())

        val_loss, val_acc = evaluate(model, val_loader, criterion, device)
        print(f'Validation set: Average loss = {val_loss:.4f}, Accuracy = {val_acc:.4f}')

def evaluate(model, test_loader, criterion, device):
    """Compute the average loss and the accuracy of ``model`` over ``test_loader``.

    The loss is averaged per sample: each batch loss is weighted by the batch
    size, so a short final batch does not distort the result. This assumes
    ``criterion`` returns the mean loss of the batch (the default reduction of
    ``nn.CrossEntropyLoss``).

    Args:
        model: Network to evaluate; it is put in eval mode and left there.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device every batch is moved to (the model must already be on it).

    Returns:
        ``(avg_loss, accuracy)`` as Python floats.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()  # disable dropout / use running batch-norm statistics

    total_loss = 0.0
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            batch_size = len(inputs)
            # Weight the batch-mean loss by the batch size to get a per-sample sum.
            total_loss += criterion(logits, labels).item() * batch_size

            predictions = logits.argmax(dim=1)
            num_correct += (predictions == labels).sum().item()
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError('evaluate() received no samples: test_loader is empty')

    avg_loss = total_loss / num_samples
    accuracy = num_correct / num_samples

    return avg_loss, accuracy

## Model #1: Initializing ResNet

In [None]:
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm stages plus a skip connection.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input before it is added
            back, used when the main branch changes the shape.
    """

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super().__init__()
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())
        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        # No ReLU here: the activation is applied after the skip connection is added.
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut(x))``."""
        main_branch = self.conv2(self.conv1(x))
        shortcut = self.downsample(x) if self.downsample else x
        return self.relu(main_branch + shortcut)


class ResNet50(nn.Module):
    """Residual network assembled from a caller-supplied block type.

    Layout: 7x7 stride-2 stem -> 3x3 max pool -> four stages of ``block``
    (64, 128, 256, 512 channels; stages 2-4 halve the resolution) -> global
    average pool -> linear classifier.

    NOTE(review): this notebook builds it with ``ResidualBlock`` (two 3x3
    convolutions per block) and ``[3, 4, 6, 3]``, i.e. basic rather than
    bottleneck blocks, despite the class name.

    Args:
        block: Block class called as ``block(in_channels, out_channels, stride, downsample)``.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the output layer.
    """

    def __init__(self, block, layers, num_classes = 7):
        super(ResNet50, self).__init__()
        self.inplanes = 64  # running channel count consumed by _make_layer
        self.conv1 = nn.Sequential(
                        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
        # Global average pooling. For 224x224 inputs the final map is 7x7, so this
        # equals the former AvgPool2d(7, stride=1); other input sizes now also
        # reduce to 1x1 instead of breaking the 512-feature classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
        # Kept for backward compatibility; forward() does not use it.
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
        self.data_transform = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),normalize,])

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks that outputs ``planes`` channels.

        Only the first block may change the stride or channel count; in that
        case a 1x1 conv + batch norm projects the skip connection to match.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image tensor to ``(batch, num_classes)`` logits."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

## Model #2: Initializing AlexNet

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style classifier with batch normalisation.

    Five convolutional stages (Conv2d -> BatchNorm2d -> ReLU, with 3x3 stride-2
    max pooling after stages 1, 2 and 5) followed by three fully connected
    layers. The first fully connected layer expects 9216 = 256 * 6 * 6
    features, which is what a 227x227 input produces.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=7):
        super(AlexNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        # Parameter-free pool that pins the feature map to 6x6. It is the identity
        # for 227x227 inputs (already 6x6 here) and keeps the 9216-feature
        # classifier valid for other input sizes such as 224x224.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(9216, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image tensor to ``(batch, num_classes)`` logits."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.avgpool(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

## Model #3: Initializing VGG16

In [None]:
class VGG16(nn.Module):
    """VGG16-style classifier with batch normalisation.

    Thirteen 3x3 convolutional stages (``layer1`` .. ``layer13``), five of
    which end in a 2x2 max pool, followed by three fully connected layers
    (``fc``, ``fc1``, ``fc2``). The first fully connected layer expects
    ``7 * 7 * 512`` features.

    Args:
        num_classes: Size of the output layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool=False):
        """Return Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU, optionally followed by a 2x2 max pool."""
        modules = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        return nn.Sequential(*modules)

    def __init__(self, num_classes=7):
        super().__init__()
        # (in_channels, out_channels, ends_with_max_pool) for layer1 .. layer13.
        stage_specs = [
            (3, 64, False), (64, 64, True),
            (64, 128, False), (128, 128, True),
            (128, 256, False), (256, 256, False), (256, 256, True),
            (256, 512, False), (512, 512, False), (512, 512, True),
            (512, 512, False), (512, 512, False), (512, 512, True),
        ]
        for number, (n_in, n_out, pooled) in enumerate(stage_specs, start=1):
            setattr(self, f'layer{number}', self._conv_stage(n_in, n_out, pooled))

        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(7*7*512, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

        # Preprocessing pipeline stored on the model; forward() does not use it.
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
        self.data_transform = transforms.Compose([transforms.Resize((227,227)), transforms.ToTensor(), normalize,])

    def forward(self, x):
        """Run the 13 conv stages in order, flatten, then apply the three FC layers."""
        out = x
        for number in range(1, 14):
            out = getattr(self, f'layer{number}')(out)
        out = out.reshape(out.size(0), -1)
        return self.fc2(self.fc1(self.fc(out)))

## Loading the Data

In [None]:
# 227x227 pipeline used by the AlexNet / VGG16 runs below.
resize = 227

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

batch_size = 32
num_workers = 2

# Pass the transform by keyword: the third positional parameter of SkinLesions
# is `label_dict`, so a positional transform would not reach `transform`.
train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform)
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)

# Shuffle only the training batches; keep validation order fixed.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

## Training and Evaluating the Model

### Training ResNet50

In [None]:
# ResNet50 takes a different input size (224x224), so the datasets and loaders
# are rebuilt here under separate loader names (train_loader2 / val_loader2).

resize_50 = 224

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize_50,resize_50)),transforms.ToTensor(),normalize,])

batch_size = 32
num_workers = 2

# Pass the transform by keyword: the third positional parameter of SkinLesions
# is `label_dict`, so a positional transform would not reach `transform`.
train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform)
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)

train_loader2 = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

val_loader2 = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

In [None]:
# Build the from-scratch residual network and train it on the 224x224 loaders.
learning_rate = 0.005
model = ResNet50(ResidualBlock, [3, 4, 6, 3]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

train(model, train_loader2, val_loader2, optimizer, criterion, device, num_epochs=5)

Epoch 1/5:  69%|██████▉   | 22/32 [00:08<00:03,  2.63it/s, loss=0.967]


KeyboardInterrupt: ignored

### Training AlexNet

In [None]:
# Train AlexNet from scratch on the 227x227 loaders (train_loader / val_loader).
resize = 227
learning_rate = 0.005
model = AlexNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.005,
)

train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs=5)

Epoch 1/5: 100%|██████████| 32/32 [00:11<00:00,  2.77it/s, loss=0.886]


Validation set: Average loss = 1.1761, Accuracy = 0.5959


Epoch 2/5: 100%|██████████| 32/32 [00:11<00:00,  2.90it/s, loss=0.835]


Validation set: Average loss = 1.0470, Accuracy = 0.6839


Epoch 3/5: 100%|██████████| 32/32 [00:11<00:00,  2.86it/s, loss=0.251]


Validation set: Average loss = 1.0420, Accuracy = 0.6373


Epoch 4/5: 100%|██████████| 32/32 [00:13<00:00,  2.32it/s, loss=0.231]


Validation set: Average loss = 1.0308, Accuracy = 0.6788


Epoch 5/5: 100%|██████████| 32/32 [00:11<00:00,  2.76it/s, loss=0.554]


Validation set: Average loss = 1.0008, Accuracy = 0.6632


### Training VGG16

In [None]:
# Train VGG16 from scratch on the 227x227 loaders (train_loader / val_loader).
# VGG16() returns a single module, so it cannot be unpacked into two names;
# the input size is given explicitly, as in the AlexNet cell.
model, resize = VGG16(), 227

learning_rate = 0.005
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs=20)

# Experiment 1: Varying the amount of input data given to the model to train on


In [None]:
# Training-set sizes swept in Experiment 1; each value is passed to SkinLesions as `size`.
test_sizes = [400, 2000, 5000]

In [None]:
# Experiment 1 / ResNet50: train a fresh model on the first `i` training rows
# for each size in test_sizes, always validating on the full validation set.
resize = 224
batch_size = 32
num_workers = 2
learning_rate = 0.005

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

# The validation split does not depend on the training size, so build it once.
# The transform is passed by keyword because the third positional parameter of
# SkinLesions is `label_dict`.
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

for i in test_sizes:
  train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform, size = i)
  train_loader = torch.utils.data.DataLoader(
      train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

  # Re-initialise the model and optimiser so runs do not share weights or momentum.
  model = ResNet50(ResidualBlock, [3, 4, 6, 3]).to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

  print('Training model on n = ', i, ' data points for ResNet50')
  train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs= 5)

Training model on n =  400  data points for ResNet50


Epoch 1/5: 100%|██████████| 13/13 [00:05<00:00,  2.42it/s, loss=0.603]


Validation set: Average loss = 1.4018, Accuracy = 0.6373


Epoch 2/5: 100%|██████████| 13/13 [00:06<00:00,  2.14it/s, loss=0.986]


Validation set: Average loss = 2.7033, Accuracy = 0.6321


Epoch 3/5: 100%|██████████| 13/13 [00:04<00:00,  2.69it/s, loss=1.18]


Validation set: Average loss = 1.5797, Accuracy = 0.5907


Epoch 4/5: 100%|██████████| 13/13 [00:05<00:00,  2.24it/s, loss=1.12]


Validation set: Average loss = 1.1794, Accuracy = 0.6373


Epoch 5/5: 100%|██████████| 13/13 [00:05<00:00,  2.47it/s, loss=0.988]


Validation set: Average loss = 1.2329, Accuracy = 0.5337
Training model on n =  2000  data points for ResNet50


Epoch 1/5: 100%|██████████| 63/63 [00:25<00:00,  2.47it/s, loss=1.35]


Validation set: Average loss = 1.3522, Accuracy = 0.6010


Epoch 2/5: 100%|██████████| 63/63 [00:25<00:00,  2.44it/s, loss=0.752]


Validation set: Average loss = 0.9895, Accuracy = 0.6580


Epoch 3/5: 100%|██████████| 63/63 [00:25<00:00,  2.48it/s, loss=0.528]


Validation set: Average loss = 1.1362, Accuracy = 0.6425


Epoch 4/5: 100%|██████████| 63/63 [00:25<00:00,  2.46it/s, loss=1.64]


Validation set: Average loss = 1.0405, Accuracy = 0.6891


Epoch 5/5: 100%|██████████| 63/63 [00:25<00:00,  2.51it/s, loss=0.969]


Validation set: Average loss = 0.9573, Accuracy = 0.6632
Training model on n =  5000  data points for ResNet50


Epoch 1/5: 100%|██████████| 157/157 [01:04<00:00,  2.42it/s, loss=0.367]


Validation set: Average loss = 0.9979, Accuracy = 0.6580


Epoch 2/5: 100%|██████████| 157/157 [01:09<00:00,  2.27it/s, loss=0.903]


Validation set: Average loss = 1.4421, Accuracy = 0.6528


Epoch 3/5: 100%|██████████| 157/157 [01:03<00:00,  2.47it/s, loss=0.963]


Validation set: Average loss = 0.8625, Accuracy = 0.6684


Epoch 4/5: 100%|██████████| 157/157 [01:02<00:00,  2.50it/s, loss=0.591]


Validation set: Average loss = 0.8857, Accuracy = 0.6839


Epoch 5/5: 100%|██████████| 157/157 [01:05<00:00,  2.38it/s, loss=0.254]


Validation set: Average loss = 0.7996, Accuracy = 0.7150


In [None]:
# Experiment 1 / AlexNet: train a fresh model on the first `i` training rows
# for each size in test_sizes, always validating on the full validation set.
resize = 227
batch_size = 32
num_workers = 2
learning_rate = 0.005

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

# The validation split does not depend on the training size, so build it once.
# The transform is passed by keyword because the third positional parameter of
# SkinLesions is `label_dict`.
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

for i in test_sizes:
  train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform, size = i)
  train_loader = torch.utils.data.DataLoader(
      train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

  # Re-initialise the model and optimiser so runs do not share weights or momentum.
  model = AlexNet()
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

  print('Training model on n = ', i, ' data points for AlexNet')
  train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs= 5)

Training model on n =  400  data points for AlexNet


Epoch 1/5: 100%|██████████| 13/13 [00:04<00:00,  2.82it/s, loss=1.07]


Validation set: Average loss = 1.3079, Accuracy = 0.6373


Epoch 2/5: 100%|██████████| 13/13 [00:05<00:00,  2.45it/s, loss=1.23]


Validation set: Average loss = 1.2412, Accuracy = 0.5907


Epoch 3/5: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=0.578]


Validation set: Average loss = 1.2246, Accuracy = 0.6010


Epoch 4/5: 100%|██████████| 13/13 [00:04<00:00,  2.96it/s, loss=1.15]


Validation set: Average loss = 1.0769, Accuracy = 0.6010


Epoch 5/5: 100%|██████████| 13/13 [00:05<00:00,  2.28it/s, loss=0.827]


Validation set: Average loss = 1.2048, Accuracy = 0.6580
Training model on n =  2000  data points for AlexNet


Epoch 1/5: 100%|██████████| 63/63 [00:23<00:00,  2.72it/s, loss=0.697]


Validation set: Average loss = 1.3248, Accuracy = 0.6166


Epoch 2/5: 100%|██████████| 63/63 [00:23<00:00,  2.66it/s, loss=0.947]


Validation set: Average loss = 1.1973, Accuracy = 0.6839


Epoch 3/5: 100%|██████████| 63/63 [00:26<00:00,  2.34it/s, loss=1.64]


Validation set: Average loss = 0.9258, Accuracy = 0.7047


Epoch 4/5: 100%|██████████| 63/63 [00:27<00:00,  2.26it/s, loss=0.602]


Validation set: Average loss = 1.1020, Accuracy = 0.6269


Epoch 5/5: 100%|██████████| 63/63 [00:23<00:00,  2.65it/s, loss=1.36]


Validation set: Average loss = 0.9364, Accuracy = 0.6891
Training model on n =  5000  data points for AlexNet


Epoch 1/5: 100%|██████████| 157/157 [00:56<00:00,  2.76it/s, loss=0.756]


Validation set: Average loss = 0.9566, Accuracy = 0.6995


Epoch 2/5: 100%|██████████| 157/157 [00:58<00:00,  2.67it/s, loss=1.32]


Validation set: Average loss = 0.8823, Accuracy = 0.7150


Epoch 3/5: 100%|██████████| 157/157 [00:57<00:00,  2.72it/s, loss=0.84]


Validation set: Average loss = 0.8177, Accuracy = 0.7098


Epoch 4/5: 100%|██████████| 157/157 [00:57<00:00,  2.75it/s, loss=0.388]


Validation set: Average loss = 0.7628, Accuracy = 0.6891


Epoch 5/5: 100%|██████████| 157/157 [00:57<00:00,  2.71it/s, loss=0.541]


Validation set: Average loss = 0.8204, Accuracy = 0.6995


In [None]:
# Experiment 1 / VGG16: train a fresh model on the first `i` training rows
# for each size in test_sizes, always validating on the full validation set.
resize = 227
batch_size = 32
num_workers = 2
learning_rate = 0.005

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

# The validation split does not depend on the training size, so build it once.
# The transform is passed by keyword because the third positional parameter of
# SkinLesions is `label_dict`.
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

for i in test_sizes:
  train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform, size = i)
  train_loader = torch.utils.data.DataLoader(
      train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

  # Re-initialise the model and optimiser so runs do not share weights or momentum.
  model = VGG16()
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

  print('Training model on n = ', i, ' data points for VGG16')
  train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs= 5)

Training model on n =  400  data points for VGG16


Epoch 1/5: 100%|██████████| 13/13 [00:08<00:00,  1.51it/s, loss=0.46]


Validation set: Average loss = 1.6486, Accuracy = 0.6373


Epoch 2/5: 100%|██████████| 13/13 [00:08<00:00,  1.61it/s, loss=1.03]


Validation set: Average loss = 1.1453, Accuracy = 0.6114


Epoch 3/5: 100%|██████████| 13/13 [00:08<00:00,  1.60it/s, loss=0.869]


Validation set: Average loss = 1.3805, Accuracy = 0.6010


Epoch 4/5: 100%|██████████| 13/13 [00:08<00:00,  1.58it/s, loss=0.756]


Validation set: Average loss = 1.3499, Accuracy = 0.6062


Epoch 5/5: 100%|██████████| 13/13 [00:08<00:00,  1.58it/s, loss=1.43]


Validation set: Average loss = 1.1751, Accuracy = 0.6632
Training model on n =  2000  data points for VGG16


Epoch 1/5: 100%|██████████| 63/63 [00:37<00:00,  1.68it/s, loss=1.12]


Validation set: Average loss = 1.3240, Accuracy = 0.4819


Epoch 2/5: 100%|██████████| 63/63 [00:37<00:00,  1.69it/s, loss=0.609]


Validation set: Average loss = 1.0839, Accuracy = 0.6528


Epoch 3/5: 100%|██████████| 63/63 [00:37<00:00,  1.69it/s, loss=1.02]


Validation set: Average loss = 1.2013, Accuracy = 0.6995


Epoch 4/5: 100%|██████████| 63/63 [00:37<00:00,  1.70it/s, loss=0.774]


Validation set: Average loss = 1.1288, Accuracy = 0.6788


Epoch 5/5: 100%|██████████| 63/63 [00:37<00:00,  1.69it/s, loss=0.401]


Validation set: Average loss = 0.9779, Accuracy = 0.6995
Training model on n =  5000  data points for VGG16


Epoch 1/5: 100%|██████████| 157/157 [01:33<00:00,  1.68it/s, loss=1.09]


Validation set: Average loss = 1.0422, Accuracy = 0.7047


Epoch 2/5: 100%|██████████| 157/157 [01:34<00:00,  1.67it/s, loss=0.778]


Validation set: Average loss = 0.9636, Accuracy = 0.6632


Epoch 3/5: 100%|██████████| 157/157 [01:33<00:00,  1.68it/s, loss=2.23]


Validation set: Average loss = 0.9715, Accuracy = 0.6995


Epoch 4/5: 100%|██████████| 157/157 [01:33<00:00,  1.68it/s, loss=0.536]


Validation set: Average loss = 0.8610, Accuracy = 0.7254


Epoch 5/5: 100%|██████████| 157/157 [01:36<00:00,  1.64it/s, loss=0.436]


Validation set: Average loss = 0.8687, Accuracy = 0.7461


# Experiment 2: Feature extraction and layer-specific tuning using pre-loaded models

In [None]:
# Rebuild the full-size 227x227 datasets and loaders for Experiment 2.
resize = 227

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

batch_size = 32
num_workers = 2

# Pass the transform by keyword: the third positional parameter of SkinLesions
# is `label_dict`, so a positional transform would not reach `transform`.
train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform)
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

In [None]:
# Rebuild the full-size 224x224 datasets and loaders (train_loader2 / val_loader2)
# used by the ResNet runs in Experiment 2.
resize = 224

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010],)
data_transform = transforms.Compose([transforms.Resize((resize,resize)),transforms.ToTensor(),normalize,])

batch_size = 32
num_workers = 2

# Pass the transform by keyword: the third positional parameter of SkinLesions
# is `label_dict`, so a positional transform would not reach `transform`.
train_data = SkinLesions(root_dir, "ISIC2018_Task3_Training_GroundTruth.csv", transform=data_transform)
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", transform=data_transform)

train_loader2 = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)

val_loader2 = torch.utils.data.DataLoader(
    val_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

## Using built-in ResNet50

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.models as models
class Resnet(nn.Module):
    """torchvision ResNet-50 whose final fully connected layer outputs 7 classes.

    Args:
        mode: 'linear' freezes every parameter of the loaded network so that
            only the replacement `fc` layer is trainable; 'finetune' leaves
            all parameters trainable.
        pretrained: forwarded unchanged to `models.resnet50`.

    Raises:
        ValueError: if `mode` is neither 'linear' nor 'finetune'. Previously
            such a value silently kept the network's original output layer.
    """

    def __init__(self, mode='finetune', pretrained=True):
        super().__init__()
        if mode not in ('linear', 'finetune'):
            raise ValueError(f"mode must be 'linear' or 'finetune', got {mode!r}")

        self.resnet = models.resnet50(pretrained = pretrained)
        num_features = self.resnet.fc.in_features

        if mode == 'linear':
            for param in self.resnet.parameters():
                param.requires_grad = False

        # Created after the freeze above, so the new head is trainable in both modes.
        self.resnet.fc = torch.nn.Linear(num_features, 7)

    def forward(self, x):
        """Return the 7-way logits computed by the wrapped network."""
        return self.resnet(x)

    # No `to` override: nn.Module.to already moves the registered `resnet`
    # submodule and returns this wrapper. The previous override returned the
    # inner torchvision module, so `model = model.to(device)` silently swapped
    # the wrapper for the bare network, and it rejected the other arguments
    # nn.Module.to accepts.

In [None]:
# Linear probe on ResNet-50: mode='linear' freezes the loaded network, so only
# the new 7-way head has requires_grad=True and is updated by SGD.
criterion = nn.CrossEntropyLoss()
model = Resnet(mode='linear', pretrained=True)
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.005,
    momentum=0.9,
)

train(
    model,
    train_loader2,
    val_loader2,
    optimizer,
    criterion,
    device,
    num_epochs=5,
)

In [None]:
# Fine-tune ResNet-50 with per-group learning rates. `criterion` is reused
# from the preceding training cell.
model = Resnet(mode='finetune', pretrained=True)

classifier_layer, layers_1_2, other_layers = [], [], []

# Sort every parameter into exactly one group based on its qualified name.
for name, module in model.named_parameters():
  if 'fc' in name:
    target_group = classifier_layer      # new classification head
  elif 'layer1' in name or 'layer2' in name:
    target_group = layers_1_2            # first two residual stages
  else:
    target_group = other_layers          # everything else
  target_group.append(module)

param_groups = [
    {'params': other_layers},                  # inherits the base lr below
    {'params': classifier_layer, 'lr': .01},   # head learns fastest
    {'params': layers_1_2, 'lr': 0},           # lr 0 => weights never move
]
optimizer = torch.optim.SGD(param_groups, lr=0.001, momentum=0.9)

train(model, train_loader2, val_loader2, optimizer, criterion, device, num_epochs = 5)

Epoch 1/5: 100%|██████████| 313/313 [02:36<00:00,  2.00it/s, loss=0.559]


Validation set: Average loss = 0.4656, Accuracy = 0.8342


Epoch 2/5: 100%|██████████| 313/313 [02:26<00:00,  2.13it/s, loss=0.355]


Validation set: Average loss = 1.0347, Accuracy = 0.8446


Epoch 3/5: 100%|██████████| 313/313 [02:26<00:00,  2.14it/s, loss=0.416]


Validation set: Average loss = 0.4418, Accuracy = 0.8756


Epoch 4/5: 100%|██████████| 313/313 [02:27<00:00,  2.13it/s, loss=0.433]


Validation set: Average loss = 0.5869, Accuracy = 0.8394


Epoch 5/5: 100%|██████████| 313/313 [02:27<00:00,  2.12it/s, loss=0.0724]


Validation set: Average loss = 1.5301, Accuracy = 0.8497


## Using built-in AlexNet

In [None]:
class AlexNet(nn.Module):
    """torchvision AlexNet whose last classifier layer outputs 7 classes.

    Args:
        mode: 'linear' freezes every parameter of the loaded network so that
            only the replacement `classifier[6]` layer is trainable;
            'finetune' leaves all parameters trainable.
        pretrained: forwarded unchanged to `models.alexnet`.

    Raises:
        ValueError: if `mode` is neither 'linear' nor 'finetune'. Previously
            such a value silently kept the network's original output layer.
    """

    def __init__(self, mode='finetune', pretrained=True):
        super().__init__()
        if mode not in ('linear', 'finetune'):
            raise ValueError(f"mode must be 'linear' or 'finetune', got {mode!r}")

        self.alexnet = models.alexnet(pretrained = pretrained)
        num_features = self.alexnet.classifier[6].in_features

        if mode == 'linear':
            for param in self.alexnet.parameters():
                param.requires_grad = False

        # Created after the freeze above, so the new head is trainable in both modes.
        self.alexnet.classifier[6] = torch.nn.Linear(num_features, 7)

    def forward(self, x):
        """Return the 7-way logits computed by the wrapped network."""
        return self.alexnet(x)

    # No `to` override: nn.Module.to already moves the registered `alexnet`
    # submodule and returns this wrapper. The previous override returned the
    # inner torchvision module, so `model = model.to(device)` silently swapped
    # the wrapper for the bare network, and it rejected the other arguments
    # nn.Module.to accepts.

In [None]:
# Linear probe on AlexNet: mode='linear' freezes the loaded network, so only
# the replaced classifier[6] layer has requires_grad=True and is updated by SGD.
criterion = nn.CrossEntropyLoss()
model = AlexNet(mode='linear', pretrained=True)
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.005,
    momentum=0.9,
)

train(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    device,
    num_epochs=5,
)

Epoch 1/5: 100%|██████████| 313/313 [01:53<00:00,  2.75it/s, loss=1.28]


Validation set: Average loss = 1.4442, Accuracy = 0.6632


Epoch 2/5: 100%|██████████| 313/313 [01:52<00:00,  2.77it/s, loss=2.43]


Validation set: Average loss = 1.6479, Accuracy = 0.6736


Epoch 3/5: 100%|██████████| 313/313 [01:53<00:00,  2.77it/s, loss=1.79]


Validation set: Average loss = 2.2089, Accuracy = 0.6839


Epoch 4/5: 100%|██████████| 313/313 [01:52<00:00,  2.78it/s, loss=2.56]


Validation set: Average loss = 1.1424, Accuracy = 0.6995


Epoch 5/5: 100%|██████████| 313/313 [01:52<00:00,  2.79it/s, loss=2.31]


Validation set: Average loss = 0.9629, Accuracy = 0.8031


In [None]:
# Fine-tune AlexNet with per-group learning rates. `criterion` is reused from
# the preceding training cell.
model = AlexNet(mode='finetune', pretrained=True)

# torchvision's AlexNet names its parameters "features.<i>.*" and
# "classifier.<i>.*". None of them contain 'Linear', 'layer1' or 'layer2', so
# the previous name tests never matched: every parameter fell into
# `other_layers` and trained at the base lr, with no faster head and nothing
# frozen. Match on the real names instead.
head_key = 'classifier.6.'                     # the replaced 7-way output layer
# NOTE(review): the first two conv layers are used as the closest analogue of
# ResNet's layer1/layer2 -- adjust if a different freeze boundary was intended.
early_keys = ('features.0.', 'features.3.')

classifier_layer, layers_1_2, other_layers = [], [], []
for name, param in model.named_parameters():
  if head_key in name:
    classifier_layer.append(param)
  elif any(key in name for key in early_keys):
    layers_1_2.append(param)
  else:
    other_layers.append(param)

# Fail loudly if a torchvision change ever makes the name matching miss again.
assert classifier_layer, "no parameters matched the classifier head"
assert layers_1_2, "no parameters matched the early feature layers"

optimizer = torch.optim.SGD([
                {'params': other_layers},                 # inherits the base lr below
                {'params': classifier_layer, 'lr': .01},  # head learns fastest
                {'params': layers_1_2, 'lr': 0}           # lr 0 => weights never move
            ], lr=0.001, momentum=0.9)

train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs = 5)

Epoch 1/5: 100%|██████████| 313/313 [01:55<00:00,  2.71it/s, loss=0.707]


Validation set: Average loss = 0.6725, Accuracy = 0.7772


Epoch 2/5: 100%|██████████| 313/313 [01:56<00:00,  2.68it/s, loss=0.474]


Validation set: Average loss = 0.4936, Accuracy = 0.8135


Epoch 3/5: 100%|██████████| 313/313 [01:56<00:00,  2.69it/s, loss=0.649]


Validation set: Average loss = 0.4992, Accuracy = 0.8238


Epoch 4/5: 100%|██████████| 313/313 [01:56<00:00,  2.68it/s, loss=0.267]


Validation set: Average loss = 0.4658, Accuracy = 0.8290


Epoch 5/5: 100%|██████████| 313/313 [01:55<00:00,  2.70it/s, loss=0.353]


Validation set: Average loss = 0.4570, Accuracy = 0.8653


## Using built-in VGG16

In [None]:
class Vgg16(nn.Module):
    """torchvision VGG-16 whose last classifier layer outputs 7 classes.

    Args:
        mode: 'linear' freezes every parameter of the loaded network so that
            only the replacement `classifier[6]` layer is trainable;
            'finetune' leaves all parameters trainable.
        pretrained: forwarded unchanged to `models.vgg16`.

    Raises:
        ValueError: if `mode` is neither 'linear' nor 'finetune'. Previously
            such a value silently kept the network's original output layer.
    """

    def __init__(self, mode='finetune', pretrained=True):
        super().__init__()
        if mode not in ('linear', 'finetune'):
            raise ValueError(f"mode must be 'linear' or 'finetune', got {mode!r}")

        self.vgg16 = models.vgg16(pretrained = pretrained)
        num_features = self.vgg16.classifier[6].in_features

        if mode == 'linear':
            for param in self.vgg16.parameters():
                param.requires_grad = False

        # Created after the freeze above, so the new head is trainable in both modes.
        self.vgg16.classifier[6] = torch.nn.Linear(num_features, 7)

    def forward(self, x):
        """Return the 7-way logits computed by the wrapped network."""
        return self.vgg16(x)

    # No `to` override: nn.Module.to already moves the registered `vgg16`
    # submodule and returns this wrapper. The previous override returned the
    # inner torchvision module, so `model = model.to(device)` silently swapped
    # the wrapper for the bare network, and it rejected the other arguments
    # nn.Module.to accepts.

In [None]:
# Linear probe on VGG-16: mode='linear' freezes the loaded network, so only
# the replaced classifier[6] layer has requires_grad=True and is updated by SGD.
criterion = nn.CrossEntropyLoss()
model = Vgg16(mode='linear', pretrained=True)
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.005,
    momentum=0.9,
)

train(
    model,
    train_loader,
    val_loader,
    optimizer,
    criterion,
    device,
    num_epochs=5,
)

Epoch 1/5: 100%|██████████| 313/313 [02:17<00:00,  2.28it/s, loss=1.34]


Validation set: Average loss = 1.1109, Accuracy = 0.6891


Epoch 2/5: 100%|██████████| 313/313 [02:09<00:00,  2.42it/s, loss=1.83]


Validation set: Average loss = 1.1856, Accuracy = 0.6580


Epoch 3/5: 100%|██████████| 313/313 [02:06<00:00,  2.47it/s, loss=2.13]


Validation set: Average loss = 0.9437, Accuracy = 0.6632


Epoch 4/5: 100%|██████████| 313/313 [02:06<00:00,  2.47it/s, loss=2.05]


Validation set: Average loss = 1.0940, Accuracy = 0.7098


Epoch 5/5: 100%|██████████| 313/313 [02:05<00:00,  2.49it/s, loss=1.15]


Validation set: Average loss = 0.8711, Accuracy = 0.6943


In [None]:
# Fine-tune VGG-16 with per-group learning rates. `criterion` is reused from
# the preceding training cell.
model = Vgg16(mode='finetune', pretrained=True)

# torchvision's VGG-16 names its parameters "features.<i>.*" and
# "classifier.<i>.*". None of them contain 'Linear', 'layer1' or 'layer2', so
# the previous name tests never matched: every parameter fell into
# `other_layers` and trained at the base lr, with no faster head and nothing
# frozen. Match on the real names instead.
head_key = 'classifier.6.'                     # the replaced 7-way output layer
# NOTE(review): the conv layers of the first two VGG blocks are used as the
# closest analogue of ResNet's layer1/layer2 -- adjust if a different freeze
# boundary was intended.
early_keys = ('features.0.', 'features.2.', 'features.5.', 'features.7.')

classifier_layer, layers_1_2, other_layers = [], [], []
for name, param in model.named_parameters():
  if head_key in name:
    classifier_layer.append(param)
  elif any(key in name for key in early_keys):
    layers_1_2.append(param)
  else:
    other_layers.append(param)

# Fail loudly if a torchvision change ever makes the name matching miss again.
assert classifier_layer, "no parameters matched the classifier head"
assert layers_1_2, "no parameters matched the early feature layers"

optimizer = torch.optim.SGD([
                {'params': other_layers},                 # inherits the base lr below
                {'params': classifier_layer, 'lr': .01},  # head learns fastest
                {'params': layers_1_2, 'lr': 0}           # lr 0 => weights never move
            ], lr=0.001, momentum=0.9)

train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs = 5)

Epoch 1/5: 100%|██████████| 313/313 [02:46<00:00,  1.88it/s, loss=0.885]


Validation set: Average loss = 0.7101, Accuracy = 0.7772


Epoch 2/5: 100%|██████████| 313/313 [02:44<00:00,  1.90it/s, loss=0.721]


Validation set: Average loss = 0.6000, Accuracy = 0.7565


Epoch 3/5: 100%|██████████| 313/313 [02:43<00:00,  1.91it/s, loss=0.548]


Validation set: Average loss = 0.5593, Accuracy = 0.8135


Epoch 4/5: 100%|██████████| 313/313 [02:43<00:00,  1.91it/s, loss=0.506]


Validation set: Average loss = 0.6708, Accuracy = 0.7720


Epoch 5/5: 100%|██████████| 313/313 [02:43<00:00,  1.91it/s, loss=0.415]


Validation set: Average loss = 0.5383, Accuracy = 0.8238


# Experiment 3: Look at specific worse-performing features


In [None]:
def train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Args:
        model: module to optimise; it is moved to `device` first.
        train_loader: iterable of `(inputs, labels)` training batches.
        val_loader: iterable of `(inputs, labels)` batches passed to `evaluate`.
        optimizer: optimizer stepped once per training batch.
        criterion: loss called as `criterion(logits, labels)`.
        device: device the model and every batch are moved to.
        num_epochs: number of passes over `train_loader`.

    Returns:
        The third value returned by `evaluate` after the final epoch (its
        misclassification counter), or None when `num_epochs` is 0.
    """
    # Place model on device
    model = model.to(device)

    # Bound up front so a zero-epoch call returns None instead of raising
    # UnboundLocalError at the return statement.
    label_loss_counter = None

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        # Use tqdm to display a progress bar during training
        with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{num_epochs}') as pbar:
            for inputs, labels in train_loader:
                # Move inputs and labels to device
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Zero out gradients
                optimizer.zero_grad()
                # Compute the logits and loss
                logits = model(inputs)
                loss = criterion(logits, labels)
                # Backpropagate the loss
                loss.backward()
                # Update the weights
                optimizer.step()
                # Update the progress bar
                pbar.update(1)
                pbar.set_postfix(loss=loss.item())

        # Evaluate the model on the validation set; only the counter from the
        # last epoch survives the loop.
        avg_loss, accuracy, label_loss_counter = evaluate(model, val_loader, criterion, device)
        print(f'Validation set: Average loss = {avg_loss:.4f}, Accuracy = {accuracy:.4f}')
    return label_loss_counter

def evaluate(model, test_loader, criterion, device, num_classes=8):
    """Compute loss, accuracy and a misclassification counter on `test_loader`.

    Args:
        model: module to evaluate; switched to eval mode.
        test_loader: iterable of `(inputs, labels)` batches; must support `len`.
        criterion: loss called as `criterion(logits, labels)`.
        device: device every batch is moved to.
        num_classes: number of class indices pre-seeded with a zero count.
            Defaults to 8, the value that used to be hard-coded.

    Returns:
        `(avg_loss, accuracy, label_loss_counter)` where `avg_loss` is the
        mean of the per-batch losses, `accuracy` is correct / total samples,
        and `label_loss_counter` maps a class index to the number of
        misclassified samples that were *predicted* as that class.

    Raises:
        ZeroDivisionError: if `test_loader` yields no batches or no samples.
    """
    model.eval()  # Set model to evaluation mode

    label_loss_counter = {class_idx: 0 for class_idx in range(num_classes)}
    total_loss = 0.0
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            # Move inputs and labels to device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Compute the logits and loss
            logits = model(inputs)
            total_loss += criterion(logits, labels).item()

            # Compute the accuracy
            _, predictions = torch.max(logits, dim=1)
            is_wrong = predictions != labels
            num_correct += (~is_wrong).sum().item()
            num_samples += len(inputs)

            # One host transfer per batch instead of one tensor comparison per
            # sample; .get() keeps a prediction outside the pre-seeded range
            # from raising KeyError.
            for predicted_class in predictions[is_wrong].tolist():
                label_loss_counter[predicted_class] = label_loss_counter.get(predicted_class, 0) + 1

    # Compute the average loss and accuracy
    avg_loss = total_loss / len(test_loader)
    accuracy = num_correct / num_samples

    return avg_loss, accuracy, label_loss_counter

In [None]:
# Input pipeline at 224x224 for Experiment 3. Rebinds `train_data`,
# `val_data`, `train_loader2` and `val_loader2`; the training set is built
# with size = 10000.
resize, batch_size, num_workers = 224, 32, 2

# NOTE(review): these mean/std values look like the usual CIFAR-10 channel
# statistics rather than the ImageNet ones documented for torchvision's
# pretrained weights -- confirm this is intentional.
normalize = transforms.Normalize(
    mean=[0.4914, 0.4822, 0.4465],
    std=[0.2023, 0.1994, 0.2010],
)
data_transform = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    normalize,
])

train_data = SkinLesions(
    root_dir,
    "ISIC2018_Task3_Training_GroundTruth.csv",
    data_transform,
    size = 10000,
)
val_data = SkinLesions(root_dir, "ISIC2018_Task3_Validation_GroundTruth.csv", data_transform)

# Both loaders share batch size and worker count; only training is shuffled.
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
train_loader2 = torch.utils.data.DataLoader(train_data, shuffle=True, **loader_kwargs)
val_loader2 = torch.utils.data.DataLoader(val_data, shuffle=False, **loader_kwargs)

In [None]:
# Fine-tune ResNet-50 for two epochs and keep the misclassification counter
# that `train` returns. `criterion` is reused from an earlier training cell.
model = Resnet(mode='finetune', pretrained=True)

classifier_layer, layers_1_2, other_layers = [], [], []

# Sort every parameter into exactly one group based on its qualified name.
for name, module in model.named_parameters():
  if 'fc' in name:
    target_group = classifier_layer      # new classification head
  elif 'layer1' in name or 'layer2' in name:
    target_group = layers_1_2            # first two residual stages
  else:
    target_group = other_layers          # everything else
  target_group.append(module)

param_groups = [
    {'params': other_layers},                  # inherits the base lr below
    {'params': classifier_layer, 'lr': .01},   # head learns fastest
    {'params': layers_1_2, 'lr': 0},           # lr 0 => weights never move
]
optimizer = torch.optim.SGD(param_groups, lr=0.001, momentum=0.9)

label_loss_counter_resnet = train(model, train_loader2, val_loader2, optimizer, criterion, device, num_epochs = 2)

Epoch 1/2: 100%|██████████| 313/313 [02:34<00:00,  2.03it/s, loss=0.915]


Validation set: Average loss = 0.5410, Accuracy = 0.8083


Epoch 2/2: 100%|██████████| 313/313 [02:23<00:00,  2.18it/s, loss=0.323]


Validation set: Average loss = 0.4727, Accuracy = 0.8446


In [None]:
# Validation misclassifications after the final epoch, keyed by the class the
# model (wrongly) predicted.
label_loss_counter_resnet

{0: 4, 1: 18, 2: 0, 3: 2, 4: 6, 5: 0, 6: 0, 7: 0}

In [None]:
# Fine-tune AlexNet for two epochs and keep the misclassification counter
# that `train` returns. `criterion` is reused from an earlier training cell.
model = AlexNet(mode='finetune', pretrained=True)

# torchvision's AlexNet names its parameters "features.<i>.*" and
# "classifier.<i>.*". None of them contain 'Linear', 'layer1' or 'layer2', so
# the previous name tests never matched: every parameter fell into
# `other_layers` and trained at the base lr, with no faster head and nothing
# frozen. Match on the real names instead.
head_key = 'classifier.6.'                     # the replaced 7-way output layer
# NOTE(review): the first two conv layers are used as the closest analogue of
# ResNet's layer1/layer2 -- adjust if a different freeze boundary was intended.
early_keys = ('features.0.', 'features.3.')

classifier_layer, layers_1_2, other_layers = [], [], []
for name, param in model.named_parameters():
  if head_key in name:
    classifier_layer.append(param)
  elif any(key in name for key in early_keys):
    layers_1_2.append(param)
  else:
    other_layers.append(param)

# Fail loudly if a torchvision change ever makes the name matching miss again.
assert classifier_layer, "no parameters matched the classifier head"
assert layers_1_2, "no parameters matched the early feature layers"

optimizer = torch.optim.SGD([
                {'params': other_layers},                 # inherits the base lr below
                {'params': classifier_layer, 'lr': .01},  # head learns fastest
                {'params': layers_1_2, 'lr': 0}           # lr 0 => weights never move
            ], lr=0.001, momentum=0.9)

label_loss_counter_alex = train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs = 2)

Epoch 1/2: 100%|██████████| 313/313 [02:00<00:00,  2.61it/s, loss=0.44]


Validation set: Average loss = 0.5979, Accuracy = 0.8083


Epoch 2/2: 100%|██████████| 313/313 [01:53<00:00,  2.75it/s, loss=1.16]


Validation set: Average loss = 0.5016, Accuracy = 0.8394


In [None]:
# Validation misclassifications after the final epoch, keyed by the class the
# model (wrongly) predicted.
label_loss_counter_alex

{0: 3, 1: 8, 2: 6, 3: 1, 4: 13, 5: 0, 6: 0, 7: 0}

In [None]:
# Fine-tune VGG-16 for two epochs and keep the misclassification counter
# that `train` returns. `criterion` is reused from an earlier training cell.
model = Vgg16(mode='finetune', pretrained=True)

# torchvision's VGG-16 names its parameters "features.<i>.*" and
# "classifier.<i>.*". None of them contain 'Linear', 'layer1' or 'layer2', so
# the previous name tests never matched: every parameter fell into
# `other_layers` and trained at the base lr, with no faster head and nothing
# frozen. Match on the real names instead.
head_key = 'classifier.6.'                     # the replaced 7-way output layer
# NOTE(review): the conv layers of the first two VGG blocks are used as the
# closest analogue of ResNet's layer1/layer2 -- adjust if a different freeze
# boundary was intended.
early_keys = ('features.0.', 'features.2.', 'features.5.', 'features.7.')

classifier_layer, layers_1_2, other_layers = [], [], []
for name, param in model.named_parameters():
  if head_key in name:
    classifier_layer.append(param)
  elif any(key in name for key in early_keys):
    layers_1_2.append(param)
  else:
    other_layers.append(param)

# Fail loudly if a torchvision change ever makes the name matching miss again.
assert classifier_layer, "no parameters matched the classifier head"
assert layers_1_2, "no parameters matched the early feature layers"

optimizer = torch.optim.SGD([
                {'params': other_layers},                 # inherits the base lr below
                {'params': classifier_layer, 'lr': .01},  # head learns fastest
                {'params': layers_1_2, 'lr': 0}           # lr 0 => weights never move
            ], lr=0.001, momentum=0.9)

label_loss_counter_Vgg = train(model, train_loader, val_loader, optimizer, criterion, device, num_epochs = 2)

Epoch 1/2: 100%|██████████| 313/313 [02:37<00:00,  1.99it/s, loss=0.628]


Validation set: Average loss = 0.6127, Accuracy = 0.7772


Epoch 2/2: 100%|██████████| 313/313 [02:36<00:00,  2.00it/s, loss=0.84]


Validation set: Average loss = 0.6300, Accuracy = 0.7927


In [None]:
# Validation misclassifications after the final epoch, keyed by the class the
# model (wrongly) predicted.
label_loss_counter_Vgg

{0: 16, 1: 8, 2: 5, 3: 2, 4: 8, 5: 0, 6: 1, 7: 0}

# Experiment 4: Model Mean and Performance Visualization

First create tensor with only image values.

In [None]:
# Collect every image of `train_data` into one tensor.
# The buffer is sized from an actual sample instead of a hard-coded 227x227:
# `train_data` was last rebuilt with a 224x224 resize, so the fixed shape no
# longer matched the images and the per-row assignment failed.
# assumes each train_data[i][0] is an image tensor of identical shape -- TODO confirm
sample_img = train_data[0][0]
train_img_only = torch.zeros(len(train_data), *sample_img.shape)

for i in range(len(train_data)):
  train_img_only[i] = train_data[i][0]

Create prediction and label arrays by passing every image through the model.

In [None]:
# Classify every stored image with the most recently trained `model`.
# - eval mode + no_grad: pure inference, no autograd graph is built per image
# - batches go to `device` (the device `train` placed the model on) rather
#   than a hard-coded .cuda()
# - the former `out.cuda()` / `prediction.cuda()` calls discarded their
#   result and the `count` variable was never read, so they are gone
model.eval()
pred = []
with torch.no_grad():
  for start in range(0, len(train_img_only), batch_size):
    batch = train_img_only[start:start + batch_size].to(device)
    _, batch_predictions = torch.max(model(batch), dim=1)
    pred.extend(batch_predictions.cpu().tolist())


In [None]:
# Freeze the predictions into an array and gather the ground-truth label
# (second element of each dataset item) in the same order.
pred = np.array(pred)
labels = [train_data[idx][1] for idx in range(len(train_data))]

Use `Counter` to compare how often the classifier predicted each class with how often each class occurs in the ground-truth labels.

In [None]:
from collections import Counter
# Printed first: how many times each class index was predicted.
print(Counter(pred))
# Last expression of the cell, so the notebook displays it: how many times
# each value occurs in `labels`.
Counter(labels)

Create Index Arrays

In [None]:
# Class index -> diagnosis abbreviation.
label_directory = {0: 'MEL', 1: 'NV', 2: 'BCC', 3: 'AKIEC', 4: 'BKL', 5: 'DF', 6: 'VASC'}

# Look the melanoma index up by name. The masks below used to test for the
# literal 6, which this mapping assigns to 'VASC', so the "melanoma" arrays
# were actually selecting vascular lesions.
# NOTE(review): assumes the dataset's integer labels follow `label_directory` -- confirm.
mel_index = next(idx for idx, class_name in label_directory.items() if class_name == 'MEL')

# 0/1 masks plus the positions of the 1s, for ground truth and for predictions.
melanoma_label = [1 if lbl == mel_index else 0 for lbl in labels]
melanoma_label_indices = [pos for pos, flag in enumerate(melanoma_label) if flag == 1]

melanoma_pred = [1 if p == mel_index else 0 for p in pred]
melanoma_pred_indices = [pos for pos, flag in enumerate(melanoma_pred) if flag == 1]

Use index arrays to calculate ground truth and predicted image arrays, and use those to compute mean image

In [None]:
def _mean_image(images, description):
  """Return the element-wise mean over the first dimension of `images`.

  Raises ValueError when `images` is empty, so an empty selection is reported
  by name instead of surfacing as a division by zero.
  """
  if len(images) == 0:
    raise ValueError(f"Cannot average {description}: no images were selected")
  return images.mean(dim=0)


mel_truth_img_only = train_img_only[melanoma_label_indices]
mel_pred_img_only = train_img_only[melanoma_pred_indices]

# Tensor.mean replaces the hand-written accumulation, which shadowed the
# builtin `sum`, hard-coded a 3x227x227 accumulator and left `count` unused.
total_average = _mean_image(train_img_only, "the full image set")
mel_truth_average = _mean_image(mel_truth_img_only, "the ground-truth selection")
mel_pred_average = _mean_image(mel_pred_img_only, "the predicted selection")


Show images

In [None]:
from google.colab.patches import cv2_imshow
import cv2
def _to_displayable(img):
  """Undo `normalize` and reorder CxHxW -> HxWxC so imshow gets values in [0, 1]."""
  channel_mean = torch.tensor(normalize.mean).view(-1, 1, 1)
  channel_std = torch.tensor(normalize.std).view(-1, 1, 1)
  return (img * channel_std + channel_mean).clamp(0, 1).permute(1, 2, 0)


# Two consecutive plt.imshow calls draw on the same axes, so only the second
# image was visible. Give each mean image its own subplot.
fig, (ax_truth, ax_pred) = plt.subplots(1, 2, figsize=(10, 5))

ax_truth.imshow(_to_displayable(mel_truth_average))
ax_truth.set_title('Mean image (ground-truth selection)')
ax_truth.axis('off')

ax_pred.imshow(_to_displayable(mel_pred_average))
ax_pred.set_title('Mean image (predicted selection)')
ax_pred.axis('off')

plt.show()