In [1]:
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

def load_train_data(train_path, train_batch_size, size, shuffle = True):
    transformers = transforms.Compose([
                    transforms.Resize((size, size)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    set_data = ImageFolderWithPaths(root = train_path, transform = transformers)
    return DataLoader(set_data, batch_size = train_batch_size, shuffle = shuffle)

def load_test_data(test_path, test_batch_size, size, shuffle = True):
    transformers = transforms.Compose([transforms.Resize((size, size)),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    set_data = ImageFolderWithPaths(root = test_path, transform = transformers)
    return DataLoader(set_data, batch_size = test_batch_size, shuffle = shuffle)

In [2]:
import torch
from torchvision import datasets

class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv("data_mnist/HAM10000_metadata.csv")

In [5]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10015 entries, 0 to 10014
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   lesion_id     10015 non-null  object 
 1   image_id      10015 non-null  object 
 2   dx            10015 non-null  object 
 3   dx_type       10015 non-null  object 
 4   age           9958 non-null   float64
 5   sex           10015 non-null  object 
 6   localization  10015 non-null  object 
dtypes: float64(1), object(6)
memory usage: 547.8+ KB


In [7]:
df[df["age"].isna() == True].count()

lesion_id       57
image_id        57
dx              57
dx_type         57
age              0
sex             57
localization    57
dtype: int64

In [8]:
m = int(df['age'].mean())

In [9]:
df['age'].fillna(m, inplace=True)

In [10]:
df[df["age"].isna() == True].count()

lesion_id       0
image_id        0
dx              0
dx_type         0
age             0
sex             0
localization    0
dtype: int64

In [11]:
df["dx_type"].value_counts()

histo        5340
follow_up    3704
consensus     902
confocal       69
Name: dx_type, dtype: int64

In [12]:
df['dx_types'] = df['dx_type'].map({'histo': 0, 'follow_up': 1, 'consensus': 2, 'confocal': 3})

In [13]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dx_types
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,0
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,0
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,0
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,0
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,0


In [14]:
import torch.nn as nn


class ConvNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=5, out_channels=16,
                               kernel_size=3, padding=1)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32,
                               kernel_size=3, padding=1)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64,
                               kernel_size=3, padding=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2)

        self.dropout = nn.Dropout()

        self.fc1 = nn.Linear(in_features=1024, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

        self.relu = nn.ReLU()


    def forward(self, x):
        #64x64x5  
        x = self.relu(self.conv1(x))
        #64x64x16  
        x = self.maxpool(x)
        #32x32x16  
        x = self.relu(self.conv2(x))
        #32x32x32   
        x = self.maxpool(x)
        #16x16x32  
        x = self.relu(self.conv3(x))
        #16x16x64   
        x = self.maxpool(x)
        #8x8x64     
        x = self.relu(self.conv4(x))
        #8x8x64    
        x = self.maxpool(x)
        #4x4x64    
        #Flatening
        x = x.view(-1, 1024) 

        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [17]:
from tqdm import tqdm


EPOCHS = 25
Learning_rate = 0.001
Batch_size = 64
L2_rate = 0

image_size = 64
data_size = 9017
num_batches = data_size//Batch_size
num_classes = 7

Train_path = "C:/Users/User/Desktop/Astghik/Skin_cancer/data/train/"


model = ConvNet(num_classes)


data_loader = load_train_data(Train_path, Batch_size, image_size)


def train():
    model.train()

    crossentropy = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                lr=Learning_rate)

    for epoch in range(EPOCHS):
        epoch_loss = 0
        epoch_acc = 0
        
        
        for X, y, path in tqdm(data_loader):
            
            X_train_list = []
              
            for i in range(len(X)):
                
                X_train_list.append(torch.from_numpy(np.append(np.array(X[i]), (np.ones((image_size,image_size))*float(df[df["image_id"] 
                                         == path[i].split("\\")[1].split(".")[0]]['age']),
                                         np.ones((image_size,image_size))*float(df[df["image_id"] 
                                         == path[i].split("\\")[1].split(".")[0]]['dx_types']))).reshape(5,image_size,image_size)))
            
            
            X_train = torch.cat(X_train_list).reshape(-1,5,image_size,image_size)
            X_train = X_train.float()
            
            optimizer.zero_grad()
            
            out = model(X_train)
                

            loss = crossentropy(out, y)
            loss.backward()

            optimizer.step()

            epoch_loss += loss.item() 

            predictions = torch.argmax(out, 1)
            epoch_acc += torch.sum(predictions==y).item()

        epoch_loss = epoch_loss/num_batches
        epoch_acc = epoch_acc/data_size
        print(f"Epoch {epoch}:")
        print("ACC:", epoch_acc, "LOSS:", epoch_loss)

        torch.save(model.state_dict(),
                   f"C:/Users/User/Desktop/Astghik/Skin_cancer/saved_models/cnn_5d_64/CNN_5d_{epoch}.model")


In [18]:
data_loader.dataset.class_to_idx

{'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}

In [19]:
train()

100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [03:58<00:00,  1.69s/it]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 0:
ACC: 0.6593101918598203 LOSS: 1.1651601761579513


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 1:
ACC: 0.6850393700787402 LOSS: 0.8900944839630808


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:24<00:00,  1.02s/it]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 2:
ACC: 0.6992347787512476 LOSS: 0.8016497752496174


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:33<00:00,  1.09s/it]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 3:
ACC: 0.7086614173228346 LOSS: 0.7711303019097873


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:20<00:00,  1.00it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 4:
ACC: 0.717089941222136 LOSS: 0.7402985600488526


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.02it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 5:
ACC: 0.7241876455583897 LOSS: 0.7278132125735283


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 6:
ACC: 0.7287346123988022 LOSS: 0.7257757853184428


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 7:
ACC: 0.7282910058777864 LOSS: 0.7149084489260401


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.02it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 8:
ACC: 0.7361650216258179 LOSS: 0.6881237036415508


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:18<00:00,  1.02it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 9:
ACC: 0.7345014971720084 LOSS: 0.6887998900243214


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 10:
ACC: 0.7398247754241988 LOSS: 0.6835611613733428


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 11:
ACC: 0.7424864145502938 LOSS: 0.668824339551585


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 12:
ACC: 0.7414882998780082 LOSS: 0.6691357310329165


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 13:
ACC: 0.7472551846512143 LOSS: 0.6616318276950292


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 14:
ACC: 0.7522457580126428 LOSS: 0.6471563300916127


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 15:
ACC: 0.7472551846512143 LOSS: 0.6602755418845585


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 16:
ACC: 0.7470333813907064 LOSS: 0.6496755972504615


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:16<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 17:
ACC: 0.7502495286680714 LOSS: 0.6484542510339192


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 18:
ACC: 0.7593434623488965 LOSS: 0.6255710058978625


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:20<00:00,  1.00it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 19:
ACC: 0.7569036264833093 LOSS: 0.6379240659730775


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:18<00:00,  1.02it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 20:
ACC: 0.7623378063657535 LOSS: 0.6161082710538591


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 21:
ACC: 0.767106576466674 LOSS: 0.6135552499975477


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 22:
ACC: 0.7698791172230232 LOSS: 0.6037537966455732


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]
  0%|                                                                                          | 0/141 [00:00<?, ?it/s]

Epoch 23:
ACC: 0.7663302650548963 LOSS: 0.5993664541414806


100%|████████████████████████████████████████████████████████████████████████████████| 141/141 [02:17<00:00,  1.03it/s]

Epoch 24:
ACC: 0.7688810025507375 LOSS: 0.5961216494441033





In [28]:
import torch
from sklearn.metrics import classification_report
from tqdm import tqdm

model = ConvNet(7)

state_dict = torch.load("C:/Users/User/Desktop/Astghik/Skin_cancer/saved_models/cnn_5d_64/CNN_5d_20.model")

model.load_state_dict(state_dict)

data_size = 998
test_batch_size = 64
size = 64

test_path = "C:/Users/User/Desktop/Astghik/Skin_cancer/data/test/"

data_loader = load_test_data(test_path, test_batch_size, size, shuffle=False)

def test():
    model.eval()
    acc = 0
    y_hat = []
    y_true = []
    for X, y, path in tqdm(data_loader):
        X_test_list = []
        
        for i in range(len(X)):
            X_test_list.append(torch.from_numpy(np.append(np.array(X[i]), (np.ones((size,size))*float(df[df["image_id"] 
                                         == path[i].split("\\")[1].split(".")[0]]['age']),
                                         np.ones((size,size))*float(df[df["image_id"] 
                                         == path[i].split("\\")[1].split(".")[0]]['dx_types']))).reshape(5,size,size)))
            
        X_test = torch.cat(X_test_list).reshape(-1,5,size,size)
        X_test = X_test.float()
        
        out = model(X_test)
        
        predictions = torch.argmax(out, 1)
        acc += torch.sum(predictions == y).item()
        y_hat.append(predictions)
        y_true.append(y)
        
    y_hat = torch.cat(y_hat)
    y_true = torch.cat(y_true)
    acc = acc/data_size
    print(acc)
    print(classification_report(y_hat, y_true))

In [21]:
test() #24

100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:14<00:00,  1.13it/s]

0.7474949899799599
              precision    recall  f1-score   support

           0       0.34      0.48      0.40        23
           1       0.47      0.43      0.45        56
           2       0.14      0.83      0.24        18
           3       0.00      0.00      0.00         2
           4       0.46      0.50      0.48       102
           5       0.96      0.81      0.88       794
           6       0.07      0.33      0.12         3

    accuracy                           0.75       998
   macro avg       0.35      0.48      0.37       998
weighted avg       0.85      0.75      0.79       998




