In [17]:
import numpy as np
import pandas as pd

In [18]:
df = pd.read_csv('HAM10000_metadata.csv')

In [19]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [20]:
df['dx'].value_counts()

nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [43]:
df['localization'].value_counts()

back               2192
lower extremity    2077
trunk              1404
upper extremity    1118
abdomen            1022
face                745
chest               407
foot                319
unknown             234
neck                168
scalp               128
hand                 90
ear                  56
genital              48
acral                 7
Name: localization, dtype: int64

In [21]:
d = {'nv':[i for i in df[df['dx'] == 'nv']['image_id']],
 'mel':[i for i in df[df['dx'] == 'mel']['image_id']],
'bkl':[i for i in df[df['dx'] == 'bkl']['image_id']],
'bcc':[i for i in df[df['dx'] == 'bcc']['image_id']],
'akiec':[i for i in df[df['dx'] == 'akiec']['image_id']],
'vasc':[i for i in df[df['dx'] == 'vasc']['image_id']],
'df':[i for i in df[df['dx'] == 'df']['image_id']]}

In [27]:
import os
from shutil import copyfile

In [39]:
for i in d.keys():
    if not os.path.exists('data/train/{}'.format(i)):
        os.makedirs('data/train/{}'.format(i))
    

In [40]:
for i in d.keys():
    if not os.path.exists('data/test/{}'.format(i)):
        os.makedirs('data/test/{}'.format(i))

In [41]:
for i in os.listdir("HAM10000_images_part_1"):
    for j in d.keys():
        if i.split('.')[0] in d[j]:
            copyfile('HAM10000_images_part_1/{}'.format(i), 'data/train/{}/{}'.format(j,i))
            

In [42]:
for i in os.listdir("HAM10000_images_part_2"):
    for j in d.keys():
        if i.split('.')[0] in d[j]:
            copyfile('HAM10000_images_part_2/{}'.format(i), 'data/train/{}/{}'.format(j,i))

In [57]:
import random
import shutil

In [65]:
for k in d.keys():
    for i in random.sample(os.listdir('data/train/{}'.format(k)), int(len(d[k])*0.1)):
        shutil.move('data/train/{}/{}'.format(k,i),'data/test/{}'.format(k))
        
        

In [78]:
print("Train")
print("----------------")
for i in d.keys():
    print(i, len(os.listdir('data/train/{}'.format(i))))

Train
----------------
nv 6035
mel 1002
bkl 990
bcc 463
akiec 295
vasc 128
df 104


In [79]:
print("Test")
print("----------------")
for i in d.keys():
    print(i, len(os.listdir('data/test/{}'.format(i))))

Test
----------------
nv 670
mel 111
bkl 109
bcc 51
akiec 32
vasc 14
df 11


In [71]:
df['dx'].value_counts()

nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [72]:
x = 0
for i in d.keys():
    x+=len(os.listdir('data/train/{}'.format(i)))

In [73]:
x

9017

In [74]:
y = 0
for i in d.keys():
    y+=len(os.listdir('data/test/{}'.format(i)))

In [75]:
y

998

In [76]:
x+y

10015

In [80]:
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

def load_train_data(train_path, train_batch_size, size, shuffle = True):
    transformers = transforms.Compose([
                    transforms.Resize((size, size)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    set_data = datasets.ImageFolder(root = train_path, transform = transformers)
    return DataLoader(set_data, batch_size = train_batch_size, shuffle = shuffle, num_workers = 1)

def load_test_data(test_path, test_batch_size, size, shuffle = True):
    transformers = transforms.Compose([transforms.Resize((size, size)),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    
    set_data = datasets.ImageFolder(root = test_path, transform = transformers)
    return DataLoader(set_data, batch_size = test_batch_size, shuffle = shuffle, num_workers = 1)

In [82]:
import torch.nn as nn

#Best 0.81
#lr = 0.001
class ConvNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16,
                               kernel_size=3, padding=1)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32,
                               kernel_size=3, padding=1)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64,
                               kernel_size=3, padding=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2)

        self.dropout = nn.Dropout()

        self.fc1 = nn.Linear(in_features=1024, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

        self.relu = nn.ReLU()


    def forward(self, x):
        #64x64x3
        x = self.relu(self.conv1(x))
        #64x64x16
        x = self.maxpool(x)
        #32x32x16
        x = self.relu(self.conv2(x))
        #32x32x32
        x = self.maxpool(x)
        #16x16x32
        x = self.relu(self.conv3(x))
        #16x16x64
        x = self.maxpool(x)
        #8x8x64
        x = self.relu(self.conv4(x))
        #8x8x64
        x = self.maxpool(x)
        #4x4x64
        #Flatening
        x = x.view(-1, 1024)

        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x 

In [87]:
import torch
from sklearn.metrics import classification_report
from tqdm import tqdm

model = ConvNet(7)

state_dict = torch.load("C:/Users/Home/Desktop/Mnist_/skin-cancer-mnist-ham10000/saved_models/ConvNet_20.model")
model.load_state_dict(state_dict)

data_size = 998
test_batch_size = 64
size = 64

test_path = "C:/Users/Home/Desktop/Mnist_/skin-cancer-mnist-ham10000/data/test/"

data_loader = load_test_data(test_path, test_batch_size, size, shuffle=False)

def test():
    model.eval()
    acc = 0
    y_hat = []
    y_true = []
    for X, y in tqdm(data_loader):
#         X = X.view(-1, size*size*3)
        out = model(X)
        
        predictions = torch.argmax(out, 1)
        acc += torch.sum(predictions == y).item()
        y_hat.append(predictions)
        y_true.append(y)
        
    y_hat = torch.cat(y_hat)
    y_true = torch.cat(y_true)
    acc = acc/data_size
    print(acc)
    print(classification_report(y_hat, y_true))

In [88]:
test()

100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:21<00:00,  1.04s/it]


0.7555110220440882


  'recall', 'true', average, warn_for)


             precision    recall  f1-score   support

          0       0.22      0.41      0.29        17
          1       0.55      0.54      0.54        52
          2       0.45      0.50      0.47        98
          3       0.00      0.00      0.00         0
          4       0.36      0.56      0.44        71
          5       0.92      0.83      0.87       743
          6       0.86      0.71      0.77        17

avg / total       0.80      0.76      0.78       998

