In [1]:
# Ask the runtime for the TensorFlow 1.x line via the `%tensorflow_version` magic.
# NOTE(review): a later cell's saved output reports tf.__version__ == 2.5.0-rc1, so the
# saved outputs do not all come from one session with this selection active - confirm
# which TensorFlow major version the notebook is meant to run on.
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
import os 
import zipfile 

import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Input, Dropout,Flatten, Conv2D, MaxPool2D, Lambda
from tensorflow.keras.layers import BatchNormalization, Activation, MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torchvision
import torchvision.transforms as transforms

import glob
from PIL import Image

from sklearn.model_selection import train_test_split

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding

from IPython.display import clear_output
from tqdm import tqdm

from sklearn.metrics import confusion_matrix
import seaborn as sns

from keras import backend as K

from sklearn.utils import resample

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19

from imblearn.over_sampling import SMOTE 
from collections import Counter

In [4]:
# Record the TensorFlow version that is actually loaded in this kernel.
print(tf.__version__)

2.5.0-rc1


In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive/ so the dataset
# folder used by the following cells becomes reachable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
os.chdir('./drive/MyDrive/HAM10000/')
!pwd

/content/drive/MyDrive/HAM10000


In [5]:
# Dataset locations, given relative to the current working directory.
metadata_path = "HAM10000_metadata.csv"  # CSV read into the metadata dataframe
data_dir = "images/"                     # folder that is globbed for the *.jpg files

In [6]:
# Lookup table: short diagnosis code -> full lesion name.
# (The trailing space in the 'bkl' name is kept exactly as it was.)
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

# Index every JPEG found in data_dir by its file name without the extension,
# so that an image id can be mapped to the path of its file.
imageid_path_dict = {}
for jpg_path in glob.glob(os.path.join(data_dir, '*.jpg')):
    image_id, _ = os.path.splitext(os.path.basename(jpg_path))
    imageid_path_dict[image_id] = jpg_path

In [7]:
# Load the metadata CSV; `skin_df` and `metadata` are two names for the same dataframe
skin_df = metadata = pd.read_csv(metadata_path)

# Derived columns:
#   path          - image file path looked up from the image id (None when no file matched)
#   cell_type     - full lesion name looked up from the short 'dx' code
#   cell_type_idx - integer category code of cell_type
skin_df['path'] = skin_df['image_id'].map(imageid_path_dict.get)
skin_df['cell_type'] = skin_df['dx'].map(lesion_type_dict.get)
skin_df['cell_type_idx'] = skin_df['cell_type'].astype('category').cat.codes

In [8]:
# Show the metadata table together with the derived path / cell_type / cell_type_idx columns.
skin_df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,images\ISIC_0027419.jpg,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,images\ISIC_0025030.jpg,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,images\ISIC_0026769.jpg,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,images\ISIC_0025661.jpg,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,images\ISIC_0031633.jpg,Benign keratosis-like lesions,2
...,...,...,...,...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,vidir_modern,images\ISIC_0033084.jpg,Actinic keratoses,0
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,vidir_modern,images\ISIC_0033550.jpg,Actinic keratoses,0
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,vidir_modern,images\ISIC_0033536.jpg,Actinic keratoses,0
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,images\ISIC_0032854.jpg,Actinic keratoses,0


In [9]:
# Read every image file, shrink it to 100x75 (PIL size is width x height) and keep
# the pixel data as a numpy array; tqdm shows the progress of this slow step.
temp_arr = [
    np.asarray(Image.open(img_path).resize((100, 75)))
    for img_path in tqdm(skin_df['path'])
]
count = len(temp_arr)  # number of images processed

# Store the arrays next to their metadata rows
skin_df['image'] = temp_arr

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10015/10015 [04:00<00:00, 41.65it/s]


In [11]:
# Down-sample the dominant class before balancing.
# Category code 4 is 'Melanocytic nevi': the codes follow the alphabetical order of the
# cell_type names, and this subset is by far the largest (6705 rows in the recorded output).
skin_df_nv = skin_df[skin_df['cell_type_idx'] == 4]
print(skin_df_nv.shape)

# Keep only the first 2000 nevi rows.
# Fix: slice the nevi subset itself. Slicing `skin_df` here returned the first 2000 rows
# of the whole table, which duplicated non-nevi rows and dropped most of the nevi class.
skin_df_nv = skin_df_nv[0:2000]
print(skin_df_nv.shape)

# Everything that is not a nevus is kept as it is
skin_df = skin_df[skin_df['cell_type_idx'] != 4]
print(skin_df.shape)

# Recombine the two parts. pd.concat replaces DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; the original row index labels are preserved.
skin_df = pd.concat([skin_df, skin_df_nv])

(6705, 12)
(2000, 12)
(3310, 12)


In [21]:
# Show the table again, now including the 'image' column that holds the pixel arrays.
skin_df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,path,cell_type,cell_type_idx,image
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,images/ISIC_0027419.jpg,Benign keratosis-like lesions,2,"[[[190, 153, 194], [192, 154, 196], [191, 153,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,images/ISIC_0025030.jpg,Benign keratosis-like lesions,2,"[[[23, 13, 22], [24, 14, 24], [25, 14, 28], [3..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,images/ISIC_0026769.jpg,Benign keratosis-like lesions,2,"[[[185, 127, 137], [189, 133, 147], [194, 136,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,images/ISIC_0025661.jpg,Benign keratosis-like lesions,2,"[[[24, 11, 17], [26, 13, 22], [38, 21, 32], [5..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,images/ISIC_0031633.jpg,Benign keratosis-like lesions,2,"[[[134, 90, 113], [147, 102, 125], [159, 115, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,vidir_modern,images/ISIC_0033084.jpg,Actinic keratoses,0,"[[[155, 137, 148], [154, 136, 147], [151, 134,..."
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,vidir_modern,images/ISIC_0033550.jpg,Actinic keratoses,0,"[[[5, 6, 4], [6, 7, 4], [7, 8, 6], [7, 7, 5], ..."
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,vidir_modern,images/ISIC_0033536.jpg,Actinic keratoses,0,"[[[112, 102, 105], [127, 116, 117], [141, 127,..."
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,images/ISIC_0032854.jpg,Actinic keratoses,0,"[[[159, 122, 146], [159, 123, 141], [154, 112,..."


In [22]:
# Targets are the integer class codes; features are all columns except the class
# code and the 'dx' column.
target = skin_df['cell_type_idx']
features = skin_df.drop(columns=['cell_type_idx', 'dx'])

In [23]:
# First split: hold out 20% of the rows as the test set (seeded, so it is repeatable)
x_train_o, x_test_o, y_train_o, y_test_o = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=1,
)

# x_train_o is a pd.DataFrame, y_train_o is a pd.Series
print(x_train_o.shape)
print(y_train_o.shape)

(8012, 10)
(8012,)


In [24]:
# Stack the per-row image arrays of each split into one numpy array per split
x_train = np.asarray(list(x_train_o['image']))
x_test = np.asarray(list(x_test_o['image']))

print(x_train.shape)
print(x_test.shape)

(8012, 75, 100, 3)
(2003, 75, 100, 3)


In [25]:
# Over-sample the training set with SMOTE (default neighbour count; `k_neighbors=2`
# was an option that was tried and left disabled).
sm = SMOTE(random_state=23)
print(x_train.shape)
print(y_train_o.shape)

# SMOTE works on 2-D input, so each image is flattened to one row first
x_train_sm, y_train_sm = sm.fit_resample(
    x_train.reshape(x_train.shape[0], -1),
    y_train_o,
)
print(len(x_train_sm), len(y_train_sm))

# Per-class sample counts after resampling
county_sm = Counter(y_train_sm)
print(county_sm)

(8012, 75, 100, 3)
(8012,)




37646 37646
Counter({2: 5378, 4: 5378, 5: 5378, 6: 5378, 1: 5378, 0: 5378, 3: 5378})


In [26]:
# Shapes of the flattened, resampled data
print(x_train_sm.shape)
print(y_train_sm.shape)

# Restore the (samples, 75, 100, 3) image layout for the CNN and adopt the resampled labels
y_train_o = y_train_sm
x_train = x_train_sm.reshape(len(x_train_sm), 75, 100, 3)

print(x_train.shape)
print(y_train_o.shape)
print(Counter(y_train_o))

(37646, 22500)
(37646,)
(37646, 75, 100, 3)
(37646,)
Counter({2: 5378, 4: 5378, 5: 5378, 6: 5378, 1: 5378, 0: 5378, 3: 5378})


In [27]:
# Reference statistics for the normalisation: mean and standard deviation taken over
# every value of the training set.
x_train_std = np.std(x_train)
x_train_mean = np.mean(x_train)

print(f'x_train_mean: {x_train_mean}')
print(f'x_train_std: {x_train_std}')

x_train_mean: 151.58466119345718
x_train_std: 59.27919712306485


In [28]:
# Standardise both sets with the TRAINING mean / std.
# The original one-line expression `(x - mean) / std` allocates full-size float64
# temporaries, which for the resampled training array is several GB each and is what
# made this cell hang. Casting once to float32 and normalising in place halves the
# footprint and avoids the extra temporaries.
# NOTE: like the original, this cell must be run only once per kernel session;
# running it again would normalise already-normalised data.
x_train = x_train.astype(np.float32)
x_train -= np.float32(x_train_mean)
x_train /= np.float32(x_train_std)

x_test = x_test.astype(np.float32)
x_test -= np.float32(x_train_mean)
x_test /= np.float32(x_train_std)

In [29]:
# Turn the integer class codes into one-hot vectors of length 7 (one slot per lesion class)
y_test = to_categorical(y_test_o, num_classes=7)
y_train = to_categorical(y_train_o, num_classes=7)

print(y_train.shape)
print(y_test.shape)

(37646, 7)
(2003, 7)


In [17]:
# Workaround for the next cell crashing Colab: cut images and labels into 10 chunks
y_train_splits = np.array_split(y_train, 10)
x_train_splits = np.array_split(x_train, 10)

print(len(x_train_splits))
print(len(y_train_splits))

10
10


In [None]:
# Carve a validation set (10%) out of the training data

from time import sleep

x_train, x_validate, y_train, y_validate = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=1,
)
print(x_train.shape)
# An earlier, now disabled variant split each x_train_splits / y_train_splits chunk
# separately, concatenated the pieces and paused with sleep() between some chunks.


# Give every image set the (samples, height=75, width=100, channels=3) layout
x_train = x_train.reshape(len(x_train), 75, 100, 3)
x_test = x_test.reshape(len(x_test), 75, 100, 3)
x_validate = x_validate.reshape(len(x_validate), 75, 100, 3)

print(x_train.shape)
print(x_test.shape)
print(x_validate.shape)