Install new packages: Pillow (provides `from PIL import Image`)

In [1]:
import pandas as pd
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import imageio
from PIL import Image

### Data retrieval and exploration

In [2]:
# Directory holding the HAM10000 metadata and the pre-flattened pixel CSVs.
# NOTE(review): this is a user-specific location, while later cells read from
# os.path.join('..', 'dataset') — confirm both point at the same files.
path = '~/code/cecyprice/skin_lesion_detection/dataset/'

# One row per image: lesion id, image id, diagnosis and patient attributes.
metadata = pd.read_csv(f'{path}HAM10000_metadata.csv')

# Flattened-pixel tables; the file names encode the resolution (8x8 / 28x28)
# and the colour mode (L / RGB). Each table also carries a `label` column.
dim1_L = pd.read_csv(f'{path}hmnist_8_8_L.csv')
dim1_RGB = pd.read_csv(f'{path}hmnist_8_8_RGB.csv')
dim2_L = pd.read_csv(f'{path}hmnist_28_28_L.csv')
dim2_RGB = pd.read_csv(f'{path}hmnist_28_28_RGB.csv')

In [3]:
# Preview the first rows of the metadata table.
metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [4]:
# (rows, columns) of the metadata table.
metadata.shape

(10015, 7)

In [5]:
# (rows, columns) of the table read from hmnist_8_8_L.csv.
dim1_L.shape

(10015, 65)

In [6]:
# (rows, columns) of the table read from hmnist_8_8_RGB.csv.
dim1_RGB.shape

(10015, 193)

In [7]:
# (rows, columns) of the table read from hmnist_28_28_L.csv.
dim2_L.shape

(10015, 785)

In [8]:
# (rows, columns) of the table read from hmnist_28_28_RGB.csv.
dim2_RGB.shape

(10015, 2353)

In [9]:
# Preview the 28x28 RGB table: one pixelNNNN column per value plus a final `label` column.
dim2_RGB.head()

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,192,153,193,195,155,192,197,154,185,202,...,173,124,138,183,147,166,185,154,177,2
1,25,14,30,68,48,75,123,93,126,158,...,60,39,55,25,14,28,25,14,27,2
2,192,138,153,200,145,163,201,142,160,206,...,167,129,143,159,124,142,136,104,117,2
3,38,19,30,95,59,72,143,103,119,171,...,44,26,36,25,12,17,25,12,15,2
4,158,113,139,194,144,174,215,162,191,225,...,209,166,185,172,135,149,109,78,92,2


In [10]:
# Distinct values of the `label` column, in order of first appearance.
dim1_L.label.unique()

array([2, 4, 3, 6, 5, 1, 0], dtype=int64)

### Create dictionary of images and labels

In [11]:
# Root folder of the dataset, relative to the notebook's working directory.
base_skin_dir = os.path.join('..', 'dataset')

# Map every image id (the .jpg file name without its extension) to the file's
# path, looking exactly one sub-folder below the dataset root.
jpg_pattern = os.path.join(base_skin_dir, '*', '*.jpg')
imageid_path_dict = {}
for jpg_path in glob(jpg_pattern):
    file_stem, _extension = os.path.splitext(os.path.basename(jpg_path))
    imageid_path_dict[file_stem] = jpg_path

In [12]:
# Human-readable diagnosis name for each short `dx` code in the metadata.
# Insertion order is kept as written; note the 'bkl' label ends with a space.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

### Merge all datasets

In [13]:
# Working table for the merge: the HAM10000 metadata read from the dataset root.
metadata_csv = os.path.join(base_skin_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_csv)

In [14]:
# File path of each image, looked up by image id (None when no .jpg was found).
skin_df['path'] = skin_df['image_id'].map(lambda image_id: imageid_path_dict.get(image_id))

# Readable diagnosis name for each `dx` code (None for a code missing from the mapping).
skin_df['cell_type'] = skin_df['dx'].map(lambda dx_code: lesion_type_dict.get(dx_code))

# Integer code of each diagnosis name, taken from the categorical encoding of `cell_type`.
skin_df['cell_type_idx'] = skin_df['cell_type'].astype('category').cat.codes

In [15]:
# Check the path, cell_type and cell_type_idx columns on the first rows.
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2


In [16]:
# Attach each flattened-pixel table to skin_df as one NumPy array per row.
#
# The `label` column is dropped first: it holds the class id, so keeping it
# would append the target to every pixel vector (label leakage, and vectors of
# e.g. 65 values instead of 8 * 8 = 64).
#
# Rows are matched on the index, i.e. the CSV rows are assumed to be in the
# same order as the metadata rows — TODO confirm.
for column_name, pixel_table in [
    ('hmnist_8_8_L', dim1_L),
    ('hmnist_8_8_RGB', dim1_RGB),
    ('hmnist_28_28_L', dim2_L),
    ('hmnist_28_28_RGB', dim2_RGB),
]:
    pixel_values = pixel_table.drop(columns=['label']).to_numpy()
    # list(2-D array) yields one 1-D array per row, stored as an object column.
    skin_df[column_name] = pd.Series(list(pixel_values), index=pixel_table.index)

In [17]:
# Check the four hmnist_* array columns on the first rows.
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,hmnist_8_8_L,hmnist_8_8_RGB,hmnist_28_28_L,hmnist_28_28_RGB
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2,"[172, 182, 191, 183, 180, 181, 165, 164, 173, ...","[199, 156, 188, 210, 165, 198, 216, 176, 203, ...","[169, 171, 170, 177, 181, 182, 181, 185, 194, ...","[192, 153, 193, 195, 155, 192, 197, 154, 185, ..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2,"[98, 149, 170, 193, 183, 162, 164, 100, 137, 1...","[115, 87, 115, 180, 133, 158, 200, 153, 187, 2...","[19, 57, 105, 140, 149, 148, 144, 155, 170, 17...","[25, 14, 30, 68, 48, 75, 123, 93, 126, 158, 12..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2,"[165, 164, 179, 172, 152, 163, 169, 151, 168, ...","[203, 146, 164, 209, 144, 153, 217, 160, 181, ...","[155, 163, 161, 167, 167, 172, 155, 152, 165, ...","[192, 138, 153, 200, 145, 163, 201, 142, 160, ..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2,"[109, 159, 167, 166, 163, 159, 155, 96, 141, 1...","[135, 96, 110, 198, 141, 156, 208, 148, 158, 2...","[25, 71, 116, 139, 136, 153, 148, 161, 172, 16...","[38, 19, 30, 95, 59, 72, 143, 103, 119, 171, 1..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2,"[173, 202, 210, 194, 208, 248, 243, 205, 180, ...","[204, 156, 187, 232, 185, 216, 236, 196, 220, ...","[129, 162, 181, 196, 205, 208, 205, 213, 225, ...","[158, 113, 139, 194, 144, 174, 215, 162, 191, ..."


In [18]:
# Read every image file at its original resolution, in row order.
# Iterating over the paths directly avoids the label-based lookup
# `skin_df.path[index]`, which is only correct while skin_df has a default
# 0..n-1 index.
# NOTE(review): this keeps every full-size image in memory at once.
images = [imageio.imread(image_path) for image_path in skin_df['path']]

In [21]:
# Store the full-size images as an object column. Reusing skin_df's own index
# keeps the assignment positional (i-th image -> i-th row) instead of relying
# on skin_df having a default 0..n-1 index.
skin_df['image_450_600'] = pd.Series(images, index=skin_df.index)

In [22]:
def _load_resized(image_path):
    """Open the image at `image_path`, resize it with size (100, 75) and return it as an array."""
    resized = Image.open(image_path).resize((100, 75))
    return np.asarray(resized)


# Down-scaled copy of every image, one array per row.
skin_df['image_100_75'] = skin_df['path'].map(_load_resized)

In [23]:
# Check the image_450_600 and image_100_75 array columns on the first rows.
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,hmnist_8_8_L,hmnist_8_8_RGB,hmnist_28_28_L,hmnist_28_28_RGB,image_450_600,image_100_75
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2,"[172, 182, 191, 183, 180, 181, 165, 164, 173, ...","[199, 156, 188, 210, 165, 198, 216, 176, 203, ...","[169, 171, 170, 177, 181, 182, 181, 185, 194, ...","[192, 153, 193, 195, 155, 192, 197, 154, 185, ...","[[[188, 147, 191], [186, 148, 189], [187, 150,...","[[[190, 153, 194], [192, 154, 196], [191, 153,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2,"[98, 149, 170, 193, 183, 162, 164, 100, 137, 1...","[115, 87, 115, 180, 133, 158, 200, 153, 187, 2...","[19, 57, 105, 140, 149, 148, 144, 155, 170, 17...","[25, 14, 30, 68, 48, 75, 123, 93, 126, 158, 12...","[[[25, 15, 23], [25, 14, 22], [25, 14, 22], [2...","[[[23, 13, 22], [24, 14, 24], [25, 14, 28], [3..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2,"[165, 164, 179, 172, 152, 163, 169, 151, 168, ...","[203, 146, 164, 209, 144, 153, 217, 160, 181, ...","[155, 163, 161, 167, 167, 172, 155, 152, 165, ...","[192, 138, 153, 200, 145, 163, 201, 142, 160, ...","[[[186, 128, 140], [188, 128, 136], [183, 126,...","[[[185, 127, 137], [189, 133, 147], [194, 136,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2,"[109, 159, 167, 166, 163, 159, 155, 96, 141, 1...","[135, 96, 110, 198, 141, 156, 208, 148, 158, 2...","[25, 71, 116, 139, 136, 153, 148, 161, 172, 16...","[38, 19, 30, 95, 59, 72, 143, 103, 119, 171, 1...","[[[24, 9, 16], [22, 11, 15], [23, 11, 15], [26...","[[[24, 11, 17], [26, 13, 22], [38, 21, 32], [5..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2,"[173, 202, 210, 194, 208, 248, 243, 205, 180, ...","[204, 156, 187, 232, 185, 216, 236, 196, 220, ...","[129, 162, 181, 196, 205, 208, 205, 213, 225, ...","[158, 113, 139, 194, 144, 174, 215, 162, 191, ...","[[[122, 80, 102], [124, 82, 104], [127, 83, 10...","[[[134, 90, 113], [147, 102, 125], [159, 115, ..."


In [24]:
# Flatten each resized image into a 1-D vector of 22500 values (75 * 100 * 3).
skin_df['image_100_75_reshaped'] = skin_df['image_100_75'].map(
    lambda pixels: np.reshape(pixels, (22500,))
)

In [25]:
# Check the flattened image_100_75_reshaped column on the first rows.
skin_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,hmnist_8_8_L,hmnist_8_8_RGB,hmnist_28_28_L,hmnist_28_28_RGB,image_450_600,image_100_75,image_100_75_reshaped
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2,"[172, 182, 191, 183, 180, 181, 165, 164, 173, ...","[199, 156, 188, 210, 165, 198, 216, 176, 203, ...","[169, 171, 170, 177, 181, 182, 181, 185, 194, ...","[192, 153, 193, 195, 155, 192, 197, 154, 185, ...","[[[188, 147, 191], [186, 148, 189], [187, 150,...","[[[190, 153, 194], [192, 154, 196], [191, 153,...","[190, 153, 194, 192, 154, 196, 191, 153, 195, ..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2,"[98, 149, 170, 193, 183, 162, 164, 100, 137, 1...","[115, 87, 115, 180, 133, 158, 200, 153, 187, 2...","[19, 57, 105, 140, 149, 148, 144, 155, 170, 17...","[25, 14, 30, 68, 48, 75, 123, 93, 126, 158, 12...","[[[25, 15, 23], [25, 14, 22], [25, 14, 22], [2...","[[[23, 13, 22], [24, 14, 24], [25, 14, 28], [3...","[23, 13, 22, 24, 14, 24, 25, 14, 28, 31, 19, 3..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2,"[165, 164, 179, 172, 152, 163, 169, 151, 168, ...","[203, 146, 164, 209, 144, 153, 217, 160, 181, ...","[155, 163, 161, 167, 167, 172, 155, 152, 165, ...","[192, 138, 153, 200, 145, 163, 201, 142, 160, ...","[[[186, 128, 140], [188, 128, 136], [183, 126,...","[[[185, 127, 137], [189, 133, 147], [194, 136,...","[185, 127, 137, 189, 133, 147, 194, 136, 151, ..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2,"[109, 159, 167, 166, 163, 159, 155, 96, 141, 1...","[135, 96, 110, 198, 141, 156, 208, 148, 158, 2...","[25, 71, 116, 139, 136, 153, 148, 161, 172, 16...","[38, 19, 30, 95, 59, 72, 143, 103, 119, 171, 1...","[[[24, 9, 16], [22, 11, 15], [23, 11, 15], [26...","[[[24, 11, 17], [26, 13, 22], [38, 21, 32], [5...","[24, 11, 17, 26, 13, 22, 38, 21, 32, 55, 31, 4..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2,"[173, 202, 210, 194, 208, 248, 243, 205, 180, ...","[204, 156, 187, 232, 185, 216, 236, 196, 220, ...","[129, 162, 181, 196, 205, 208, 205, 213, 225, ...","[158, 113, 139, 194, 144, 174, 215, 162, 191, ...","[[[122, 80, 102], [124, 82, 104], [127, 83, 10...","[[[134, 90, 113], [147, 102, 125], [159, 115, ...","[134, 90, 113, 147, 102, 125, 159, 115, 140, 1..."


### Scaling

#### Create train and test split

In [26]:
# Target: the integer-encoded diagnosis.
y = skin_df['cell_type_idx']

# Features: every remaining column.
# NOTE(review): `dx` and `cell_type` stay in X although they encode the same
# diagnosis as y; the later cells shown here only use `image_100_75_reshaped`.
X = skin_df.drop(columns='cell_type_idx')

In [27]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test split; the fixed random_state makes the
# split repeatable across runs.
X_train_tmp, X_test_tmp, y_train_tmp, y_test_tmp = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=7,
)

In [28]:
# Preview the first rows of the training split.
X_train_tmp.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,hmnist_8_8_L,hmnist_8_8_RGB,hmnist_28_28_L,hmnist_28_28_RGB,image_450_600,image_100_75,image_100_75_reshaped
2181,HAM_0001814,ISIC_0028567,mel,histo,65.0,female,foot,..\dataset\HAM10000_images_part_1\ISIC_0028567...,Melanoma,"[137, 154, 178, 187, 192, 192, 176, 144, 138, ...","[170, 126, 110, 187, 143, 132, 206, 168, 162, ...","[125, 133, 137, 142, 145, 152, 159, 166, 173, ...","[158, 114, 99, 166, 122, 105, 171, 126, 110, 1...","[[[148, 104, 91], [150, 106, 93], [151, 107, 9...","[[[153, 108, 96], [155, 111, 99], [157, 114, 9...","[153, 108, 96, 155, 111, 99, 157, 114, 97, 160..."
8162,HAM_0003099,ISIC_0025734,nv,histo,25.0,male,back,..\dataset\HAM10000_images_part_1\ISIC_0025734...,Melanocytic nevi,"[157, 161, 168, 160, 140, 117, 104, 150, 163, ...","[202, 142, 121, 207, 145, 127, 213, 152, 137, ...","[152, 153, 157, 158, 161, 159, 160, 163, 160, ...","[197, 138, 114, 198, 138, 113, 203, 142, 120, ...","[[[193, 129, 101], [193, 129, 101], [192, 131,...","[[[193, 134, 107], [196, 139, 116], [195, 136,...","[193, 134, 107, 196, 139, 116, 195, 136, 108, ..."
7556,HAM_0007415,ISIC_0032541,nv,histo,35.0,female,lower extremity,..\dataset\HAM10000_images_part_2\ISIC_0032541...,Melanocytic nevi,"[138, 158, 168, 173, 175, 168, 154, 136, 150, ...","[149, 133, 139, 167, 153, 161, 177, 164, 173, ...","[123, 132, 139, 144, 149, 152, 158, 162, 163, ...","[136, 118, 121, 143, 127, 132, 149, 134, 140, ...","[[[127, 112, 109], [128, 110, 110], [129, 113,...","[[[130, 112, 116], [133, 115, 119], [137, 120,...","[130, 112, 116, 133, 115, 119, 137, 120, 123, ..."
7637,HAM_0004882,ISIC_0034282,nv,histo,20.0,male,chest,..\dataset\HAM10000_images_part_2\ISIC_0034282...,Melanocytic nevi,"[168, 176, 182, 190, 189, 180, 177, 171, 170, ...","[184, 158, 178, 193, 167, 184, 200, 172, 190, ...","[158, 162, 169, 171, 172, 172, 176, 180, 181, ...","[178, 147, 167, 181, 152, 171, 185, 159, 179, ...","[[[179, 153, 178], [179, 154, 176], [178, 155,...","[[[179, 153, 172], [180, 154, 174], [177, 146,...","[179, 153, 172, 180, 154, 174, 177, 146, 166, ..."
6051,HAM_0000051,ISIC_0026342,nv,follow_up,45.0,male,abdomen,..\dataset\HAM10000_images_part_1\ISIC_0026342...,Melanocytic nevi,"[188, 184, 176, 173, 182, 192, 194, 189, 183, ...","[246, 165, 160, 245, 160, 156, 240, 151, 139, ...","[190, 192, 191, 190, 192, 191, 191, 191, 190, ...","[246, 167, 162, 247, 170, 166, 247, 169, 166, ...","[[[246, 165, 162], [245, 166, 162], [246, 167,...","[[[246, 165, 161], [247, 169, 164], [247, 169,...","[246, 165, 161, 247, 169, 164, 247, 169, 165, ..."


In [29]:
# Stack the flattened images of the training split into one 2-D array (one row per image) for inspection.
np.asarray(X_train_tmp['image_100_75_reshaped'].tolist())

array([[153, 108,  96, ..., 181, 140, 130],
       [193, 134, 107, ..., 206, 155, 154],
       [130, 112, 116, ..., 140, 127, 133],
       ...,
       [229, 139, 138, ..., 206, 129, 115],
       [188, 159, 173, ..., 167, 140, 163],
       [122,  92, 107, ..., 152, 129, 146]], dtype=uint8)

In [30]:
# Turn the per-row flattened images into 2-D matrices: one row per image,
# one column per pixel value.
flattened_column = 'image_100_75_reshaped'
X_train = np.array(X_train_tmp[flattened_column].tolist())
X_test = np.array(X_test_tmp[flattened_column].tolist())

#### Zero-centering

In [31]:
# (number of training images, values per flattened image).
X_train.shape

(8012, 22500)

In [37]:
# Subtract each column's mean over the training rows, so every feature of the
# training matrix is centred on zero.
train_feature_mean = X_train.mean(axis=0)
X_train_zero = X_train - train_feature_mean

In [None]:
# Centre the test set with the TRAINING per-feature means. Using the test
# set's own mean would apply a different transform to train and test data and
# let test-set statistics leak into preprocessing.
X_test_zero = X_test - X_train.mean(axis=0)

In [42]:
# Rescale the centred training matrix to [0, 1] using its global minimum and
# maximum (a single pair of values for the whole matrix, not per feature).
lowest_value = X_train_zero.min()
value_span = X_train_zero.max() - lowest_value
X_train_zero = (X_train_zero - lowest_value) / value_span

In [None]:
# Apply to the test set the SAME centring and rescaling that the training set
# receives: training per-feature means, then the global min/max of the centred
# training matrix. Scaling the test set by its own min/max would give train
# and test data different transforms.
#
# The statistics are recomputed from X_train so this cell does not depend on
# the current (already rescaled) contents of X_train_zero and can be re-run.
# The global min/max of the centred training data are taken from per-column
# extremes, which avoids materialising the full centred matrix again.
train_mean = X_train.mean(axis=0)
train_min = (X_train.min(axis=0) - train_mean).min()
train_max = (X_train.max(axis=0) - train_mean).max()

# Test values may fall slightly outside [0, 1]; that is expected.
X_test_zero = ((X_test - train_mean) - train_min) / (train_max - train_min)

#### Normalization

In [43]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max on the training data only ...
scaler = MinMaxScaler()
X_train_norm = scaler.fit_transform(X_train)
# ... and reuse those training statistics for the test data. Calling
# fit_transform here would refit the scaler on the test set, so train and test
# would be scaled differently and test statistics would leak into preprocessing.
X_test_norm = scaler.transform(X_test)



#### Standardization

In [44]:
from sklearn.preprocessing import StandardScaler

# Learn each feature's mean and standard deviation on the training data only ...
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
# ... and reuse those training statistics for the test data. Calling
# fit_transform here would refit the scaler on the test set, so train and test
# would be standardised differently and test statistics would leak into
# preprocessing.
X_test_std = scaler.transform(X_test)



#### One-hot encoding

#### Split training and validation set

In [None]:
# Split off 10% of the training data as a validation set (fixed random_state
# so the split is repeatable).
# NOTE(review): `x_train` and `y_train` are not assigned in any cell visible in
# this notebook (the cells shown define `X_train`, `X_train_norm`,
# `X_train_std` and `y_train_tmp`), so on a fresh kernel this line raises
# NameError unless they are defined elsewhere — confirm which scaled feature
# array and which label encoding are intended here.
x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size = 0.1, random_state = 2)

In [None]:
# Restore the flattened rows to image tensors of shape (75, 100, 3) so each
# array becomes (n_samples, 75, 100, 3).
# NOTE(review): `x_test` is not assigned in any cell visible in this notebook
# (only `X_test` and its scaled variants are) — confirm where it comes from.
image_shape = (75, 100, 3)
x_train = x_train.reshape((x_train.shape[0],) + image_shape)
x_test = x_test.reshape((x_test.shape[0],) + image_shape)
x_validate = x_validate.reshape((x_validate.shape[0],) + image_shape)