Install new packages: Pillow (provides the `PIL.Image` module imported below)

In [1]:
import pandas as pd
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import imageio
from PIL import Image

### Data retrieval and exploration

In [2]:
# Folder that holds the metadata CSV and the hmnist_* pixel tables.
# NOTE(review): this is a user-specific home-directory path, while later cells
# read from the relative '../dataset' folder - confirm both point to the same data.
path = '~/code/cecyprice/skin_lesion_detection/dataset/'

# Read the five CSV files in a fixed order and bind each to its own name.
# The hmnist_* files presumably hold flattened 8x8 / 28x28 thumbnails in
# greyscale (L) and colour (RGB) - TODO confirm against the dataset description.
metadata, dim1_L, dim1_RGB, dim2_L, dim2_RGB = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        'HAM10000_metadata.csv',
        'hmnist_8_8_L.csv',
        'hmnist_8_8_RGB.csv',
        'hmnist_28_28_L.csv',
        'hmnist_28_28_RGB.csv',
    )
)

### Create dictionary of images and labels

In [3]:
# Dataset root, relative to the notebook's working directory.
base_skin_dir = os.path.join('..', 'dataset')

# Collect every .jpg located exactly one directory below the dataset root.
jpg_paths = glob(os.path.join(base_skin_dir, '*', '*.jpg'))

# Map file name without extension -> file path; the metadata column
# 'image_id' is later looked up against these keys.
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(jpg_path))[0], jpg_path)
    for jpg_path in jpg_paths
)

In [4]:
# Short diagnosis code (values of the metadata 'dx' column) -> readable label.
# Labels carry no leading/trailing whitespace so they can be used directly as
# group keys and plot labels.
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma',
}

### Merge all datasets

In [5]:
# Load the lesion metadata table; the following cells add columns to it.
metadata_csv = os.path.join(base_skin_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_csv)

In [6]:
# Add the image file path and the readable diagnosis label to every row by
# looking the row's key up in the matching dictionary. Keys that are absent
# from a dictionary produce None (dict.get's default).
for target_col, key_col, lookup in (
    ('path', 'image_id', imageid_path_dict),
    ('cell_type', 'dx', lesion_type_dict),
):
    skin_df[target_col] = skin_df[key_col].map(lookup.get)

# Integer code for each distinct cell_type value.
skin_df['cell_type_idx'] = pd.Categorical(skin_df['cell_type']).codes

In [7]:
# Remove, in place, every row that has a missing value in any column.
# The index is not reset here, so labels of removed rows leave gaps.
skin_df.dropna(axis=0, how='any', inplace=True)

In [8]:
# Sanity check: array shape of the first image after resizing to 100x75
# (PIL takes (width, height); the array comes back as (height, width, channels)).
# .iloc[0] selects the first remaining row by position, so the check still
# works if the row labelled 0 was removed by the earlier dropna.
with Image.open(skin_df['path'].iloc[0]) as first_image:
    resized_shape = np.asarray(first_image.resize((100, 75))).shape
resized_shape

(75, 100, 3)

In [9]:
def _flat_pixels(image_path, size=None):
    """Load one image file and return its pixel values as a flat 1-D array.

    Parameters
    ----------
    image_path : path of the image file to open.
    size : optional (width, height) tuple; when given, the image is resized
        to that size before it is converted to an array.

    Returns
    -------
    numpy.ndarray with a single dimension holding all pixel values.
    """
    with Image.open(image_path) as original:
        picture = original if size is None else original.resize(size)
        # reshape(-1) lets numpy infer the length, so the cell is not tied to
        # one hard-coded image size (the previous 810000 / 22500 literals).
        return np.asarray(picture).reshape(-1)


# Full-resolution pixels, flattened.
# NOTE(review): this keeps every full-size image in memory inside the frame.
skin_df['images'] = skin_df['path'].map(_flat_pixels)

# Pixels of the 100x75 (width x height) resized image, flattened.
skin_df['images_resized'] = skin_df['path'].map(
    lambda image_path: _flat_pixels(image_path, size=(100, 75))
)

In [10]:
# Preview the first five rows, including the newly added image columns.
skin_df.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,images,images_resized
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2,"[188, 147, 191, 186, 148, 189, 187, 150, 191, ...","[190, 153, 194, 192, 154, 196, 191, 153, 195, ..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2,"[25, 15, 23, 25, 14, 22, 25, 14, 22, 25, 14, 2...","[23, 13, 22, 24, 14, 24, 25, 14, 28, 31, 19, 3..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2,"[186, 128, 140, 188, 128, 136, 183, 126, 133, ...","[185, 127, 137, 189, 133, 147, 194, 136, 151, ..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2,"[24, 9, 16, 22, 11, 15, 23, 11, 15, 26, 11, 16...","[24, 11, 17, 26, 13, 22, 38, 21, 32, 55, 31, 4..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2,"[122, 80, 102, 124, 82, 104, 127, 83, 106, 130...","[134, 90, 113, 147, 102, 125, 159, 115, 140, 1..."


In [17]:
# Length of the flattened, resized pixel vector of the first remaining row.
# .iloc[0] is positional, so this does not depend on the row labelled 0
# having survived the earlier dropna.
skin_df['images_resized'].iloc[0].shape

(22500,)

In [24]:
# Turn each flat pixel vector back into a (75, 100, 3) image and stack all of
# them into a single array with one leading entry per row.
np.array([
    flat_pixels.reshape((75, 100, 3))
    for flat_pixels in skin_df['images_resized'].values
])

array([[[[190, 153, 194],
         [192, 154, 196],
         [191, 153, 195],
         ...,
         [194, 152, 161],
         [193, 151, 163],
         [196, 157, 169]],

        [[190, 146, 188],
         [191, 149, 185],
         [193, 154, 194],
         ...,
         [194, 152, 158],
         [193, 150, 160],
         [194, 153, 163]],

        [[190, 153, 189],
         [193, 154, 189],
         [195, 157, 196],
         ...,
         [194, 151, 155],
         [191, 147, 152],
         [188, 143, 151]],

        ...,

        [[171, 137, 172],
         [177, 143, 175],
         [182, 148, 176],
         ...,
         [188, 156, 180],
         [186, 154, 179],
         [185, 152, 176]],

        [[165, 131, 164],
         [171, 137, 167],
         [177, 141, 168],
         ...,
         [185, 152, 174],
         [184, 151, 174],
         [182, 149, 168]],

        [[159, 124, 155],
         [164, 127, 154],
         [169, 130, 153],
         ...,
         [186, 156, 184],
        

### Scaling

In [11]:
# Divide every pixel value of the resized image by 255.
# Presumably the pixels are 8-bit (0-255), which would map them to [0, 1] - confirm.
skin_df['images_norm'] = skin_df['images_resized'].apply(
    lambda pixels: np.true_divide(pixels, 255)
)

In [12]:
def _center_and_rescale(pixels):
    """Subtract the mean from a pixel array, then min-max rescale it to [0, 1].

    The mean-centred array is computed once and reused. Because the same
    constant is subtracted from every value before the min-max step, the
    result equals min-max scaling of the raw pixels (up to floating-point
    rounding).

    Parameters
    ----------
    pixels : numpy.ndarray of pixel values (a flat 1-D vector in this notebook).

    Returns
    -------
    numpy.ndarray of floats in [0, 1]; all zeros when every value in
    ``pixels`` is identical (avoids a 0/0 division).
    """
    centered = pixels - pixels.mean(axis=0)
    lowest = centered.min()
    value_range = centered.max() - lowest
    if value_range == 0:
        # Constant image: nothing to spread out, return zeros instead of NaN.
        return np.zeros_like(centered)
    return (centered - lowest) / value_range


skin_df['images_centered'] = skin_df.images_resized.apply(_center_and_rescale)

In [13]:
def _standardize(pixels):
    """Return ``pixels`` with its mean subtracted, divided by its standard deviation."""
    mean_value = pixels.mean(axis=0)
    std_value = pixels.std(axis=0)
    return (pixels - mean_value) / std_value


# Standardize each resized image independently, using its own statistics.
skin_df['images_std'] = skin_df['images_resized'].apply(_standardize)

In [14]:
# Preview the first five rows with the three scaled image columns added.
skin_df.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,images,images_resized,images_norm,images_centered,images_std
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0027419...,Benign keratosis-like lesions,2,"[188, 147, 191, 186, 148, 189, 187, 150, 191, ...","[190, 153, 194, 192, 154, 196, 191, 153, 195, ...","[0.7450980392156863, 0.6, 0.7607843137254902, ...","[0.7010309278350515, 0.5103092783505154, 0.721...","[0.19950164907982235, -1.1114551972296924, 0.3..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025030...,Benign keratosis-like lesions,2,"[25, 15, 23, 25, 14, 22, 25, 14, 22, 25, 14, 2...","[23, 13, 22, 24, 14, 24, 25, 14, 28, 31, 19, 3...","[0.09019607843137255, 0.050980392156862744, 0....","[0.04700854700854701, 0.004273504273504274, 0....","[-3.8856808502375855, -4.138042798574535, -3.9..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0026769...,Benign keratosis-like lesions,2,"[186, 128, 140, 188, 128, 136, 183, 126, 133, ...","[185, 127, 137, 189, 133, 147, 194, 136, 151, ...","[0.7254901960784313, 0.4980392156862745, 0.537...","[0.6764705882352942, 0.3352941176470588, 0.394...","[0.11576717234475732, -1.8118885242587746, -1...."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,..\dataset\HAM10000_images_part_1\ISIC_0025661...,Benign keratosis-like lesions,2,"[24, 9, 16, 22, 11, 15, 23, 11, 15, 26, 11, 16...","[24, 11, 17, 26, 13, 22, 38, 21, 32, 55, 31, 4...","[0.09411764705882353, 0.043137254901960784, 0....","[0.06779661016949153, 0.012711864406779662, 0....","[-3.4394152137644385, -3.7561217089652135, -3...."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,..\dataset\HAM10000_images_part_2\ISIC_0031633...,Benign keratosis-like lesions,2,"[122, 80, 102, 124, 82, 104, 127, 83, 106, 130...","[134, 90, 113, 147, 102, 125, 159, 115, 140, 1...","[0.5254901960784314, 0.35294117647058826, 0.44...","[0.45982142857142855, 0.26339285714285715, 0.3...","[-1.211232724478313, -2.192314552914663, -1.67..."
