In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pathlib
#working_dir = pathlib.Path().absolute()
#os.chdir(working_dir)
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Reshape
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.optimizers import Adam, SGD
from PIL import Image
import tables

In [2]:
# Lookup table from the short diagnosis codes found in the metadata's `dx`
# column to human-readable lesion names.
# NOTE(review): the 'bkl' label ends with a trailing space; it is kept verbatim
# because the `cell_type` values are derived from these strings.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

In [3]:
# Read the HAM10000 metadata table from the dataset folder (path is relative
# to the notebook's working directory).
meta_df = pd.read_csv(
    filepath_or_buffer='skin-cancer-mnist-ham10000/HAM10000_metadata.csv'
)

In [4]:
# Add three derived columns to the metadata frame:
#   path          - image file location built from `image_id`
#   cell_type     - readable lesion name looked up from the `dx` code
#   cell_type_idx - integer category code of `cell_type`
meta_df['path'] = (
    'skin-cancer-mnist-ham10000/HAM10000_images/'
    + meta_df['image_id']
    + '.jpg'
)
meta_df['cell_type'] = meta_df['dx'].map(
    lambda dx_code: lesion_type_dict.get(dx_code)
)
meta_df['cell_type_idx'] = meta_df['cell_type'].astype('category').cat.codes

In [5]:
# Impute missing ages with the column mean.
# The result is assigned back to the frame rather than calling
# fillna(..., inplace=True) on the selected column: the in-place form mutates
# an intermediate Series, which triggers a chained-assignment warning on
# recent pandas and leaves meta_df untouched once Copy-on-Write is active.
meta_df['age'] = meta_df['age'].fillna(meta_df['age'].mean())

In [6]:
# Per-column count of missing values, displayed as the cell output.
meta_df.isna().sum()

lesion_id        0
image_id         0
dx               0
dx_type          0
age              0
sex              0
localization     0
path             0
cell_type        0
cell_type_idx    0
dtype: int64

In [7]:
%%time
meta_df['path'][1:100].map(lambda x: np.asarray(Image.open(x).resize((64,48))))

Wall time: 1.98 s


1     [[[23, 13, 23], [25, 14, 30], [39, 26, 49], [6...
2     [[[186, 130, 141], [192, 136, 151], [198, 143,...
3     [[[24, 11, 19], [38, 21, 31], [66, 39, 51], [9...
4     [[[141, 97, 120], [160, 115, 140], [179, 134, ...
5     [[[5, 0, 1], [9, 1, 4], [13, 3, 7], [17, 5, 10...
                            ...                        
95    [[[102, 65, 75], [111, 73, 81], [117, 78, 85],...
96    [[[167, 110, 129], [169, 114, 131], [170, 114,...
97    [[[147, 114, 136], [147, 113, 136], [147, 110,...
98    [[[19, 15, 16], [23, 16, 16], [28, 18, 18], [4...
99    [[[130, 107, 114], [142, 115, 120], [146, 119,...
Name: path, Length: 99, dtype: object

In [49]:
%%time
meta_df['path'][1:10].map(lambda x: np.asarray(Image.open(x).convert('L').resize((64,48))))

Wall time: 175 ms


1    [[17, 19, 33, 54, 78, 97, 113, 127, 145, 150, ...
2    [[148, 154, 161, 161, 161, 161, 166, 169, 173,...
3    [[16, 27, 49, 72, 88, 106, 127, 134, 132, 123,...
4    [[113, 131, 150, 162, 169, 179, 183, 193, 199,...
5    [[2, 4, 6, 9, 12, 15, 16, 15, 21, 46, 76, 107,...
6    [[160, 158, 159, 163, 165, 159, 161, 165, 167,...
7    [[120, 133, 138, 136, 139, 139, 143, 144, 145,...
8    [[81, 100, 114, 128, 139, 136, 137, 145, 154, ...
9    [[197, 194, 191, 192, 190, 189, 187, 186, 178,...
Name: path, dtype: object

In [8]:
%%time
meta_df['image_rgb'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).resize((48,36))))/255
meta_df['image_gray'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).convert('L').resize((48,36))))/255
meta_df.to_hdf('ham10000_48_36.h5',key='meta_df')


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->['lesion_id', 'image_id', 'dx', 'dx_type', 'sex', 'localization', 'path', 'cell_type', 'image_rgb', 'image_gray']]

  pytables.to_hdf(path_or_buf, key, self, **kwargs)


Wall time: 6min


In [10]:
# Shape of the colour array stored for the row labelled 0.
meta_df.loc[0, 'image_rgb'].shape

(36, 48, 3)

In [11]:
%%time
meta_df['image_rgb'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).resize((32,24))))/255
meta_df['image_gray'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).convert('L').resize((32,24))))/255
meta_df.to_hdf('ham10000_32_24.h5',key='meta_df')


Wall time: 6min 46s


In [55]:
%%time
meta_df['image_rgb'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).resize((64,48))))/255
meta_df['image_gray'] = meta_df['path'].map(lambda x: np.asarray(Image.open(x).convert('L').resize((64,48))))/255
meta_df.to_hdf('ham10000_64_48.h5',key='meta_df')


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->['lesion_id', 'image_id', 'dx', 'dx_type', 'sex', 'localization', 'path', 'cell_type', 'image_rgb', 'image_gray']]

  pytables.to_hdf(path_or_buf, key, self, **kwargs)


Wall time: 6min 17s


In [56]:
# Read the 64x48 HDF5 file back to check that the frame round-trips.
# Object columns in this file were pickled on write, so only load files you
# produced yourself.
qqq = pd.read_hdf(path_or_buf='ham10000_64_48.h5')

In [57]:
# Confirm what kind of object read_hdf returned.
type(qqq)

pandas.core.frame.DataFrame

In [58]:
# Preview the first five rows of the reloaded frame.
qqq.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,image_rgb,image_gray
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[0.7490196078431373, 0.5882352941176471, 0.7...","[[0.6549019607843137, 0.6627450980392157, 0.66..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[0.09019607843137255, 0.050980392156862744, ...","[[0.06666666666666667, 0.07450980392156863, 0...."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[0.7294117647058823, 0.5098039215686274, 0.5...","[[0.5803921568627451, 0.6039215686274509, 0.63..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[0.09411764705882353, 0.043137254901960784, ...","[[0.06274509803921569, 0.10588235294117647, 0...."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[0.5529411764705883, 0.3803921568627451, 0.4...","[[0.44313725490196076, 0.5137254901960784, 0.5..."


In [60]:
# Show the NumPy version this kernel is running.
np.__version__

'1.17.5'

In [26]:
# Preview the first five rows of the metadata frame.
# NOTE(review): the saved output shows an `image` column that no visible cell
# creates - this looks like state from an earlier kernel session; confirm.
meta_df.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,image
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[191, 150, 191], [192, 153, 194], [193, 151,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[23, 13, 23], [25, 14, 30], [39, 26, 49], [6..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[186, 130, 141], [192, 136, 151], [198, 143,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[24, 11, 19], [38, 21, 31], [66, 39, 51], [9..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[141, 97, 120], [160, 115, 140], [179, 134, ..."


In [28]:
# Dump the frame to a scratch CSV file.
# NOTE(review): array-valued columns are written as their string form, so this
# file is presumably not a lossless copy of the images - confirm before reuse.
meta_df.to_csv(path_or_buf='kkkk.csv')

In [29]:
# Display the `image` column.
# NOTE(review): no visible cell creates an `image` column (only image_rgb /
# image_gray) - this appears to rely on earlier kernel state; confirm.
meta_df['image']

0        [[[191, 150, 191], [192, 153, 194], [193, 151,...
1        [[[23, 13, 23], [25, 14, 30], [39, 26, 49], [6...
2        [[[186, 130, 141], [192, 136, 151], [198, 143,...
3        [[[24, 11, 19], [38, 21, 31], [66, 39, 51], [9...
4        [[[141, 97, 120], [160, 115, 140], [179, 134, ...
                               ...                        
10010    [[[171, 153, 166], [170, 153, 166], [167, 150,...
10011    [[[6, 7, 4], [7, 8, 5], [9, 8, 6], [33, 30, 28...
10012    [[[119, 110, 112], [140, 127, 127], [136, 119,...
10013    [[[160, 123, 145], [157, 118, 137], [162, 130,...
10014    [[[175, 142, 126], [176, 142, 124], [180, 149,...
Name: image, Length: 10015, dtype: object

In [33]:
# Show the array stored in the `image` column for the row labelled 0.
# NOTE(review): no visible cell creates an `image` column - confirm where it
# comes from.
meta_df.loc[0, 'image']

array([[[191, 150, 191],
        [192, 153, 194],
        [193, 151, 194],
        ...,
        [197, 156, 167],
        [194, 152, 161],
        [194, 154, 166]],

       [[191, 152, 188],
        [194, 156, 194],
        [197, 161, 202],
        ...,
        [197, 156, 164],
        [194, 151, 156],
        [190, 147, 156]],

       [[196, 163, 200],
        [199, 162, 204],
        [200, 164, 203],
        ...,
        [197, 157, 169],
        [196, 157, 169],
        [187, 146, 165]],

       ...,

       [[180, 146, 181],
        [189, 156, 186],
        [198, 166, 194],
        ...,
        [190, 159, 185],
        [190, 160, 189],
        [190, 158, 195]],

       [[172, 138, 172],
        [180, 146, 175],
        [189, 153, 178],
        ...,
        [186, 153, 173],
        [187, 155, 179],
        [185, 152, 177]],

       [[163, 128, 158],
        [171, 133, 159],
        [180, 141, 168],
        ...,
        [185, 153, 177],
        [185, 154, 180],
        [182, 151, 172]]

In [32]:
# Check the container type of the `image` column.
# NOTE(review): no visible cell creates an `image` column - confirm where it
# comes from.
type(meta_df['image'])

pandas.core.series.Series

In [34]:
# Report the size in bytes that sys.getsizeof attributes to the frame.
# NOTE(review): this figure may not include the pixel buffers of the arrays
# held in object columns - verify against the expected image payload.
import sys
sys.getsizeof(meta_df)

8434141

In [37]:
# Persist the frame to a scratch HDF5 file under the key 'meta_df'.
# `tables` (PyTables) is imported explicitly here; the warning in the output
# shows the write going through pandas' pytables module, which pickles the
# object-typed columns.
import tables
meta_df.to_hdf(path_or_buf='kkkk.h5', key='meta_df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->['lesion_id', 'image_id', 'dx', 'dx_type', 'sex', 'localization', 'path', 'cell_type', 'image']]

  pytables.to_hdf(path_or_buf, key, self, **kwargs)


In [38]:
# Read the scratch HDF5 file back to verify the round trip.
df2 = pd.read_hdf(path_or_buf='kkkk.h5', key='meta_df')

In [39]:
# Preview the first five rows of the frame loaded from the scratch file.
df2.head(n=5)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx,image
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[191, 150, 191], [192, 153, 194], [193, 151,..."
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[23, 13, 23], [25, 14, 30], [39, 26, 49], [6..."
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[186, 130, 141], [192, 136, 151], [198, 143,..."
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[24, 11, 19], [38, 21, 31], [66, 39, 51], [9..."
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,skin-cancer-mnist-ham10000/HAM10000_images/ISI...,Benign keratosis-like lesions,2,"[[[141, 97, 120], [160, 115, 140], [179, 134, ..."


In [41]:
# Shape of the array stored in `image` for the row labelled 0 after reload.
df2.loc[0, 'image'].shape

(48, 64, 3)