In [1]:
import pandas as pd
import os
import tensorflow as tf

from keras_preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from PIL import ImageFile
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

ImageFile.LOAD_TRUNCATED_IMAGES = True

# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

# gpu_options = tf.GPUOptions(allow_growth=True)
# session = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

In [2]:
# tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None)
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
tf.test.is_gpu_available(
    cuda_only=False, min_cuda_compute_capability=None
)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [4]:
tf.test.is_built_with_cuda()

True

In [5]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [6]:
data = pd.read_csv('data/HAM10000_metadata')
data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern


In [7]:
unwanted_label = ['vasc', 'df']
data_pd = data.query("dx!=@unwanted_label")
print(data_pd['dx'].unique().tolist())

['bkl', 'nv', 'mel', 'bcc', 'akiec']


In [8]:
train_dir = os.path.join('HAM10000', 'train_dir')
test_dir = os.path.join('HAM10000', 'test_dir')

In [9]:
df_count = data_pd.groupby('lesion_id').count()
df_count.head()

df_count = df_count[df_count['dx'] == 1]
df_count.reset_index(inplace=True)

In [10]:
def duplicates(x):
    unique = set(df_count['lesion_id'])
    if x in unique:
        return 'no'
    else:
        return 'duplicates'

In [11]:
data_pd['is_duplicate'] = data_pd['lesion_id'].apply(duplicates)
data_pd.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,is_duplicate
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,duplicates
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,duplicates


In [12]:
df_count = data_pd[data_pd['is_duplicate'] == 'no']

In [13]:
train, test_df = train_test_split(df_count, test_size=0.15, stratify=df_count['dx'])

In [14]:
def identify_trainOrtest(x):
    test_data = set(test_df['image_id'])
    if str(x) in test_data:
        return 'test'
    else:
        return 'train'

In [15]:
#creating train_df
data_pd['train_test_split'] = data_pd['image_id'].apply(identify_trainOrtest)
train_df = data_pd[data_pd['train_test_split'] == 'train']
train_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,is_duplicate,train_test_split
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,duplicates,train
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,duplicates,train
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,duplicates,train
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,duplicates,train
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,duplicates,train


In [16]:
test_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,is_duplicate
5350,HAM_0003118,ISIC_0026483,nv,follow_up,65.0,male,abdomen,vidir_molemax,no
2049,HAM_0006257,ISIC_0029843,mel,histo,70.0,male,neck,rosendahl,no
3718,HAM_0005671,ISIC_0029697,nv,follow_up,55.0,male,trunk,vidir_molemax,no
3297,HAM_0002321,ISIC_0029015,nv,follow_up,45.0,male,back,vidir_molemax,no
409,HAM_0002769,ISIC_0032481,bkl,histo,80.0,male,back,rosendahl,no


In [17]:
print(train_df)

         lesion_id      image_id     dx dx_type   age     sex localization  \
0      HAM_0000118  ISIC_0027419    bkl   histo  80.0    male        scalp   
1      HAM_0000118  ISIC_0025030    bkl   histo  80.0    male        scalp   
2      HAM_0002730  ISIC_0026769    bkl   histo  80.0    male        scalp   
3      HAM_0002730  ISIC_0025661    bkl   histo  80.0    male        scalp   
4      HAM_0001466  ISIC_0031633    bkl   histo  75.0    male          ear   
...            ...           ...    ...     ...   ...     ...          ...   
10010  HAM_0002867  ISIC_0033084  akiec   histo  40.0    male      abdomen   
10011  HAM_0002867  ISIC_0033550  akiec   histo  40.0    male      abdomen   
10012  HAM_0002867  ISIC_0033536  akiec   histo  40.0    male      abdomen   
10013  HAM_0000239  ISIC_0032854  akiec   histo  80.0    male         face   
10014  HAM_0003521  ISIC_0032258    mel   histo  70.0  female         back   

            dataset is_duplicate train_test_split  
0      vidi

In [18]:
# Image id of train and test images
train_list = list(train_df['image_id'])
test_list = list(test_df['image_id'])

len(test_list)

812

In [19]:
len(train_list)


8946

In [20]:
# Set the image_id as the index in data_pd
data_pd.set_index('image_id', inplace=True)

In [21]:
os.mkdir(train_dir)
os.mkdir(test_dir)

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'HAM10000\\train_dir'

In [18]:
from TrainTestGenerator import generateTrainTestDir

generateTrainTestDir(train_dir, test_dir, data_pd, train_list, test_list)

In [19]:
from DataAugmentation import startAugmentation

startAugmentation()

Found 304 images belonging to 1 classes.
Found 488 images belonging to 1 classes.
Found 1033 images belonging to 1 classes.
Found 1079 images belonging to 1 classes.
Found 6042 images belonging to 1 classes.


In [21]:
train_path = 'HAM10000/train_dir'
test_path = 'HAM10000/test_dir'
batch_size = 16

In [22]:
datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.inception_resnet_v2.preprocess_input)

In [23]:
image_size = 224
print("\nTrain Batches: ")
train_batches = datagen.flow_from_directory(directory=train_path,
                                            target_size=(image_size, image_size),
                                            batch_size=batch_size,
                                            shuffle=True)

print("\nTest Batches: ")
test_batches = datagen.flow_from_directory(test_path,
                                           target_size=(image_size, image_size),
                                           batch_size=batch_size,
                                           shuffle=False)


Train Batches: 
Found 38726 images belonging to 5 classes.

Test Batches: 
Found 812 images belonging to 5 classes.


In [24]:
from ModelResNet import create_model

model = create_model()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

In [25]:
opt1 = tf.keras.optimizers.Adam(learning_rate=0.01, epsilon=0.1)
model.compile(optimizer=opt1,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [26]:
class_weights = {
    0: 1.0,  # bkl
    1: 1.0,  # nv
    2: 5.0,  # mel
    3: 1.0,  # bcc
    4: 1.0,  # akiec
}

checkpoint = ModelCheckpoint(filepath='ResNet152.hdf5', monitor='val_accuracy', save_best_only=True,
                             save_weights_only=True)
Earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=40, min_delta=0.001)

In [27]:
history = model.fit(train_batches,
                    steps_per_epoch=(len(train_df) / 10),
                    epochs=300,
                    verbose=1,
                    validation_data=test_batches, validation_steps=len(test_df) / batch_size,
                    callbacks=[checkpoint, Earlystop], class_weight=class_weights)


Epoch 1/300
Epoch 2/300
  7/894 [..............................] - ETA: 4:09 - loss: 2.1037 - accuracy: 0.2143

KeyboardInterrupt: 

In [None]:
model.load_weights("ResNet152.hdf5")

In [None]:
from PredictAndEvaluate import evaluatemodel

predicted = evaluatemodel(model, test_batches, test_df, batch_size)
