## Initialization 

In [1]:
import keras
import tensorflow as tf
from tensorflow.keras.applications.resnet import ResNet50, ResNet101, ResNet152, preprocess_input
import random 
import numpy as np
import pandas as pd
import cv2

# data_path = 'https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DBW86T'

## Meta data loading 


In [2]:

path_to_metadata = 'D:/Data/HAM10000_metadata.csv'

metadata =pd.read_csv(path_to_metadata)

## Computation of class weights

In [3]:
# class_weights = {0:1, 1:1, 2:1, 3:10, 4:1, 5:20, 6:20}

label = ['mel', 'nv', 'bcc',  'akiec', 'bkl', 'df', 'vasc']

def estimate_class_weights(label, method = 'mfb'):
    class_weights = np.zeros_like(label, dtype = np.float32)
    counts = np.zeros_like(label)
    for i,l in enumerate(label):
        counts[i] = metadata[metadata['dx']==str(l)]['dx'].value_counts()[0]
    counts = counts.astype(np.float32)
    median_freq = np.median(counts)
    mode_freq = np.max(counts)

    func = lambda x:median_freq / x if method == 'mfb' else mode_freq/x
    class_weights = list(map(func, counts))
    weights = {i:class_weights[i] for i in range(7)}
    
    return weights


class_weights = estimate_class_weights(label, method = 'mfb')
class_weights

{0: 0.4618149,
 1: 0.07665921,
 2: 1.0,
 3: 1.5718654,
 4: 0.46769792,
 5: 4.4695654,
 6: 3.6197183}

## Sequence generator

In [10]:
class DataGenerator(tf.keras.utils.Sequence):

    def __init__(self,
                 batch_size = 5,
                 dim = (224, 224),
                 n_channels = 3,
                 n_classes = 7,
                 shuffle = True,
                 images_address = "D:\\Data\\New folder (3)\\HAM10000_images_part_1",
                 label_address = "D:\\Data\\New folder (3)\\hmnist_8_8_L.csv"
                 ):

        self.dim = dim
        self.batch_size = batch_size
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.images_address = images_address
        self.labels = pd.read_csv(label_address)
        self.image_name = self.labels['image'].values
        self.labels.drop(columns=['image'], inplace=True)
        self.on_epoch_end()

    def __len__(self):

        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.image_name) / self.batch_size))

    def __getitem__(self, index):

        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find a list of labels
        labels_index_temp = [self.image_name[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(labels_index_temp)

        return X, y

    def on_epoch_end(self):

        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.image_name))
        if self.shuffle:
            np.random.shuffle(self.indexes)


    def __data_generation(self, labels_index_temp):

        # Generates data containing batch_size samples'

        input_data = np.empty((self.batch_size, *self.dim, self.n_channels))
        label = np.empty((self.batch_size, self.n_classes), dtype=np.float32)

        for index, item in enumerate(labels_index_temp):

            img = cv2.imread(self.images_address + item + '.jpg')
            img = cv2.resize(img, self.dim, cv2.INTER_CUBIC)
            input_data[index,] = tf.keras.applications.resnet50.preprocess_input(img)
            label[index] = self.labels.values[np.where(self.image_name == item)]


        return input_data.astype(np.float32), label


## Data loading 

In [None]:
img_path_tr = "D:\\Data\\New folder (3)\\HAM10000_images_part_1"
label_path_tr = "D:\\Data\\New folder (3)\\hmnist_8_8_L.csv"

 
batch_size = 5
train_generator = DataGenerator(batch_size = batch_size, 
                               images_address = img_path_tr, 
                               label_address = label_path_tr)



## Transfer learning - Feature represenation of ResNet50 with a customized learning) 

In [6]:
feature_maps = ResNet50(weights = 'imagenet', include_top = False, input_shape = (224, 224, 3)) 
model = keras.models.Sequential()
feature_maps.trainable = False
model.add(feature_maps)
# model.add(keras.layers.MaxPool2D((7, 7)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(7, activation = 'softmax'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 flatten (Flatten)           (None, 100352)            0         
                                                                 
 dense (Dense)               (None, 7)                 702471    
                                                                 
Total params: 24,290,183
Trainable params: 702,471
Non-trainable params: 23,587,712
_________________________________________________________________


In [7]:
# model.load_weights('cancer_model_primary_frozen_layers_weighting.hdf5')

#### Another way of construction

In [160]:
# target_model = feature_maps.output
# target_model = keras.layers.AveragePooling2D(pool_size = (7, 7))(target_model)
# target_model = keras.layers.Flatten()(target_model)
# target_model = keras.layers.Dense(7, activation = 'softmax')(target_model)
# model = keras.models.Model(inputs = feature_maps.input, outputs = target_model)

## Model configuration

In [8]:
# class_weights = {0:1, 1:1, 2:1, 3:10, 4:1, 5:20, 6:20}

model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3), 
              loss = 'categorical_crossentropy', 
              metrics =['categorical_crossentropy', 'accuracy'])

model.fit(train_generator, 
          validation_data = validation_generator, 
          epochs = 10,  
          shuffle = True, 
          class_weight=class_weights)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2ab43491c88>

In [None]:
# model.save('model.hdf5')

In [24]:
validation_generator = DataGenerator(batch_size = 1, 
                               images_address = img_path_val, 
                               label_address = label_path_val)


y_tr_pre = []
y_tr_te = []
for x, y in validation_generator:
    y_pre_te = model.predict(x)
    y_tr_pre.append(np.argmax(y_pre_te[0], axis = 0))
    y_tr_te.append(np.argmax(y[0], axis = 0))


In [25]:
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_tr_te, y_tr_pre))

[[ 15   2   0   1   3   0   0]
 [  7 106   2   0   8   0   0]
 [  0   0   8   2   4   1   0]
 [  1   0   1   1   5   0   0]
 [  6   2   1   1  12   0   0]
 [  0   0   0   0   0   1   0]
 [  1   0   0   0   1   0   1]]


In [26]:
print(classification_report(y_tr_te, y_tr_pre))

              precision    recall  f1-score   support

           0       0.50      0.71      0.59        21
           1       0.96      0.86      0.91       123
           2       0.67      0.53      0.59        15
           3       0.20      0.12      0.15         8
           4       0.36      0.55      0.44        22
           5       0.50      1.00      0.67         1
           6       1.00      0.33      0.50         3

    accuracy                           0.75       193
   macro avg       0.60      0.59      0.55       193
weighted avg       0.79      0.75      0.76       193



In [172]:
train_generator = DataGenerator(batch_size = 1, 
                               images_address = img_path_tr, 
                               label_address = label_path_tr)

y_tr_pre = []
y_tr_te = []
for x, y in train_generator:
    y_pre_te = model.predict(x)
    y_tr_pre.append(np.argmax(y_pre_te[0], axis = 0))
    y_tr_te.append(np.argmax(y[0], axis = 0))

In [173]:
print(confusion_matrix(y_tr_te, y_tr_pre))

[[1053   46    4    4    6    0    0]
 [ 409 5963   59   33  218   18    5]
 [   5    8  481    4   15    1    0]
 [  10    1    1  308    7    0    0]
 [  70   42    2    8  976    0    1]
 [   1    0    1    1    1  111    0]
 [   1    7    2    1    2    1  128]]


In [176]:
print(classification_report(y_tr_te, y_tr_pre))

              precision    recall  f1-score   support

           0       0.68      0.95      0.79      1113
           1       0.98      0.89      0.93      6705
           2       0.87      0.94      0.90       514
           3       0.86      0.94      0.90       327
           4       0.80      0.89      0.84      1099
           5       0.85      0.97      0.90       115
           6       0.96      0.90      0.93       142

    accuracy                           0.90     10015
   macro avg       0.86      0.92      0.89     10015
weighted avg       0.92      0.90      0.90     10015

