In [1]:
import os
import openslide
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa

In [2]:
# Path to the folder that holds the pre-processed images.
data_dir = r'D:\PhDMain\Courses\DeepLearning\PANDAS\ProcessedImages'
# ISUP grades of the images (read below via the 'image_id' and 'isup_grade' columns).
labels = pd.read_csv(
    r'D:\PhDMain\Courses\DeepLearning\PANDAS\train.csv'
)

In [3]:
# Selects which pre-processed image set is used. The original note calls the
# two sets N=16 and N=100 (presumably 'small' = N=16 and 'large' = N=100 --
# TODO confirm). This notebook only uses the larger images; the smaller ones
# are handled in a separate notebook.
SIZE = 'large'

# Per-size settings: (image side length in pixels, mini-batch size).
_SIZE_SETTINGS = {
    'small': (512, 32),
    'large': (1280, 5),
}
if SIZE not in _SIZE_SETTINGS:
    # Fail here with a clear message instead of with a NameError on
    # DIM / batch_size somewhere further down the notebook.
    raise ValueError(
        "SIZE must be one of %s, got %r" % (sorted(_SIZE_SETTINGS), SIZE)
    )
DIM, batch_size = _SIZE_SETTINGS[SIZE]

# Number of images in the training, validation and test splits.
no_train = 7962
no_valid = 996
no_test = 996

# Number of output classes (units of the final softmax layer).
no_classes = 6

In [4]:
# Holding all the data in memory takes far too much RAM, so we use a generator
# that reads only the images needed for one batch at a time from disk.
# This code is based on:
# https://medium.com/datadriveninvestor/keras-training-on-large-datasets-3e9d9dbc09d4
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of images and ISUP grades from disk.

    Every entry of ``path`` is opened with ``np.load`` and the array stored
    under the key ``image_size`` is used as the image. The label of a file is
    looked up by its name without the extension.
    """

    def __init__(self, path, labels, batch_size, image_size):
        """Index the files in ``path`` and look up their labels.

        Args:
            path: Directory whose entries are the samples of this data set.
            labels: DataFrame with the columns 'image_id' and 'isup_grade'.
            batch_size: Number of samples per batch.
            image_size: Key selecting the array inside each loaded file
                (this notebook passes 'small' or 'large').

        Raises:
            KeyError: If a file name (without extension) is not present in
                the 'image_id' column of ``labels``.
        """
        # No-op on older tf.keras; newer Keras versions expect this call.
        super().__init__()
        self.path = path
        # os.listdir returns entries in arbitrary order; sort so that the
        # sample order is reproducible between runs and machines.
        self.file_names = sorted(os.listdir(path))
        label_dict = dict(labels[['image_id', 'isup_grade']].values)
        # splitext instead of name[:-4]: does not assume a 4-character extension.
        self.labels = [label_dict[os.path.splitext(name)[0]] for name in self.file_names]
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self):
        """Return the number of batches needed for one full epoch."""
        # The `np.int` alias was removed from NumPy (1.24); return a plain int.
        return int(np.ceil(len(self.file_names) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, labels)``, two numpy arrays.

        The last batch is shorter when the number of files is not a multiple
        of the batch size.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_files = self.file_names[start:stop]
        batch_y = self.labels[start:stop]

        images = []
        for file_name in batch_files:
            # Context manager closes the archive even if the key lookup fails.
            with np.load(os.path.join(self.path, file_name)) as data:
                images.append(data[self.image_size])
        return np.array(images), np.array(batch_y)


In [5]:
# Set up generators for both the training and the validation set.
# The folders are derived from data_dir instead of repeating the absolute path
# in non-raw string literals, whose backslashes form invalid escape sequences
# (e.g. '\P', '\T', '\V').
train_generator = DataGenerator(os.path.join(data_dir, 'TrainSet'), labels, batch_size, SIZE)
valid_generator = DataGenerator(os.path.join(data_dir, 'ValidationSet'), labels, batch_size, SIZE)

In [6]:
# Download ResNet50V2 together with its ImageNet-trained weights. We also build
# a dictionary keyed by layer name so that individual ResNet layers can be
# picked out by name later on.
ResNet50 = keras.applications.ResNet50V2(
    include_top=False,
    weights="imagenet",
    input_shape=[DIM, DIM, 3],
)
LayerDict = {layer.name: layer for layer in ResNet50.layers}
ResNet50.summary()

Model: "resnet50v2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
_________________________________________________________________________________________

In [7]:
# Freeze every layer of the pre-trained network.
for backbone_layer in ResNet50.layers:
    backbone_layer.trainable = False

In [31]:
# Definition of the first model we try: six convolutional layers, two at a
# time, each pair followed by a pooling layer and batch normalization. Last
# comes a softmax layer with one node per class (6).
NM = LayerDict['post_relu'].output
# (filters, kernel side) of each pair of convolutions, in order.
for n_filters, kernel in ((256, 4), (256, 3), (512, 2)):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=n_filters,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model1 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)
Model1.summary()

Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
___________________________________________________________________________________________

In [32]:
# Sparse (integer) labels; track accuracy and the quadratic-weighted Cohen's
# kappa. num_classes follows no_classes so that it cannot drift away from the
# size of the softmax layer.
Model1.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)

In [33]:
# Train with early stopping on the validation kappa (best weights restored).
# - Model.fit accepts a Sequence directly; fit_generator is deprecated.
# - len(generator) covers every batch, including the last partial one, and
#   does not rely on the hard-coded sample counts.
# - callbacks is passed as a list, as documented.
Model1.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=10, restore_best_weights=True),
    ],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


<tensorflow.python.keras.callbacks.History at 0x20bd0892ee0>

In [25]:
# Second model: fewer convolutional layers, one fully connected dense layer
# with 100 nodes and a dropout layer with rate 0.5.
NM = LayerDict['post_relu'].output
# Kernel side of each pair of 256-filter convolutions, in order.
for kernel in (4, 3):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=256,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(100, activation="relu")(NM)
NM = keras.layers.Dropout(0.5)(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model2 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)
Model2.summary()


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_3[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
____________________________________________________________________________________________

In [26]:
# Sparse (integer) labels; track accuracy and the quadratic-weighted Cohen's
# kappa. num_classes follows no_classes instead of a hard-coded 6.
Model2.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)

# Train with early stopping on the validation kappa (best weights restored).
# Model.fit replaces the deprecated fit_generator; len(generator) covers every
# batch including the last partial one; callbacks is passed as a list.
Model2.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=5, restore_best_weights=True),
    ],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<tensorflow.python.keras.callbacks.History at 0x1f7c23549d0>

In [27]:
# NOTE(review): when the notebook is run top to bottom every ResNet layer has
# already been frozen by the earlier freeze cell, so this loop changes nothing
# in that case. 142 is presumably the number of backbone layers up to the
# cut-off used below -- confirm.
for i in range(142):
    ResNet50.layers[i].trainable = False
# Definition of the third model. This time we take a few fewer layers from
# ResNet: the head is attached to 'conv4_block5_out' instead of 'post_relu'.
NM = LayerDict['conv4_block5_out'].output
# Kernel side of each pair of 256-filter convolutions, in order.
for kernel in (4, 3):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=256,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(100, activation="relu")(NM)
NM = keras.layers.Dropout(0.5)(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model3 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)

# num_classes follows no_classes instead of a hard-coded 6.
Model3.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)

# Train with early stopping on the validation kappa (best weights restored).
# Model.fit replaces the deprecated fit_generator; len(generator) covers every
# batch including the last partial one; callbacks is passed as a list.
Model3.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=5, restore_best_weights=True),
    ],
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


<tensorflow.python.keras.callbacks.History at 0x1f79dd92c70>

In [None]:
# Reload the pre-trained network and rebuild the layer-name lookup.
ResNet50 = keras.applications.ResNet50V2(weights="imagenet", include_top=False, input_shape=[DIM, DIM, 3])
LayerDict = {layer.name: layer for layer in ResNet50.layers}
# Like the third model, but with two fully connected layers of 512 nodes
# instead, separated by 0.5 dropouts.
# NOTE(review): the head below is attached to 'post_relu' (as in the second
# model), not to 'conv4_block5_out' as in the third -- confirm which was meant.
for backbone_layer in ResNet50.layers:
    backbone_layer.trainable = False
NM = LayerDict['post_relu'].output
# Kernel side of each pair of 256-filter convolutions, in order.
for kernel in (4, 3):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=256,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(512, activation="relu")(NM)
NM = keras.layers.Dropout(0.5)(NM)
NM = keras.layers.Dense(512, activation="relu")(NM)
NM = keras.layers.Dropout(0.5)(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model4 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)
Model4.summary()

# Folders are derived from data_dir rather than repeated in non-raw string
# literals whose backslashes form invalid escape sequences.
train_generator = DataGenerator(os.path.join(data_dir, 'TrainSet'), labels, batch_size, SIZE)
valid_generator = DataGenerator(os.path.join(data_dir, 'ValidationSet'), labels, batch_size, SIZE)

# num_classes follows no_classes instead of a hard-coded 6.
Model4.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)

# Train with early stopping on the validation kappa (best weights restored).
# Model.fit replaces the deprecated fit_generator; len(generator) covers every
# batch including the last partial one; callbacks is passed as a list.
Model4.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=5, restore_best_weights=True),
    ],
)


Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_4[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
____________________________________________________________________________________________

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
 245/1592 [===>..........................] - ETA: 10:58 - loss: 1.6958 - acc: 0.2849 - cohen_kappa: -2.1553e-04

In [8]:
# Since fully connected layers appear to be catastrophic, we remove them.
# Instead we add a couple more convolutional layers to see whether that gives
# something better.
NM = LayerDict['post_relu'].output
# (filters, kernel side, batch-norm after pooling?) for each pair of
# convolutions, in order. The third pair has no batch normalization, exactly
# as in the original layer stack.
for n_filters, kernel, use_bn in ((256, 4, True), (256, 3, True), (512, 2, False), (512, 2, True)):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=n_filters,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    if use_bn:
        NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model5 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)
Model5.summary()


# num_classes follows no_classes instead of a hard-coded 6.
Model5.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)
# Train with early stopping on the validation kappa (best weights restored).
# Model.fit replaces the deprecated fit_generator; len(generator) covers every
# batch including the last partial one; callbacks is passed as a list.
Model5.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=5, restore_best_weights=True),
    ],
)


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
______________________________________________________________________________________________

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<tensorflow.python.keras.callbacks.History at 0x20b7a2ca9a0>

In [28]:
# No improvement compared with the first model leads us to try using only 256
# feature maps in every convolutional layer; we have ten layers instead of
# eight as in the fifth model.
NM = LayerDict['post_relu'].output
# (kernel side, batch-norm after pooling?) for each pair of 256-filter
# convolutions, in order. The third and fourth pairs have no batch
# normalization, exactly as in the original layer stack.
for kernel, use_bn in ((4, True), (3, True), (2, False), (2, False), (2, True)):
    for repeat in range(2):
        NM = keras.layers.Conv2D(
            filters=256,
            kernel_size=(kernel, kernel),
            activation='relu',
            padding="same",
        )(NM)
    NM = keras.layers.MaxPool2D()(NM)
    if use_bn:
        NM = keras.layers.BatchNormalization(axis=3)(NM)
NM = keras.layers.Flatten()(NM)
NM = keras.layers.Dense(no_classes, activation="softmax")(NM)
Model6 = tf.keras.models.Model(inputs=ResNet50.input, outputs=NM)
Model6.summary()

# num_classes follows no_classes instead of a hard-coded 6.
Model6.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="acc"),
        tfa.metrics.CohenKappa(weightage='quadratic', num_classes=no_classes, sparse_labels=True),
    ],
)
# Train with early stopping on the validation kappa (best weights restored).
# Model.fit replaces the deprecated fit_generator; len(generator) covers every
# batch including the last partial one; callbacks is passed as a list.
Model6.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_cohen_kappa', mode='max', patience=5, restore_best_weights=True),
    ],
)


Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1280, 1280,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 1286, 1286, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 640, 640, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 642, 642, 64) 0           conv1_conv[0][0]                 
___________________________________________________________________________________________

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<tensorflow.python.keras.callbacks.History at 0x20bb736cf10>

In [29]:
# The first model appears to be the best, so we evaluate it on the test data.
test_label_dict = dict(labels[['image_id', 'isup_grade']].values)
# Derived from data_dir instead of a non-raw string literal whose backslashes
# form invalid escape sequences.
test_path = os.path.join(data_dir, 'TestSet')

# Sorted so that the row order of test_x / test_y is reproducible.
test_file_names = sorted(os.listdir(test_path))
# NOTE: the whole test set is held in memory at once
# (len(test_file_names) * DIM * DIM * 3 float32 values).
test_x = np.empty((len(test_file_names), DIM, DIM, 3), dtype=np.float32)
test_y = np.empty(len(test_file_names))
for i, name in enumerate(test_file_names):
    # Context manager closes each archive once its image has been copied.
    with np.load(os.path.join(test_path, name)) as data:
        test_x[i] = data[SIZE]
    # splitext instead of name[:-4]: does not assume a 4-character extension.
    test_y[i] = test_label_dict[os.path.splitext(name)[0]]

In [34]:
# Evaluate the first model on the held-out test set; the result lists the loss
# followed by the metrics in the order they were passed to compile().
Model1.evaluate(x=test_x, y=test_y)



[1.4714128971099854, 0.5010040402412415, 0.6474312543869019]

In [None]:
#We are somewhat better than for N=16 in terms of the quadratic kappa value, and clearly better in accuracy, which is now
#just above 50% compared with 40% for N=16.