In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import zipfile
import shutil
from google.colab import files
import json
import time
import pandas as pd

import keras
from keras.models import Model, Sequential, load_model
from keras.applications.resnet50 import ResNet50
from keras.layers import Input, Dense, Activation, Dropout, BatchNormalization,\
                          Conv2D, MaxPooling2D, Flatten, AveragePooling2D,\
                          GlobalAveragePooling2D, ZeroPadding2D
from keras.initializers import glorot_uniform
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop, Adam, Adamax, Nadam, SGD
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, \
                            classification_report

# Import PyDrive and associated libraries (to connect with GoogleDrive)
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# disable warnings
import warnings
warnings.simplefilter("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

Using TensorFlow backend.


### **Check if we are using GPU:**

In [2]:
from keras import backend as K

# The GPU query below goes through TensorFlow, so it is only attempted when
# Keras is running on the TensorFlow backend; otherwise nothing is printed.
if K.backend() == "tensorflow":
    import tensorflow as tf
    # An empty device name is reported as the string "None".
    device_name = tf.test.gpu_device_name() or "None"
    print('Using TensorFlow version:', tf.__version__, ', GPU:', device_name)

Using TensorFlow version: 1.15.0 , GPU: /device:GPU:0


### **Download Validation ('Control') patches from GoogleDrive:**

#### *Validation patches were augmented with Patch_Generator (using 'stride=22') and then balanced by downsampling the majority classes, so that we can compare the accuracy of the model.*

### **NOTE: Validation patches were generated from the original, non-preprocessed images. This ensures that our model performs well at test time, when pre-processing may not be feasible. For example, it may not be possible to create masks or image annotations for the test data.**



In [3]:
# Sign this Colab session in and build a PyDrive client from the session's
# default application credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive id of the augmented and balanced validation archive.
file_id = '1fYVv6VwiotljBXOb2PAfWPCHVyugkbhJ'

# Save the Drive file locally under the same name as its Drive title.
downloaded = drive.CreateFile({'id': file_id})
archive_name = downloaded['title']
downloaded.GetContentFile(archive_name)

print('Downloaded content: "{}"'.format(archive_name))
print('Root dir content: {}'.format(os.listdir()))

Downloaded content: "Control.zip"
Root dir content: ['.config', 'adc.json', 'Control.zip', 'sample_data']


### **Unzip the Validation ('Control') patches:**

In [4]:
# Make sure the file we are about to unpack really is a zip archive BEFORE
# deleting anything: `downloaded` is reused for other downloads in this
# notebook, so an out-of-order re-run could otherwise wipe './Control' and
# then fail to restore it.
if not zipfile.is_zipfile(downloaded['title']):
    raise zipfile.BadZipFile(
        '"{}" is not a zip archive; re-run the validation download cell first.'
        .format(downloaded['title']))

# Remove the 'Control' dir if it already exists, so stale patches from an
# earlier extraction cannot mix with the fresh ones.
if os.path.isdir('./Control'):
    shutil.rmtree('./Control')

# The handle is called `archive` rather than `zip` so that the builtin zip()
# is not shadowed for the rest of the notebook session.
with zipfile.ZipFile(downloaded['title'], "r") as archive:
    archive.extractall()

# The archive itself is no longer needed once its contents are extracted.
os.remove(downloaded['title'])
print('Root dir content: {}'.format(os.listdir()))

Root dir content: ['.config', 'adc.json', 'Control', 'sample_data']


### **Let's count patches by type and location:**

In [7]:
# Location labels; the patches of each label live in the sub-folder
# './<val_type>/<label>_pos'.
classes = ['C1', 'C2-3', 'C4-7', 'C5', 'C6', 'C8', 'C9', 'C10']
val_type = 'Control'

pos_total = 0   # initialised here but not updated in this cell
type_pos = 0    # running total of patches over all locations

print("\nTotal '{}' Patches per location:".format(val_type))
for cls in classes:
    folder = './{}/{}_pos'.format(val_type, cls)
    # Every directory entry is counted as one patch.
    n_pos = len(os.listdir(folder))
    print('total_{}: {}'.format(cls, n_pos))
    type_pos = type_pos + n_pos
print('Total {}: {}'.format(val_type, type_pos))


Total 'Control' Patches per location:
total_C1: 364
total_C2-3: 364
total_C4-7: 364
total_C5: 364
total_C6: 364
total_C8: 364
total_C9: 364
total_C10: 364
Total Control: 2912


#### **Let's build the image generator, using keras.preprocessing.image.ImageDataGenerator, rescaling image pixel values from [0, 255] to [0, 1]:**

In [9]:
# Read a single patch to find out the patch dimensions; the first directory
# entry is enough (the original `[:5][0]` slice selected exactly this entry).
c1_pos_folder = './Control/C1_pos'
sample_patch = os.listdir(c1_pos_folder)[0]
img = plt.imread(os.path.join(c1_pos_folder, sample_patch))
img_size = img.shape
val_batch_size = 64

# Only rescaling is applied: pixel values are multiplied by 1/255.
val_datagen = ImageDataGenerator(rescale=1./255)

# One class per sub-folder of './Control'. Images are resized to the height
# and width of the sample patch, labels are produced in 'categorical' mode,
# and shuffling is disabled so batches are not drawn in random order.
val_generator = val_datagen.flow_from_directory(
        './Control',
        target_size=(img_size[0], img_size[1]),
        batch_size=val_batch_size,
        class_mode='categorical',
        shuffle=False)

Found 2912 images belonging to 8 classes.


#### **Let's check which index the data generator assigns to each class:**

In [10]:
# Pretty-print the class-name -> index mapping held by the generator.
class_indices_json = json.dumps(val_generator.class_indices, indent=2, default=str)
print('validation_generator.class_indices:', class_indices_json)

validation_generator.class_indices: {
  "C10_pos": 0,
  "C1_pos": 1,
  "C2-3_pos": 2,
  "C4-7_pos": 3,
  "C5_pos": 4,
  "C6_pos": 5,
  "C8_pos": 6,
  "C9_pos": 7
}


### **Download the Model from GoogleDrive:**

In [11]:
# Authenticate and create the PyDrive client. The client is rebuilt here, so
# this cell does not rely on the `drive` object created by an earlier cell.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive id of the trained model file (NOT the validation archive; the
# previous comment here was a copy-paste leftover).
file_id = '1w0u_EKaSG8zkMRtYkNjFd3IOnR3IpQsJ'

# Save the model file locally under the same name as its Drive title.
# NOTE: this rebinds `downloaded`, which earlier cells used for the
# validation archive.
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile(downloaded['title'])
print('Downloaded content: "{}"'.format(downloaded['title']))
print('Root dir content: {}'.format(os.listdir()))

Downloaded content: "base_model_085.h5"
Root dir content: ['.config', 'base_model_085.h5', 'adc.json', 'Control', 'sample_data']


#### **Let's evaluate the best model on the validation set and compute the relevant metrics:**

In [0]:
# Restore the saved Keras model from the HDF5 file in the working directory.
model_path = 'base_model_085.h5'
res_cnn = load_model(model_path)
# To inspect the architecture, run: res_cnn.summary()

In [16]:
## Evaluate model on balanced validation patches:

# Rewind the generator so the pass always starts at its first batch, even when
# this cell is re-run or batches were already consumed elsewhere.
val_generator.reset()

# Gather labels and predictions batch by batch and concatenate once at the
# end; stacking onto a growing array inside the loop re-copies all previous
# batches on every iteration.
true_batches, pred_batches = [], []
for _ in range(len(val_generator)):
    X, y = next(val_generator)
    true_batches.append(y)
    pred_batches.append(res_cnn.predict(X))

# Convert per-class rows (labels and model outputs) to integer class ids.
y_true = np.argmax(np.concatenate(true_batches), axis=1)
y_pred = np.argmax(np.concatenate(pred_batches), axis=1)

# Order class names by their generator index instead of relying on dict
# iteration order, and pass the full label list explicitly so the confusion
# matrix rows and the report rows always line up with `class_names`, even if
# some class never appears in y_true / y_pred.
class_names = sorted(val_generator.class_indices,
                     key=val_generator.class_indices.get)
label_ids = list(range(len(class_names)))

val_acc = accuracy_score(y_true, y_pred)
# NOTE(review): ROC AUC is intentionally not computed here; for a multi-class
# problem it would need the per-class scores rather than the argmax labels.
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
c_report = classification_report(y_true, y_pred, labels=label_ids,
                                 target_names=class_names)

print('\nval_acc:\n', val_acc)
print('\nConfusion Matrix:\n', cm)
print('\nClassification Report:\n', c_report)


val_acc:
 0.8464972527472527

Confusion Matrix:
 [[363   0   1   0   0   0   0   0]
 [  2 352   3   0   0   0   7   0]
 [  0 101 254   0   0   0   9   0]
 [  0   0   1 299  62   0   2   0]
 [  0   0   0 213 151   0   0   0]
 [  0   0   1   4   0 359   0   0]
 [  0   8   0  33   0   0 323   0]
 [  0   0   0   0   0   0   0 364]]

Classification Report:
               precision    recall  f1-score   support

     C10_pos       0.99      1.00      1.00       364
      C1_pos       0.76      0.97      0.85       364
    C2-3_pos       0.98      0.70      0.81       364
    C4-7_pos       0.54      0.82      0.65       364
      C5_pos       0.71      0.41      0.52       364
      C6_pos       1.00      0.99      0.99       364
      C8_pos       0.95      0.89      0.92       364
      C9_pos       1.00      1.00      1.00       364

    accuracy                           0.85      2912
   macro avg       0.87      0.85      0.84      2912
weighted avg       0.87      0.85      0.84     