# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import cohen_kappa_score

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras.datasets import mnist
from tensorflow.keras.callbacks import EarlyStopping

import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Helper Functions

In [2]:
# Quadratic Weighted Kappa function
def Cmatrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings

def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa
    axquadratic_weighted_kappa calculates the quadratic weighted kappa
    value, which is a measure of inter-rater agreement between two raters
    that provide discrete numeric ratings.  Potential values range from -1
    (representing complete disagreement) to 1 (representing complete
    agreement).  A kappa value of 0 is expected if all agreement is due to
    chance.
    quadratic_weighted_kappa(rater_a, rater_b), where rater_a and rater_b
    each correspond to a list of integer ratings.  These lists must have the
    same length.
    The ratings should be integers, and it is assumed that they contain
    the complete range of possible ratings.
    quadratic_weighted_kappa(X, min_rating, max_rating), where min_rating
    is the minimum possible rating, and max_rating is the maximum possible
    rating
    """
    rater_a = y
    rater_b = y_pred
    min_rating=None
    max_rating=None
    rater_a = np.array(rater_a, dtype=int)
    rater_b = np.array(rater_b, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = Cmatrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

## Loading in the Data

In [3]:
train = pd.read_csv('./train.csv')
images = np.load('blindness_images.npy')

In [9]:
images.shape

(3662, 128, 128, 3)

# Train Test Split

In [10]:
X = images
y = train['diagnosis']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, random_state = 42)

y_train = utils.to_categorical(y_train, 5)
y_test = utils.to_categorical(y_test, 5)

I tried running other models before in a different notebook, however they left me wanting for more approaches that could score better. In my search was Image Augmentation, where the images are manipulated to help the model train on data that wasn't all the same. Doing so should in theory assist the model in identifying underlying patterns. What follows below is my attempt at seeing what stuck.

## 4 Conv Layer Model without Augmentation 

In [18]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
model_test = Sequential()

model_test.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_test.add(MaxPooling2D(2, 2))
model_test.add(Conv2D(64, (3,3), activation='relu'))
model_test.add(MaxPooling2D(2,2))
model_test.add(Conv2D(128, (3,3), activation='relu'))
model_test.add(MaxPooling2D(2,2))
model_test.add(Conv2D(128, (3,3), activation='relu'))
model_test.add(MaxPooling2D(2,2))

model_test.add(Flatten())

model_test.add(Dense(512, activation='relu'))
model_test.add(Dense(5, activation='softmax'))


model_test.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_test = model_test.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 00023: early stopping


In [19]:
pred_test = model_test.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_test,axis=1))

0.7367323696136592

I tested multiple convolutional layers and was surprised that even without augmentation it performed well. If there was any augmentation that could be done to improve the kappa score, I would happy.

# 4 Conv Layer with Augmentation Layer

In [20]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_augmentation2 = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.2),
    layers.experimental.preprocessing.RandomZoom(0.1, 0.3),
    layers.experimental.preprocessing.RandomContrast(0.2),
  ]
)


model_test_aug = Sequential()

model_test_aug.add(data_augmentation2)

model_test_aug.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_test_aug.add(MaxPooling2D(2, 2))
model_test_aug.add(Conv2D(64, (3,3), activation='relu'))
model_test_aug.add(MaxPooling2D(2,2))
model_test_aug.add(Conv2D(128, (3,3), activation='relu'))
model_test_aug.add(MaxPooling2D(2,2))
model_test_aug.add(Conv2D(128, (3,3), activation='relu'))
model_test_aug.add(MaxPooling2D(2,2))

model_test_aug.add(Flatten())

model_test_aug.add(Dense(512, activation='relu'))
model_test_aug.add(Dense(5, activation='softmax'))


model_test_aug.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_test_aug = model_test_aug.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 00056: early stopping


In [21]:
pred_aug = model_test_aug.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_aug,axis=1))

0.6909811825012597

This model with augmentation did not score as well as I thought it would. It needed further tuning.

# 4 Conv Layer without Contrast and 0.1 Rotation and Zoom

In [22]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_augmentation2 = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1)
  ]
)


model_test_aug2 = Sequential()

model_test_aug2.add(data_augmentation2)

model_test_aug2.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_test_aug2.add(MaxPooling2D(2, 2))
model_test_aug2.add(Conv2D(64, (3,3), activation='relu'))
model_test_aug2.add(MaxPooling2D(2,2))
model_test_aug2.add(Conv2D(128, (3,3), activation='relu'))
model_test_aug2.add(MaxPooling2D(2,2))
model_test_aug2.add(Conv2D(128, (3,3), activation='relu'))
model_test_aug2.add(MaxPooling2D(2,2))

model_test_aug2.add(Flatten())

model_test_aug2.add(Dense(512, activation='relu'))
model_test_aug2.add(Dense(5, activation='softmax'))


model_test_aug2.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_test_aug2 = model_test_aug2.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 00030: early stopping


In [23]:
pred_aug2 = model_test_aug2.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_aug2,axis=1))

0.7334413962799997

Quite good! Further tuning was required.

# Same Model as above with 0.2 Rotation (Best Augmented Model)

In [26]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_aug_rot2 = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.2),
    layers.experimental.preprocessing.RandomZoom(0.1)
  ]
)


model_aug_rot2 = Sequential()

model_aug_rot2.add(data_aug_rot2)

model_aug_rot2.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_aug_rot2.add(MaxPooling2D(2, 2))
model_aug_rot2.add(Conv2D(64, (3,3), activation='relu'))
model_aug_rot2.add(MaxPooling2D(2,2))
model_aug_rot2.add(Conv2D(128, (3,3), activation='relu'))
model_aug_rot2.add(MaxPooling2D(2,2))
model_aug_rot2.add(Conv2D(128, (3,3), activation='relu'))
model_aug_rot2.add(MaxPooling2D(2,2))

model_aug_rot2.add(Flatten())

model_aug_rot2.add(Dense(512, activation='relu'))
model_aug_rot2.add(Dense(5, activation='softmax'))


model_aug_rot2.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_aug_rot2 = model_aug_rot2.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: early stopping


In [31]:
pred_aug2 = model_aug_rot2.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_aug2,axis=1))

0.734742720382928

This was the best model so far with the highest Kappa.

# More Tuning

## Conclusion for This Notebook

Unfortunately from here on out, the scores did not get better. I believe the reason for that is because images that are too augmented cause the model to learn what an augmented retina looks like, and it is unable to bring that learning over to non-augmented images. I've left the following unsuccessful models for those who wanted to see the process.

In [28]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_aug_rot5 = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.5),
    layers.experimental.preprocessing.RandomZoom(0.1)
  ]
)


model_aug_rot5 = Sequential()

model_aug_rot5.add(data_aug_rot2)

model_aug_rot5.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_aug_rot5.add(MaxPooling2D(2, 2))
model_aug_rot5.add(Conv2D(64, (3,3), activation='relu'))
model_aug_rot5.add(MaxPooling2D(2,2))
model_aug_rot5.add(Conv2D(128, (3,3), activation='relu'))
model_aug_rot5.add(MaxPooling2D(2,2))
model_aug_rot5.add(Conv2D(128, (3,3), activation='relu'))
model_aug_rot5.add(MaxPooling2D(2,2))

model_aug_rot5.add(Flatten())

model_aug_rot5.add(Dense(512, activation='relu'))
model_aug_rot5.add(Dense(5, activation='softmax'))


model_aug_rot5.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_aug_rot5 = model_aug_rot5.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 00031: early stopping


In [30]:
pred_aug5 = model_aug_rot5.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_aug5,axis=1))

0.7269929076392998

In [32]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_rot2_zoom2 = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.2),
    layers.experimental.preprocessing.RandomZoom(0.2)
  ]
)


model_rot2_zoom2 = Sequential()

model_rot2_zoom2.add(data_aug_rot2)

model_rot2_zoom2.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_rot2_zoom2.add(MaxPooling2D(2, 2))
model_rot2_zoom2.add(Conv2D(64, (3,3), activation='relu'))
model_rot2_zoom2.add(MaxPooling2D(2,2))
model_rot2_zoom2.add(Conv2D(128, (3,3), activation='relu'))
model_rot2_zoom2.add(MaxPooling2D(2,2))
model_rot2_zoom2.add(Conv2D(128, (3,3), activation='relu'))
model_rot2_zoom2.add(MaxPooling2D(2,2))

model_rot2_zoom2.add(Flatten())

model_rot2_zoom2.add(Dense(512, activation='relu'))
model_rot2_zoom2.add(Dense(5, activation='softmax'))


model_rot2_zoom2.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_rot2_zoom2 = model_rot2_zoom2.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: early stopping


In [35]:
pred_rot2_zoom2 = model_rot2_zoom2.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_rot2_zoom2,axis=1))

0.698853915926805

In [36]:
tf.random.set_seed(42)
# https://colab.research.google.com/github/lmoroney/dlaicourse/blob/master/Course%202%20-%20Part%204%20-%20Lesson%202%20-%20Notebook%20(Cats%20v%20Dogs%20Augmentation).ipynb
data_rot2_only = Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=(128,128,3)),
    layers.experimental.preprocessing.RandomRotation(0.2)
  ]
)


model_rot2_only = Sequential()

model_rot2_only.add(data_aug_rot2)

model_rot2_only.add(Conv2D(32, (3,3), activation='relu', input_shape=(128, 128, 3)))
model_rot2_only.add(MaxPooling2D(2, 2))
model_rot2_only.add(Conv2D(64, (3,3), activation='relu'))
model_rot2_only.add(MaxPooling2D(2,2))
model_rot2_only.add(Conv2D(128, (3,3), activation='relu'))
model_rot2_only.add(MaxPooling2D(2,2))
model_rot2_only.add(Conv2D(128, (3,3), activation='relu'))
model_rot2_only.add(MaxPooling2D(2,2))

model_rot2_only.add(Flatten())

model_rot2_only.add(Dense(512, activation='relu'))
model_rot2_only.add(Dense(5, activation='softmax'))


model_rot2_only.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

history_rot2_only = model_rot2_only.fit(X_train, y_train, validation_data = (X_test, y_test),
                 batch_size = 256,
                 epochs = 100,
                 verbose = 1,
                  callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 00033: early stopping


In [37]:
pred_rot2_only = model_rot2_only.predict(X_test)
quadratic_weighted_kappa(np.argmax(y_test,axis=1), np.argmax(pred_rot2_only,axis=1))

0.7191814746392702