### Diabetic Retinopathy Binary Classification
#### Using transfer learning with the pretrained VGG16 model


## Import Libraries

In [None]:
from PIL import Image
import pathlib
import math
from glob import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import itertools

from keras.applications.vgg16 import VGG16
from keras import backend as K
from keras.models import Model
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten, Input, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import load_img,img_to_array,ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

## Data Preparation

In [None]:
## Load Images from Each Folder
# Dataset root; the class folders are joined onto this path further down.
path = '../input/diabetic-retinopathy-2015-data-colored-resized/colored_images/colored_images/'

# Only 1/ReductionFactor of each class folder is loaded
# (to avoid running out of RAM).
ReductionFactor = 20

# Image Reading Function
def read_images(path, num_img, dtype=np.float64):
    """Load up to ``num_img`` images from a directory into one array.

    Args:
        path: Directory containing the image files.
        num_img: Number of rows to allocate and the maximum number of
            files to read.
        dtype: Element type of the returned array. Defaults to float64 for
            backward compatibility; pass ``np.uint8`` to avoid holding an
            8x larger buffer when the pixels are cast to uint8 anyway.

    Returns:
        Array of shape ``(num_img, 224, 224, 3)``. If the directory holds
        fewer than ``num_img`` files, the trailing rows stay all-zero.

    Raises:
        ValueError: If an image is not 224x224 pixels (the assignment into
            the fixed-size buffer fails).
    """
    array = np.zeros((num_img, 224, 224, 3), dtype=dtype)
    # os.listdir returns entries in arbitrary order; sorting makes the
    # selected subset the same on every run.
    filenames = sorted(os.listdir(path))[:num_img]
    for i, filename in enumerate(filenames):
        # The context manager closes the underlying file promptly instead
        # of leaving that to garbage collection.
        with Image.open(os.path.join(path, filename)) as img:
            # Force three channels so greyscale/RGBA files still fit the buffer.
            array[i] = np.asarray(img.convert("RGB"), dtype="uint8")
    return array

def _load_category(folder, shape_label):
    """Load the reduced subset of one class folder and print its shape.

    Returns the folder path, the number of images requested
    (ceil(file count / ReductionFactor)) and the images as a uint8 array.
    """
    folder_dir = os.path.join(path, folder)
    n_files = len(glob(os.path.join(path, folder + '/*')))
    n_keep = math.ceil(n_files / ReductionFactor)
    images = read_images(folder_dir, n_keep).astype(np.uint8)
    print(shape_label, images.shape)
    return folder_dir, n_keep, images


# No DR
noDR_dir, n_noDR, X_noDR = _load_category('No_DR', "No DR Shape: ")

# Mild DR
mildDR_dir, n_mildDR, X_mildDR = _load_category('Mild', "Mild DR Shape: ")

# Moderate DR
modDR_dir, n_modDR, X_modDR = _load_category('Moderate', "Moderate DR Shape: ")

# Proliferate DR
proDR_dir, n_proDR, X_proDR = _load_category('Proliferate_DR', "Proliferate DR Shape: ")

# Severe DR
sevDR_dir, n_sevDR, X_sevDR = _load_category('Severe', "Severe DR Shape: ")

In [None]:
## Show an Image from Each Category
fig = plt.figure()
fig.suptitle('Left to Right: No DR, Mild, Moderate, Proliferate, Severe')

# One panel per class, in the order named in the title. Index 5 is the
# sample picked from each class array.
class_arrays = (X_noDR, X_mildDR, X_modDR, X_proDR, X_sevDR)
for panel, class_images in enumerate(class_arrays, start=1):
    fig.add_subplot(1, 5, panel)
    plt.imshow(class_images[5])
    plt.axis("off")

plt.show()

In [None]:
## Combine Arrays to Single X Array
# Stack the per-class arrays along the sample axis, No DR first; the label
# vector built later relies on this ordering.
X = np.concatenate([X_noDR, X_mildDR, X_modDR, X_proDR, X_sevDR], axis=0)
print("X Shape: ", X.shape)

# Delete Arrays to Save RAM (the concatenated copy is all that is needed now)
del X_noDR
del X_mildDR
del X_modDR
del X_proDR
del X_sevDR

In [None]:
## Normalise RGB Values to be in Range 0-1
# Dividing the uint8 array by a Python float would promote it to float64
# (8 bytes per value). Cast to float32 once and divide in place instead, so
# the scaled array is half that size and no second float buffer is created.
X = X.astype(np.float32)
X /= 255.0

In [None]:
## Create Labels Array Y
# No DR -> label 0
zeros = np.zeros(n_noDR)

# Any grade of DR (mild, moderate, proliferate, severe) -> label 1
ones = np.ones(n_mildDR + n_modDR + n_proDR + n_sevDR)

# Order matches X: the No DR images come first, then every DR class.
Y = np.concatenate((zeros, ones), axis=0)
print("Y Shape: ", Y.shape)

## Create Class Weights Dict
# Weights must be INVERSELY proportional to class frequency so the rarer
# class contributes as much to the loss as the common one. The previous
# formula (class share of the data) up-weighted the majority class.
# This is the usual "balanced" heuristic: n_samples / (n_classes * class_count).
# max(..., 1) only guards against division by zero for an empty class.
n_total = len(zeros) + len(ones)
weights = {
    0: n_total / (2 * max(len(zeros), 1)),
    1: n_total / (2 * max(len(ones), 1)),
}

In [None]:
## Shuffle Data
# Images and labels are permuted together so each row keeps its label.
# random_state=None (the default) draws from NumPy's global random state.
X, Y = shuffle(X, Y, random_state=None)

In [None]:
## Split into Training, Validation and Test
# (labels stay a 0/1 vector; no one-hot encoding is applied here)
# 80% goes to training; the held-out 20% is then halved into validation
# and test, i.e. roughly 10% of the data each.
X_train, X_TestAndVal, Y_train, Y_TestAndVal = train_test_split(
    X, Y, test_size=0.20
)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_TestAndVal, Y_TestAndVal, test_size=0.50
)
for shape_label, split_images in (
    ("X_train Shape: ", X_train),
    ("X_val Shape: ", X_val),
    ("X_test Shape: ", X_test),
):
    print(shape_label, split_images.shape)

In [None]:
## Show How Many Images of Each Category are in Training Set
# Pass the labels as x= explicitly: a positional argument is deprecated in
# seaborn 0.11 and is taken as `data` from 0.12 on, which no longer yields
# one bar per label value.
sns.countplot(x=Y_train)

In [None]:
## Show How Many Images of Each Category are in Validation Set
# Pass the labels as x= explicitly: a positional argument is deprecated in
# seaborn 0.11 and is taken as `data` from 0.12 on, which no longer yields
# one bar per label value.
sns.countplot(x=Y_val)

In [None]:
## Show How Many Images of Each Category are in Test Set
# Pass the labels as x= explicitly: a positional argument is deprecated in
# seaborn 0.11 and is taken as `data` from 0.12 on, which no longer yields
# one bar per label value.
sns.countplot(x=Y_test)

In [None]:
# Only the train/val/test splits are used from here on; drop the full
# arrays and the intermediate hold-out split.
del X, Y
del X_TestAndVal, Y_TestAndVal

## Build Model

In [None]:
# VGG16 base with ImageNet weights and without its top classifier layers.
# The input shape is read off the training images (axes 1-3, i.e. everything
# after the sample axis).
pretrained_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=tuple(X_train.shape[1:4]),
)
#pretrained_model.summary()

In [None]:
# Freeze layers 0-12 of the pretrained base and leave layers 13 onwards
# trainable for fine-tuning.
# NOTE(review): this boundary is the net effect of freezing [:15] and then
# re-enabling [13:]; confirm that 13 (not 15) is the intended cut-off.
for index, layer in enumerate(pretrained_model.layers):
    layer.trainable = index >= 13

In [None]:
# Name -> layer lookup for the pretrained base.
layer_dict = {layer.name: layer for layer in pretrained_model.layers}

# The new head starts from the output of the 'block5_pool' layer.
x = layer_dict['block5_pool'].output

# Classifier head, applied in order.
head_layers = [
    Flatten(),
    # Fully connected layer
    Dense(256, activation='relu'),
    Dropout(0.2),
    # Final layer: a single sigmoid unit, since the problem is binary
    # (the output is one probability rather than one unit per class).
    Dense(1, activation='sigmoid'),
]
for head_layer in head_layers:
    x = head_layer(x)

# Create the new model: pretrained input -> new head output.
custom_model = Model(inputs=pretrained_model.input, outputs=x)

In [None]:
# Print the summary of the assembled model (pretrained base + new head).
custom_model.summary()

In [None]:
# Binary cross-entropy loss with the Adam optimiser. AUC is the only metric
# tracked, so fit()/evaluate() report loss and AUC (no accuracy).
custom_model.compile(
    loss='BinaryCrossentropy',
    optimizer='adam',
    metrics=['AUC'],
)

## Train the Trainable Layers

In [None]:
# Train the model: 15 epochs, batches of 200 images, validated on the
# validation split after every epoch, with the per-class loss weights applied.
History = custom_model.fit(
    X_train,
    Y_train,
    epochs=15,
    batch_size=200,
    class_weight=weights,
    validation_data=(X_val, Y_val),
)

In [None]:
# Training vs. validation loss per epoch, drawn in the left cell of a 1x2 grid.
plt.figure(figsize=(20, 10))
plt.subplot(1, 2, 1)
for history_key, curve_label in (("loss", "train loss"), ("val_loss", "val loss")):
    plt.plot(History.history[history_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Training vs. validation metric curves, drawn in the right cell of a 1x2 grid.
# The model is compiled with metrics=['AUC'], so History.history has no
# "accuracy"/"val_accuracy" entries and indexing them raises KeyError.
# Plot whatever metrics were actually recorded instead; the key is looked up
# rather than hard-coded because Keras may suffix it (e.g. "auc_1") when the
# cells are re-run in the same session.
plt.figure(figsize=(20, 10))
plt.subplot(1, 2, 2)
metric_names = [
    key for key in History.history
    if key != "loss" and not key.startswith("val_")
]
for metric_name in metric_names:
    plt.plot(History.history[metric_name], label="train " + metric_name)
    val_key = "val_" + metric_name
    if val_key in History.history:
        plt.plot(History.history[val_key], label="val " + metric_name)
plt.legend()
plt.show()

# Performance on Test Set

In [None]:
## Test Loss and AUC
# evaluate() returns the loss followed by the compiled metrics. The only
# compiled metric is AUC, so the second value is the test AUC, not accuracy.
score, test_auc = custom_model.evaluate(X_test, Y_test, verbose=0)
acc = test_auc  # legacy name, kept so any later code reading `acc` still works
print('Test score:', score)
print('Test AUC:', test_auc)

In [None]:
## Confusion Matrix
# Raw sigmoid outputs (one probability per test image) and the true labels.
predictions = custom_model.predict(X_test)
print(predictions)
print(Y_test)

# Turn probabilities into hard 0/1 predictions at the 0.5 threshold, then
# tabulate them against the true labels. Rows are true classes and columns
# are predicted classes; labels=[0, 1] keeps the matrix 2x2 even if one
# class is missing from the test split.
predicted_labels = (predictions.ravel() > 0.5).astype(int)
print(confusion_matrix(Y_test.astype(int), predicted_labels, labels=[0, 1]))