In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.io import imread, imshow
from skimage.transform import resize
import cv2
from sklearn.utils import shuffle
from keras.applications.vgg16 import VGG16

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/gdrive so files stored on the Drive are reachable from this Colab runtime
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
%cd gdrive/My Drive/DIP_final_project_dataset

/content/gdrive/My Drive/DIP_final_project_dataset


In [None]:
# Clone the dataset used by the paper's authors into the current working directory
! git clone https://github.com/yiweichen04/retina_dataset.git

Cloning into 'retina_dataset'...
remote: Enumerating objects: 617, done.[K
remote: Total 617 (delta 0), reused 0 (delta 0), pack-reused 617[K
Receiving objects: 100% (617/617), 1.67 GiB | 16.43 MiB/s, done.
Resolving deltas: 100% (1/1), done.
Checking out files: 100% (602/602), done.


In [None]:
# Folder that holds one sub-folder per class of the cloned retina dataset
DATASET_ROOT = '/content/gdrive/MyDrive/DIP_final_project_dataset/retina_dataset/dataset'

# Directory of each sub-dataset
normal_dataset_dir = DATASET_ROOT + '/1_normal'
cataract_dataset_dir = DATASET_ROOT + '/2_cataract'
glaucoma_dataset_dir = DATASET_ROOT + '/2_glaucoma'
retina_disease_dataset_dir = DATASET_ROOT + '/3_retina_disease'

In [None]:
# Extract the paths to the images of every sub-dataset
import os


def list_png_paths(directory):
    """Return the sorted full paths of the non-hidden ``.png`` files in ``directory``.

    Args:
        directory: path of the folder to scan (not recursive).

    Returns:
        Sorted list of ``os.path.join(directory, file_name)`` strings for every
        entry whose name ends with ``.png`` and does not start with a dot.
    """
    return sorted(
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if fname.endswith(".png") and not fname.startswith(".")
    )


normal_img_paths = list_png_paths(normal_dataset_dir)
cataract_img_paths = list_png_paths(cataract_dataset_dir)
glaucoma_img_paths = list_png_paths(glaucoma_dataset_dir)
retina_disease_img_paths = list_png_paths(retina_disease_dataset_dir)

In [None]:
# Target size of every loaded image: 224 x 224 pixels with 3 colour channels
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 224, 224, 3

In [None]:
# Pre-allocate one zero-filled uint8 stack per sub-dataset, shaped (n_images, height, width, channels)
IMAGE_SHAPE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

normal_dataset = np.zeros((len(normal_img_paths),) + IMAGE_SHAPE, dtype=np.uint8)
cataract_dataset = np.zeros((len(cataract_img_paths),) + IMAGE_SHAPE, dtype=np.uint8)
glaucoma_dataset = np.zeros((len(glaucoma_img_paths),) + IMAGE_SHAPE, dtype=np.uint8)
retina_disease_dataset = np.zeros((len(retina_disease_img_paths),) + IMAGE_SHAPE, dtype=np.uint8)

In [None]:
# Read each normal image, resize it to the target size and store it in the pre-allocated stack
for idx in range(len(normal_img_paths)):
    raw_image = imread(normal_img_paths[idx])
    # preserve_range=True keeps the input's intensity range instead of rescaling it
    normal_dataset[idx] = resize(raw_image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
print(normal_dataset.shape)

(300, 224, 224, 3)


In [None]:
# Read each cataract image, resize it to the target size and store it in the pre-allocated stack
for idx in range(len(cataract_img_paths)):
    raw_image = imread(cataract_img_paths[idx])
    # preserve_range=True keeps the input's intensity range instead of rescaling it
    cataract_dataset[idx] = resize(raw_image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
print(cataract_dataset.shape)

(100, 224, 224, 3)


In [None]:
# Read each glaucoma image, resize it to the target size and store it in the pre-allocated stack
for idx in range(len(glaucoma_img_paths)):
    raw_image = imread(glaucoma_img_paths[idx])
    # preserve_range=True keeps the input's intensity range instead of rescaling it
    glaucoma_dataset[idx] = resize(raw_image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
print(glaucoma_dataset.shape)

(101, 224, 224, 3)


In [None]:
# Read each retina-disease image, resize it to the target size and store it in the pre-allocated stack
for idx in range(len(retina_disease_img_paths)):
    raw_image = imread(retina_disease_img_paths[idx])
    # preserve_range=True keeps the input's intensity range instead of rescaling it
    retina_disease_dataset[idx] = resize(raw_image, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
print(retina_disease_dataset.shape)

(100, 224, 224, 3)


In [None]:
def CLAHE(img, clip_limit=2.0, tile_grid_size=(100, 100)):
    """Equalise the lightness of an RGB image with CLAHE.

    The image is converted from RGB to LAB, contrast-limited adaptive histogram
    equalisation is applied to the first (L) plane only, and the result is
    converted back to RGB.

    Args:
        img: RGB image accepted by ``cv2.cvtColor`` (the notebook passes
            224x224x3 uint8 arrays).
        clip_limit: value forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: value forwarded to ``cv2.createCLAHE`` as
            ``tileGridSize``.
            NOTE(review): the default (100, 100) grid on a 224x224 image means
            tiles of only about 2x2 pixels - kept to preserve the original
            behaviour, confirm it is intended.

    Returns:
        The RGB image with its L plane equalised.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    # cv2.split returns an immutable tuple in recent OpenCV releases, so the
    # planes are unpacked instead of assigning into the returned sequence.
    lightness, a_plane, b_plane = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    lab = cv2.merge((clahe.apply(lightness), a_plane, b_plane))
    return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

In [None]:
# Apply CLAHE to every image of every sub-dataset, overwriting the stored image in place
for dataset in (normal_dataset, cataract_dataset, glaucoma_dataset, retina_disease_dataset):
    for idx in range(len(dataset)):
        dataset[idx] = CLAHE(dataset[idx])

In [None]:
# Extract the green channel (index 1 of the last axis) of every image as a
# separate uint8 stack of shape (n_images, height, width).
# NOTE(review): the preprocessed_* arrays are not referenced again in the
# visible part of the notebook; the models below are fed the 3-channel stacks.
GREEN_CHANNEL = 1

preprocessed_normal_dataset = normal_dataset[..., GREEN_CHANNEL].copy()
preprocessed_cataract_dataset = cataract_dataset[..., GREEN_CHANNEL].copy()
preprocessed_glaucoma_dataset = glaucoma_dataset[..., GREEN_CHANNEL].copy()
preprocessed_retina_disease_dataset = retina_disease_dataset[..., GREEN_CHANNEL].copy()

In [None]:
# Label arrays for the binary experiments: 0 for normal images, 1 for each disease
y_normal = np.zeros(len(normal_img_paths), dtype=int)
y_cataract = np.ones(len(cataract_img_paths), dtype=int)
y_glaucoma = np.ones(len(glaucoma_img_paths), dtype=int)
y_retina_disease = np.ones(len(retina_disease_img_paths), dtype=int)

In [None]:
# Concatenate the normal dataset with each of the three diseases.
# The shuffle is seeded: an unseeded shuffle would make the later
# train_test_split(random_state=8) pick different images on every run.
SHUFFLE_SEED = 8

normal_and_cataract_data = np.concatenate((normal_dataset, cataract_dataset), axis=0)
normal_and_cataract_label = np.concatenate((y_normal, y_cataract), axis=0)
normal_and_cataract_data, normal_and_cataract_label = shuffle(
    normal_and_cataract_data, normal_and_cataract_label, random_state=SHUFFLE_SEED
)

normal_and_glaucoma_data = np.concatenate((normal_dataset, glaucoma_dataset), axis=0)
normal_and_glaucoma_label = np.concatenate((y_normal, y_glaucoma), axis=0)
normal_and_glaucoma_data, normal_and_glaucoma_label = shuffle(
    normal_and_glaucoma_data, normal_and_glaucoma_label, random_state=SHUFFLE_SEED
)

normal_and_retina_disease_data = np.concatenate((normal_dataset, retina_disease_dataset), axis=0)
normal_and_retina_disease_label = np.concatenate((y_normal, y_retina_disease), axis=0)
normal_and_retina_disease_data, normal_and_retina_disease_label = shuffle(
    normal_and_retina_disease_data, normal_and_retina_disease_label, random_state=SHUFFLE_SEED
)

In [None]:
import os

import cv2
import imutils as imutils
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf # machine learning
from tqdm import tqdm # make your loops show a smart progress meter 
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sn

RANDOM_SEED = 1
IMG_SIZE = (224, 224) # size of vgg16 input

# Actually apply the seed (it was declared but never used) so that weight
# initialisation, dropout and NumPy-based augmentation are more repeatable.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [None]:
# ImageNet-pretrained VGG16 without its fully connected classifier head.
# Arguments that matched the constructor defaults are left out.
vgg_model = tf.keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Binary classifier: VGG16 base -> flatten -> dropout -> single sigmoid unit
model = tf.keras.models.Sequential([
    vgg_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Freeze the VGG16 base (first layer) so its weights are not updated during training
model.layers[0].trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dropout (Dropout)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 1)                 25089     
                                                                 
Total params: 14,739,777
Trainable params: 25,089
Non-trainable params: 14,714,688
_________________________________________________________________


In [None]:
from sklearn.model_selection import train_test_split

# Turn the labels into a column vector. reshape(-1, 1) leaves an already
# column-shaped array unchanged, so re-running this cell does not keep adding
# axes the way a repeated expand_dims would.
normal_and_cataract_label = normal_and_cataract_label.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(
    normal_and_cataract_data, normal_and_cataract_label, test_size=0.2, random_state=8
)

In [None]:
EPOCHS = 25

# Stop training once validation accuracy has not improved for 5 epochs
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=5)

# 10% of the training data is held out by Keras for validation
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.1,
    callbacks=[early_stopping],
)

print("Training Done")
model.save("model.h5")

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Training Done


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Cataract) without augmentation:", accuracy)

Test Accuracy(Normal/Cataract) without augmentation: 0.9125


In [None]:
# Augmentation pipeline. No `preprocessing_function` is set: the rest of the
# notebook trains and evaluates the model on raw pixel arrays, so applying
# vgg16.preprocess_input only to the augmented training batches would make the
# training inputs differ from the X_test inputs later passed to model.predict.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    brightness_range=[0.5, 1.25],
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.1,
)

# datagen.fit(...) is not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled, and it should never be handed data that includes the test images.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32, subset='training'),
    validation_data=datagen.flow(X_train, y_train, batch_size=8, subset='validation'),
    epochs=EPOCHS,
    callbacks=[early_stopping],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Cataract) with augmentation", accuracy)

Test Accuracy(Normal/Cataract) with augmentation 0.7125


In [None]:
# Turn the labels into a column vector. reshape(-1, 1) leaves an already
# column-shaped array unchanged, so re-running this cell does not keep adding
# axes the way a repeated expand_dims would.
normal_and_glaucoma_label = normal_and_glaucoma_label.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(
    normal_and_glaucoma_data, normal_and_glaucoma_label, test_size=0.2, random_state=8
)

In [None]:
EPOCHS = 25

# Start from a freshly initialised classification head. Re-using `model` as is
# would continue from weights already trained on the previous disease
# experiment, so the accuracy reported below would not be an independent result.
model = tf.keras.models.Sequential([
    vgg_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.layers[0].trainable = False  # keep the VGG16 base frozen
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

# Stop training once validation accuracy has not improved for 5 epochs
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5
)

history = model.fit(X_train, y_train, validation_split=0.1, batch_size=16, epochs=EPOCHS, callbacks=[early_stopping])

print("Training Done")

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Training Done


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Glaucoma) without augmentation:", accuracy)

Test Accuracy(Normal/Glaucoma) without augmentation: 0.7777777777777778


In [None]:
# Augmentation pipeline. No `preprocessing_function` is set: the rest of the
# notebook trains and evaluates the model on raw pixel arrays, so applying
# vgg16.preprocess_input only to the augmented training batches would make the
# training inputs differ from the X_test inputs later passed to model.predict.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    brightness_range=[0.5, 1.25],
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.1,
)

# datagen.fit(...) is not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled (and the cataract data it used to receive is unrelated to
# this normal-vs-glaucoma experiment).
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32, subset='training'),
    validation_data=datagen.flow(X_train, y_train, batch_size=8, subset='validation'),
    epochs=EPOCHS,
    callbacks=[early_stopping],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Glaucoma) with augmentation", accuracy)

Test Accuracy(Normal/Glaucoma) with augmentation 0.7654320987654321


In [None]:
# Turn the labels into a column vector. reshape(-1, 1) leaves an already
# column-shaped array unchanged, so re-running this cell does not keep adding
# axes the way a repeated expand_dims would.
normal_and_retina_disease_label = normal_and_retina_disease_label.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(
    normal_and_retina_disease_data, normal_and_retina_disease_label, test_size=0.2, random_state=8
)

In [None]:
EPOCHS = 25

# Start from a freshly initialised classification head. Re-using `model` as is
# would continue from weights already trained on the previous disease
# experiments, so the accuracy reported below would not be an independent result.
model = tf.keras.models.Sequential([
    vgg_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.layers[0].trainable = False  # keep the VGG16 base frozen
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

# Stop training once validation accuracy has not improved for 5 epochs
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5
)

history = model.fit(X_train, y_train, validation_split=0.1, batch_size=16, epochs=EPOCHS, callbacks=[early_stopping])

print("Training Done")

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Training Done


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Retina_Disease) without augmentation:", accuracy)

Test Accuracy(Normal/Retina_Disease) without augmentation: 0.7625


In [None]:
# Augmentation pipeline. No `preprocessing_function` is set: the rest of the
# notebook trains and evaluates the model on raw pixel arrays, so applying
# vgg16.preprocess_input only to the augmented training batches would make the
# training inputs differ from the X_test inputs later passed to model.predict.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    brightness_range=[0.5, 1.25],
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.1,
)

# datagen.fit(...) is not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled (and the cataract data it used to receive is unrelated to
# this normal-vs-retina-disease experiment).
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32, subset='training'),
    validation_data=datagen.flow(X_train, y_train, batch_size=8, subset='validation'),
    epochs=EPOCHS,
    callbacks=[early_stopping],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25


In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions
probabilities = model.predict(X_test)
predictions = np.where(probabilities.ravel() < 0.5, 0, 1).tolist()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy(Normal/Retina_Disease) with augmentation", accuracy)

Test Accuracy(Normal/Retina_Disease) with augmentation 0.7625


In [None]:
# Creating a multi-class dataset: 0 = normal, 1 = cataract, 2 = glaucoma, 3 = retina disease
y_normal = np.full(len(normal_img_paths), 0)
y_cataract = np.full(len(cataract_img_paths), 1)
y_glaucoma = np.full(len(glaucoma_img_paths), 2)
y_retina_disease = np.full(len(retina_disease_img_paths), 3)

X = np.concatenate((normal_dataset, cataract_dataset, glaucoma_dataset, retina_disease_dataset), axis=0)
y = np.concatenate((y_normal, y_cataract, y_glaucoma, y_retina_disease), axis=0)
# Seeded shuffle: an unseeded one would make the following
# train_test_split(random_state=8) select different images on every run.
X, y = shuffle(X, y, random_state=8)

In [None]:
# Turn the labels into a column vector. reshape(-1, 1) leaves an already
# column-shaped array unchanged, so re-running this cell does not keep adding
# axes the way a repeated expand_dims would.
y = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

In [None]:
from sklearn.preprocessing import OneHotEncoder
def prepare_targets(y_train, y_test):
    """One-hot encode class labels with an encoder fitted on the training labels.

    The encoder is fitted on ``y_train`` only and its learned categories are
    printed. Because ``handle_unknown='ignore'`` is used, labels in ``y_test``
    that were not seen during fitting are encoded as all-zero rows.

    Args:
        y_train: array of training labels; reshaped to a single column.
        y_test: array of test labels; reshaped to a single column.

    Returns:
        Tuple ``(y_train_enc, y_test_enc)`` of dense NumPy arrays with one
        column per category found in ``y_train``.
    """
    enc = OneHotEncoder(handle_unknown='ignore')
    enc.fit(y_train.reshape(-1, 1))
    print(enc.categories_)
    # The encoder's default sparse output is densified explicitly instead of
    # passing `sparse=False`, a constructor flag that newer scikit-learn
    # releases renamed to `sparse_output` and no longer accept.
    y_train_enc = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test_enc = enc.transform(y_test.reshape(-1, 1)).toarray()
    return np.array(y_train_enc), np.array(y_test_enc)

# Replace the label arrays of both splits with the arrays returned by prepare_targets.
# NOTE(review): y_train / y_test are overwritten, so this cell should not be re-run
# without first re-running the train/test split cell.
y_train, y_test = prepare_targets(y_train, y_test)
print(y_train.shape)

[array([0, 1, 2, 3])]
(480, 4)


In [None]:
# Four-class classifier: VGG16 base -> flatten -> dropout -> 4-way softmax
model = tf.keras.models.Sequential([
    vgg_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4, activation='softmax'),
])

# Freeze the VGG16 base (first layer) so its weights are not updated during training
model.layers[0].trainable = False

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten_2 (Flatten)         (None, 25088)             0         
                                                                 
 dropout_2 (Dropout)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 4)                 100356    
                                                                 
Total params: 14,815,044
Trainable params: 100,356
Non-trainable params: 14,714,688
_________________________________________________________________


In [None]:
EPOCHS = 25

# Stop training once validation accuracy has not improved for 5 epochs
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=5)

# 10% of the training data is held out by Keras for validation
history = model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.1,
    callbacks=[early_stopping],
)

print("Training Done")

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Training Done
