## Diabetic Retinopathy Detection

Dataset found here: https://www.kaggle.com/c/diabetic-retinopathy-detection/data

The goal is to predict the severity of a patient's diabetic retinopathy from retina images taken under varying conditions.

In [None]:
### Untested due to size of dataset and hardware training limitations ###
import os
import sys
import pandas as pd
import numpy as np

from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True
from skimage import io, img_as_ubyte
from skimage.transform import resize, rotate
import cv2

In [None]:
def create_directory(directory):
  """Create `directory`, including missing parents, if it does not exist yet.

  Args:
    directory: Path of the directory to create.

  Raises:
    FileExistsError: If `directory` exists but is not a directory.
  """
  # exist_ok=True replaces the racy "check with os.path.exists, then create"
  # sequence: an already-existing directory is simply left untouched.
  os.makedirs(directory, exist_ok=True)

def get_img_list(path):
  """List the entries of `path`, leaving out any '.DS_Store' entry.

  Args:
    path: Directory to list.

  Returns:
    A list of entry names (not full paths), in the order os.listdir yields them.
  """
  entries = []
  for name in os.listdir(path):
    if name == '.DS_Store':
      continue
    entries.append(name)
  return entries

def resize_images(path, new_path, img_size=256):
  """Resize every image in `path` to a square and save it under `new_path`.

  Args:
    path: Directory containing the source images.
    new_path: Directory the resized images are written to; created if missing.
    img_size: Side length, in pixels, of the square output images.
  """
  create_directory(new_path)

  # enumerate(start=1) gives the running count that is printed per image.
  for total, item in enumerate(get_img_list(path), start=1):
    img = io.imread(os.path.join(path, item))
    # Honour the img_size argument; the size used to be hard-coded to 256x256.
    img = resize(img, (img_size, img_size))
    # Convert the resized image to 8-bit before writing it out.
    io.imsave(os.path.join(new_path, item), img_as_ubyte(img))
    print(f"Saving: {item, total}")

def rotate_images(path, deg_rotate, img_l):
  """Save a rotated copy of each listed image alongside the original.

  Args:
    path: Directory prefix that is concatenated directly with each image name.
    deg_rotate: Rotation angle handed to `rotate`.
    img_l: Image names without the '.jpeg' extension.

  Each copy is written to '<path><name>_<deg_rotate>.jpeg'.
  """
  for name in img_l:
    stem = path + str(name)
    source = stem + '.jpeg'
    target = stem + '_' + str(deg_rotate) + '.jpeg'
    rotated = rotate(io.imread(source), deg_rotate)
    io.imsave(target, img_as_ubyte(rotated))

def mirror_images(path, mirror_dir, img_l):
  """Save a flipped copy of each listed image alongside the original.

  Args:
    path: Directory prefix that is concatenated directly with each image name.
    mirror_dir: Flip code handed to `cv2.flip` (the notebook passes 0 and 1).
    img_l: Image names without the '.jpeg' extension.

  Each copy is written to '<path><name>_mir.jpeg'.
  """
  for name in img_l:
    stem = path + str(name)
    flipped = cv2.flip(cv2.imread(stem + '.jpeg'), mirror_dir)
    cv2.imwrite(stem + '_mir' + '.jpeg', img_as_ubyte(flipped))

In [None]:
# Directory holding the resized images and, after augmentation, their
# rotated/mirrored copies.
preprocessed_dir = './train/preprocessed_data/'

resize_images(path='./train/original_data/', new_path=preprocessed_dir, img_size=256)

trainLabels = pd.read_csv('./train/trainLabels_original.csv')
# Remove a literal '.jpeg' suffix only. str.rstrip('.jpeg') strips any trailing
# run of the characters '.', 'j', 'p', 'e', 'g' and could eat into the name itself.
trainLabels['image'] = trainLabels['image'].str.replace(r'\.jpeg$', '', regex=True)
trainLabels_no_DR = trainLabels[trainLabels['level'] == 0]
trainLabels_DR = trainLabels[trainLabels['level'] >= 1]

img_l_no_DR = trainLabels_no_DR['image'].tolist()
img_l_DR = trainLabels_DR['image'].tolist()

# mirror images with no DR once
print("Mirroring Non-DR Images")
mirror_images(preprocessed_dir, 1, img_l_no_DR)

# rotate all images with any level of DR
for degrees in (90, 120, 180, 270):
  print(f"Rotating {degrees} Degrees")
  rotate_images(preprocessed_dir, degrees, img_l_DR)

print("Mirroring DR Images")
mirror_images(preprocessed_dir, 0, img_l_DR)

# Rebuild the label table so that every file now on disk (originals and
# augmented copies) is paired with the label of the image it was derived from.
trainLabels = pd.read_csv('./train/trainLabels_original.csv')

img_l = get_img_list(preprocessed_dir)


def _source_image_name(file_name):
  """Map a (possibly augmented) file name to the name of its source image.

  Augmented copies carry an extra '_<suffix>' before the extension, so the
  source name is the first two '_'-separated fields plus '.jpeg'.
  """
  extension = '.jpeg'
  # Cut the extension as a suffix; str.strip('.jpeg') would remove matching
  # characters from both ends of the name instead.
  if file_name.endswith(extension):
    file_name = file_name[:-len(extension)]
  return '_'.join(file_name.split('_')[:2]) + extension


# Normalise to exactly one '.jpeg' extension so the names match the files on disk.
trainLabels['image'] = trainLabels['image'].str.replace(r'\.jpeg$', '', regex=True) + '.jpeg'
new_trainLabels = pd.DataFrame({'train_image_name': img_l})
new_trainLabels['image'] = new_trainLabels['train_image_name'].apply(_source_image_name)

# The outer merge followed by dropna keeps only rows that have both a label
# and a file on disk.
trainLabels = pd.merge(trainLabels, new_trainLabels, how='outer', on='image')
trainLabels = trainLabels.dropna()
trainLabels.to_csv('./train/trainLabels_augmented.csv', index=False, header=True)

In [None]:
import tensorflow as tf
import keras
from keras.utils import to_categorical

# Number of target classes: 2 collapses every level >= 1 into a single
# "has DR" class; any other value keeps the raw level values.
nb_classes = 2
labels = pd.read_csv('./train/trainLabels_augmented.csv')

levels = labels['level'].to_numpy()
y = (levels >= 1).astype(int) if nb_classes == 2 else levels
# One-hot encode the targets: shape (n_samples, nb_classes).
y = to_categorical(y, nb_classes)

img_l = np.array(labels['train_image_name'].tolist())


def _load_image(file_name):
  """Read one preprocessed image into an array and close its file handle."""
  with Image.open('./train/preprocessed_data/' + file_name) as img:
    # np.array reads the pixel data while the file is still open.
    return np.array(img)


# Build the float32 array directly instead of materialising a uint8 array
# first and then copying it, then scale pixel values by 1/255.
X = np.array([_load_image(name) for name in img_l], dtype=np.float32)
X /= 255.0

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.
_split = train_test_split(X, y, test_size=0.2, random_state=14)
X_train, X_test, y_train, y_test = _split

# Keep separate array copies of each split under the *_raw names.
X_train_raw, y_train_raw = np.array(X_train), np.array(y_train)
X_test_raw, y_test_raw = np.array(X_test), np.array(y_test)

In [None]:
from keras.layers import Dense, Dropout, Flatten, Input 
from keras.models import Model
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D

input = Input(shape=(256, 256, 3))
conv1 = Conv2D(32, kernel_size=4, activation='relu')(input)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(16, kernel_size=4, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
flat = Flatten()(pool2)
hidden1 = Dense(10, activation='relu')(flat)
output = Dense(1, activation='sigmoid')(hidden1)
model = Model(inputs=input, outputs=output)

# model = keras.Sequential([keras.layers.Flatten(input_shape=(786, 786, 3)), keras.layers.Dense(128, activation=tf.nn.relu), 
        # keras.layers.Dense(10, activation=tf.nn.softmax)])

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_raw, y_train_raw, epochs=5)

In [None]:
# Score the trained model on the held-out split. evaluate() returns the loss
# followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(X_test_raw, y_test_raw)
loss, acc = test_metrics
print(f"Test Loss: {loss}, Test Accuracy: {acc}")