<a href="https://colab.research.google.com/github/grace12021/MATH-748-Final-Project/blob/main/progression2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Kaggle Library

In [1]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
!kaggle datasets download -d andrewmvd/retinal-disease-classification

Downloading retinal-disease-classification.zip to /content
100% 7.41G/7.43G [01:14<00:00, 194MB/s]
100% 7.43G/7.43G [01:14<00:00, 107MB/s]


In [3]:
!unzip retinal-disease-classification.zip

Archive:  retinal-disease-classification.zip
  inflating: Evaluation_Set/Evaluation_Set/RFMiD_Validation_Labels.csv  
  inflating: Evaluation_Set/Evaluation_Set/Validation/1.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/10.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/100.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/101.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/102.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/103.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/104.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/105.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/106.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/107.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/108.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/109.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/11.png  
  inflating: Evaluation_Set/Evaluation_Set/Validation/

# Import Libraries and Read Dataset

In [4]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from tqdm import tqdm
from keras.preprocessing import image
pd.options.display.max_columns = 50

In [5]:
train_df = pd.read_csv('Training_Set/Training_Set/RFMiD_Training_Labels.csv')
val_df = pd.read_csv('Evaluation_Set/Evaluation_Set/RFMiD_Validation_Labels.csv')
test_df = pd.read_csv('Test_Set/Test_Set/RFMiD_Testing_Labels.csv')

In [6]:
train_df['img_path'] = [f'Training_Set/Training_Set/Training/{id}.png' for id in train_df['ID']]
val_df['img_path'] = [f'Evaluation_Set/Evaluation_Set/Validation/{id}.png' for id in val_df['ID']]
test_df['img_path'] = [f'Test_Set/Test_Set/Test/{id}.png' for id in test_df['ID']]

In [7]:
train_df = train_df.drop(labels=['ID'],axis=1)

In [8]:
train_df = train_df.drop(labels=['Disease_Risk','ODPM', 'HR'], axis=1)
val_df = val_df.drop(labels=['ID', 'Disease_Risk', 'ODPM', 'HR'], axis=1)
test_df = test_df.drop(labels=['ID', 'Disease_Risk', 'ODPM', 'HR'], axis=1)

In [9]:
train_df.columns

Index(['DR', 'ARMD', 'MH', 'DN', 'MYA', 'BRVO', 'TSLN', 'ERM', 'LS', 'MS',
       'CSR', 'ODC', 'CRVO', 'TV', 'AH', 'ODP', 'ODE', 'ST', 'AION', 'PT',
       'RT', 'RS', 'CRS', 'EDN', 'RPEC', 'MHL', 'RP', 'CWS', 'CB', 'PRH',
       'MNF', 'CRAO', 'TD', 'CME', 'PTCR', 'CF', 'VH', 'MCA', 'VS', 'BRAO',
       'PLQ', 'HPED', 'CL', 'img_path'],
      dtype='object')

In [10]:
Y_train = list(train_df.drop(['img_path'], axis=1).columns)
Y_val = list(val_df.drop(['img_path'], axis=1).columns)
Y_test = list(test_df.drop(['img_path'], axis=1).columns)
unq_disease = len(Y_train)
print(unq_disease)

43


# Data Augmentation
In tensorflow, we can pre-process images to extend the given dataset. Some slight transformation such as rescaling, transition, rotation, zittering (in color or brightness) don't change the information in the image that much. It is called **data augmentation**. In tensorflow, there is a method named **tf.keras.preprocessing.image.ImageDataGenerator** which generates batches of tensor image data with real-time data augmentation.

In [11]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                                                horizontal_flip=True,
                                                                vertical_flip=True,
                                                                rotation_range=90,
                                                                brightness_range=[0, 0.1])
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

In [12]:
train_gen = train_datagen.flow_from_dataframe(train_df, 
                                              x_col='img_path', 
                                              y_col=Y_train,
                                              target_size=(150,150),
                                              class_mode='raw',
                                              batch_size=16,
                                              shuffle=True)
val_gen = val_datagen.flow_from_dataframe(val_df,
                                          x_col='img_path',
                                          y_col=Y_val,
                                          target_size=(150,150),
                                          class_mode='raw',
                                          batch_size=8)
test_gen = test_datagen.flow_from_dataframe(test_df,
                                            x_col='img_path',
                                            y_col=Y_test,
                                            target_size=(150,150),
                                            class_mode='raw')

Found 1920 validated image filenames.
Found 640 validated image filenames.
Found 640 validated image filenames.


# Train Model

In [13]:
def UNet(inputs):
    # First convolution block
    x = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    d1_con = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x)
    d1 = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(d1_con)
    
    # Second convolution block
    d2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d1)
    d2_con = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d2)
    d2 = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(d2_con)
    
    # Third convolution block
    d3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d2)
    d3_con = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d3)
    d3 = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(d3_con)
    
    # Fourth convolution block
    d4 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d3)
    d4_con = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d4)
    d4 = tf.keras.layers.MaxPool2D(pool_size=2, strides=2)(d4_con)
    
    # Bottleneck layer
    b = tf.keras.layers.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(d4)
    b = tf.keras.layers.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(b)
    
    # First upsampling block
    u1 = tf.keras.layers.Conv2DTranspose(512, 3, strides =(2,2),padding='same')(b)
    u1 = tf.keras.layers.Concatenate(axis=3)([u1, d4_con])
    u1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u1)
    u1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u1)
    
    # Second upsampling block
    u2 = tf.keras.layers.Conv2DTranspose(256, 3, strides =(2,2),padding='valid')(u1)
    u2 = tf.keras.layers.Concatenate(axis=3)([u2, d3_con])
    u2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u2)
    u2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u2)
    
    # Third upsampling block
    u3 = tf.keras.layers.Conv2DTranspose(128, 3, strides =(2,2),padding='valid')(u2)
    u3 = tf.keras.layers.Concatenate(axis=3)([u3, d2_con])
    u3 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u3)
    u3 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u3)
    
    # Fourth upsampling block
    u4 = tf.keras.layers.Conv2DTranspose(64, 3, strides =(2,2),padding='same')(u3)
    u4 = tf.keras.layers.Concatenate(axis=3)([u4, d1_con])
    u4 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u4)
    u4 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(u4)
    
    # Flatten and output
    flat = tf.keras.layers.Flatten()(u4)
    out = tf.keras.layers.Dense(units=unq_disease, activation='sigmoid')(flat)
    model = tf.keras.Model(inputs=[inputs], outputs=[out])
    return model

In [14]:
auc = tf.keras.metrics.AUC(multi_label=True,thresholds=[0,0.5])
aucpr = tf.keras.metrics.AUC(curve='PR',multi_label=True,thresholds=[0,0.5])
inputs = tf.keras.layers.Input(shape=(150,150,3))
unet = UNet(inputs)
unet.compile(optimizer='adam', loss='binary_crossentropy', metrics=[auc, aucpr])
unet.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 150, 150, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 150, 150, 64  1792        ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 conv2d_1 (Conv2D)              (None, 150, 150, 64  36928       ['conv2d[0][0]']                 
                                )                                                             

In [15]:
unet.fit(train_gen, epochs=5, validation_data=val_gen)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f291856ff50>

In [16]:
unet.evaluate(test_gen)



[0.15088757872581482, 0.415132999420166, 0.030958611518144608]