### Retinal Disease Classification using CNN Model

Import required modules

In [1]:
import os
import cv2

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-08-13 03:32:45.402350: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-13 03:32:45.402469: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-13 03:32:45.541050: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading Data

In [2]:
# Path to Data
train_path = '/kaggle/input/Training_Set/Training_Set/Training'
test_path = '/kaggle/input/Test_Set/Test_Set/Test'
validation_path = '/kaggle/input/Evaluation_Set/Evaluation_Set/Validation'

train_labels_path = '/kaggle/input/Training_Set/Training_Set/RFMiD_Training_Labels.csv'
test_labels_path = '/kaggle/input/Test_Set/Test_Set/RFMiD_Testing_Labels.csv'
validation_labels_path = '/kaggle/input/Evaluation_Set/Evaluation_Set/RFMiD_Validation_Labels.csv'

In [3]:
# Loading labels
train_labels = pd.read_csv(train_labels_path)
test_labels = pd.read_csv(test_labels_path)
val_labels = pd.read_csv(validation_labels_path)

In [4]:
print("Train shape:", train_labels.shape)
print("Test shape:", test_labels.shape)
print("Validation shape:", val_labels.shape)

Train shape: (1920, 47)
Test shape: (640, 47)
Validation shape: (640, 47)


In [5]:
# to dislpay all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [6]:
print(train_labels.head())

   ID  Disease_Risk  DR  ARMD  MH  DN  MYA  BRVO  TSLN  ERM  LS  MS  CSR  ODC  \
0   1             1   1     0   0   0    0     0     0    0   0   0    0    0   
1   2             1   1     0   0   0    0     0     0    0   0   0    0    0   
2   3             1   1     0   0   0    0     0     0    0   0   0    0    0   
3   4             1   0     0   1   0    0     0     0    0   0   0    0    1   
4   5             1   1     0   0   0    0     0     0    0   1   0    0    0   

   CRVO  TV  AH  ODP  ODE  ST  AION  PT  RT  RS  CRS  EDN  RPEC  MHL  RP  CWS  \
0     0   0   0    0    0   0     0   0   0   0    0    0     0    0   0    0   
1     0   0   0    0    0   0     0   0   0   0    0    0     0    0   0    0   
2     0   0   0    0    0   0     0   0   0   0    0    0     0    0   0    0   
3     0   0   0    0    0   0     0   0   0   0    0    0     0    0   0    0   
4     0   0   0    0    0   0     0   0   0   0    0    0     0    0   0    0   

   CB  ODPM  PRH  MNF  HR 

In [7]:
# Exploratory Data Analysis
disease_counts = train_labels.iloc[:, 2:].sum().sort_values(ascending=False)
#using plotly
fig = px.bar(disease_counts, title="Disease Distribution in Training Set") 
fig.show()

In [8]:
# Preprocessing the data
# function to load the images and labels
def load_images_and_labels(image_dir, labels_df):
    images = []
    labels = []
    for index, row in labels_df.iterrows():
        img_path = os.path.join(image_dir, f"{row['ID']}.png")
        if os.path.exists(img_path):
            img = cv2.imread(img_path)
            # Resizing images to 224x224
            img = cv2.resize(img, (224, 224))
            images.append(img)
            labels.append(row['Disease_Risk'])
    return np.array(images), np.array(labels)

# Load images and labels
x_train, y_train = load_images_and_labels(train_path, train_labels)
x_test, y_test = load_images_and_labels(test_path, test_labels)
x_val, y_val = load_images_and_labels(validation_path, val_labels)

In [9]:
# Normalizing the images
x_train = x_train / 255.0
x_val = x_val / 255.0
x_test = x_test / 255.0

In [10]:
# Image Augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [30]:
x_train.shape

(1920, 224, 224, 3)

#### Defining the model - VGG16

In [11]:
from keras.applications import VGG16
model_vgg = VGG16(weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [31]:
inp_layer = Input(shape=(224,224,3))
model_vgg = VGG16(weights='imagenet', input_tensor=inp_layer, include_top=False)

In [32]:
# Adding output layers

last_layer = model_vgg.output
flatten = Flatten()(last_layer)
dense1 = Dense(100, activation='relu')(flatten)
dense1 = Dense(100, activation='relu')(flatten)
dense1 = Dense(100, activation='relu')(flatten)
output_layer = Dense(1, activation='sigmoid')(flatten)

model = Model(inputs=inp_layer, outputs = output_layer)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [33]:
for layer in model.layers[:-1]:
    layer.trainable = False

Train the model

In [34]:
history = model.fit(
    x_train, y_train, batch_size=32,
    validation_data=(x_val, y_val),
    epochs=20
)

Epoch 1/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 117ms/step - accuracy: 0.7509 - loss: 0.6307 - val_accuracy: 0.8531 - val_loss: 0.3578
Epoch 2/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 88ms/step - accuracy: 0.8398 - loss: 0.3626 - val_accuracy: 0.8266 - val_loss: 0.3796
Epoch 3/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 88ms/step - accuracy: 0.8495 - loss: 0.3411 - val_accuracy: 0.8156 - val_loss: 0.4133
Epoch 4/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 88ms/step - accuracy: 0.8251 - loss: 0.3854 - val_accuracy: 0.8719 - val_loss: 0.2961
Epoch 5/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 88ms/step - accuracy: 0.8703 - loss: 0.2984 - val_accuracy: 0.8703 - val_loss: 0.2937
Epoch 6/20
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 88ms/step - accuracy: 0.8919 - loss: 0.2707 - val_accuracy: 0.8734 - val_loss: 0.2878
Epoch 7/20
[1m60/60[0m [32m━━━

Evaluate the model

In [46]:
# y_pred = (model.predict(x_test) > 0.5).astype("int32")
y_pred = []
y_pred = model.predict(x_test).round().astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step
Accuracy: 0.890625
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.87      0.77       134
           1       0.96      0.90      0.93       506

    accuracy                           0.89       640
   macro avg       0.83      0.88      0.85       640
weighted avg       0.91      0.89      0.89       640



In [47]:
cm = confusion_matrix(y_test, y_pred)
fig = px.imshow(cm, text_auto=True, title="Confusion Matrix")
fig.show()

Save the model

In [48]:
model.save('/kaggle/working/vgg16_model.h5')