In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [2]:
# Define the paths
base_image_dir = '../archive/gaussian_filtered_images'
labels_path = '../archive/train.csv'

# Load the labels
labels_df = pd.read_csv(labels_path)

# Map the labels to the corresponding directories
label_map = {
    0: 'No_DR',
    1: 'Mild',
    2: 'Moderate',
    3: 'Severe',
    4: 'Proliferate_DR'
}

In [3]:
# Create image paths and labels
image_paths = []
labels = []
for index, row in labels_df.iterrows():
    image_filename = row['id_code']
    label = row['diagnosis']
    image_path = os.path.join(base_image_dir, f"{label_map[label]}/{image_filename}.png")
    image_paths.append(image_path)
    labels.append(label)

In [4]:
# Split the dataset into training and validation sets
X_train_paths, X_val_paths, y_train, y_val = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Define the image size and batch size
image_size = (224, 224)
batch_size = 32

In [5]:
# Function to preprocess a single image
def preprocess_image(image_path, target_size):
    """Load and preprocess a single image."""
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image)
    image = image / 255.0  # Normalize to [0, 1]
    return image

In [6]:
# Data generator for training
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

def train_generator(X_paths, y, batch_size, image_size):
    while True:
        for start in range(0, len(X_paths), batch_size):
            end = min(start + batch_size, len(X_paths))
            X_batch_paths = X_paths[start:end]
            y_batch = y[start:end]
            X_batch = np.array([preprocess_image(img_path, image_size) for img_path in X_batch_paths])
            yield X_batch, np.array(y_batch)

In [7]:
# Data generator for validation
val_datagen = ImageDataGenerator()

def val_generator(X_paths, y, batch_size, image_size):
    while True:
        for start in range(0, len(X_paths), batch_size):
            end = min(start + batch_size, len(X_paths))
            X_batch_paths = X_paths[start:end]
            y_batch = y[start:end]
            X_batch = np.array([preprocess_image(img_path, image_size) for img_path in X_batch_paths])
            yield X_batch, np.array(y_batch)

In [8]:
# Load the ResNet50 model without the top layer
resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

# Freeze the layers in the base model
for layer in resnet_base.layers:
    layer.trainable = False

# Add custom layers on top of ResNet50
x = GlobalAveragePooling2D()(resnet_base.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(5, activation='softmax')(x)  # Assuming 5 classes for the diabetic retinopathy levels

# Create the model
model = Model(inputs=resnet_base.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

In [9]:
# Train the model
history = model.fit(
    train_generator(X_train_paths, y_train, batch_size, image_size),
    steps_per_epoch=len(X_train_paths) // batch_size,
    validation_data=val_generator(X_val_paths, y_val, batch_size, image_size),
    validation_steps=len(X_val_paths) // batch_size,
    epochs=10  # Adjust the number of epochs as needed
)

Epoch 1/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 789ms/step - accuracy: 0.4910 - loss: 1.3299 - val_accuracy: 0.6193 - val_loss: 0.9931
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 987ms/step - accuracy: 0.6162 - loss: 1.0010 - val_accuracy: 0.6576 - val_loss: 0.9788
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 1s/step - accuracy: 0.6377 - loss: 0.9683 - val_accuracy: 0.6648 - val_loss: 0.9695
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - accuracy: 0.6197 - loss: 0.9815 - val_accuracy: 0.6805 - val_loss: 0.9418
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 1s/step - accuracy: 0.6428 - loss: 0.9833 - val_accuracy: 0.6790 - val_loss: 0.9219
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 985ms/step - accuracy: 0.6394 - loss: 0.9649 - val_accuracy: 0.6776 - val_loss: 0.9247
Epoch 7/10
[1m91/91[0m [3

In [10]:
# Save the model
model.save('../models/resnet50_model.keras')

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(val_generator(X_val_paths, y_val, batch_size, image_size), steps=len(X_val_paths) // batch_size)
print(f"Validation Accuracy: {val_accuracy}")

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 703ms/step - accuracy: 0.6981 - loss: 0.8885
Validation Accuracy: 0.6789772510528564
