In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [2]:
# Define the paths
base_image_dir = '../archive/gaussian_filtered_images'
labels_path = '../archive/train.csv'

# Load the labels
labels_df = pd.read_csv(labels_path)

# Map the labels to the corresponding directories
label_map = {
    0: 'No_DR',
    1: 'Mild',
    2: 'Moderate',
    3: 'Severe',
    4: 'Proliferate_DR'
}

# Create image paths and labels
image_paths = []
labels = []
for index, row in labels_df.iterrows():
    image_filename = row['id_code']
    label = row['diagnosis']
    image_path = os.path.join(base_image_dir, f"{label_map[label]}/{image_filename}.png")
    image_paths.append(image_path)
    labels.append(label)

# Split the dataset into training and validation sets
X_train_paths, X_val_paths, y_train, y_val = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Define the image size and batch size
image_size = (224, 224)
batch_size = 32

# Function to preprocess a single image
def preprocess_image(image_path, target_size):
    """Load and preprocess a single image."""
    image = load_img(image_path, target_size=target_size)
    image = img_to_array(image)
    image = image / 255.0  # Normalize to [0, 1]
    return image

# Data generator for training
train_datagen = ImageDataGenerator(preprocessing_function=lambda x: x / 255.0)

def train_generator(X_paths, y, batch_size, image_size):
    while True:
        for start in range(0, len(X_paths), batch_size):
            end = min(start + batch_size, len(X_paths))
            X_batch_paths = X_paths[start:end]
            y_batch = y[start:end]
            X_batch = np.array([preprocess_image(img_path, image_size) for img_path in X_batch_paths])
            yield X_batch, np.array(y_batch)

# Data generator for validation
val_datagen = ImageDataGenerator(preprocessing_function=lambda x: x / 255.0)

def val_generator(X_paths, y, batch_size, image_size):
    while True:
        for start in range(0, len(X_paths), batch_size):
            end = min(start + batch_size, len(X_paths))
            X_batch_paths = X_paths[start:end]
            y_batch = y[start:end]
            X_batch = np.array([preprocess_image(img_path, image_size) for img_path in X_batch_paths])
            yield X_batch, np.array(y_batch)

# Load the VGG16 model without the top layer
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=(image_size[0], image_size[1], 3))

# Freeze the layers in the base model
for layer in vgg16_base.layers:
    layer.trainable = False

# Add custom layers on top of VGG16
x = Flatten()(vgg16_base.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(5, activation='softmax')(x)  # Assuming 5 classes for the diabetic retinopathy levels

# Create the model
model = Model(inputs=vgg16_base.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
history = model.fit(
    train_generator(X_train_paths, y_train, batch_size, image_size),
    steps_per_epoch=len(X_train_paths) // batch_size,
    validation_data=val_generator(X_val_paths, y_val, batch_size, image_size),
    validation_steps=len(X_val_paths) // batch_size,
    epochs=10  # Adjust the number of epochs as needed
)

Epoch 1/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 2s/step - accuracy: 0.5570 - loss: 3.0593 - val_accuracy: 0.7358 - val_loss: 0.8756
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m315s[0m 3s/step - accuracy: 0.6756 - loss: 0.9074 - val_accuracy: 0.7389 - val_loss: 0.8152
Epoch 3/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 3s/step - accuracy: 0.6981 - loss: 0.8529 - val_accuracy: 0.7318 - val_loss: 0.8169
Epoch 4/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 3s/step - accuracy: 0.6871 - loss: 0.8606 - val_accuracy: 0.7375 - val_loss: 0.7831
Epoch 5/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 3s/step - accuracy: 0.7069 - loss: 0.8209 - val_accuracy: 0.7361 - val_loss: 0.7930
Epoch 6/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 3s/step - accuracy: 0.7039 - loss: 0.8080 - val_accuracy: 0.7347 - val_loss: 0.7758
Epoch 7/10
[1m91/91[0m [32m━━━━



In [4]:
# Save the model
model.save('../models/vgg16_model.keras')

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(val_generator(X_val_paths, y_val, batch_size, image_size), steps=len(X_val_paths) // batch_size)
print(f"Validation Accuracy: {val_accuracy}")

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 2s/step - accuracy: 0.7416 - loss: 0.7376
Validation Accuracy: 0.7386363744735718
