In [1]:
import os
import shutil
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, LeakyReLU, Dropout
from sklearn.model_selection import KFold
import pandas as pd
from IPython.display import FileLink

# ✅ Configuration
MODEL_NAME = "CustomVGG16"
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 75  # You can set to 100 for longer training
NUM_FOLDS = 5
SEED = 42
IMG_DIR = "/kaggle/input/augmented-dr-dataset/Augmented_DR_Dataset"
MODEL_SAVE_DIR = f"/kaggle/working/{MODEL_NAME}"
RESULTS_CSV_PATH = f"{MODEL_SAVE_DIR}/training_results.csv"
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# ✅ Load Dataset
# Recursively collect every PNG under IMG_DIR.
# - The paths are sorted because os.walk yields entries in arbitrary
#   filesystem order; the seeded KFold split below selects rows by position,
#   so a stable row order is required for the folds to be reproducible.
# - The extension check is case-insensitive so files named "*.PNG" are not
#   silently dropped.
image_files = sorted(
    os.path.join(root, file)
    for root, _, files in os.walk(IMG_DIR)
    for file in files
    if file.lower().endswith('.png')
)

print(f"Found {len(image_files)} image files.")

# ✅ Extract labels from subfolder names
# The label of an image is the name of the directory that directly contains it.
labels = [os.path.basename(os.path.dirname(f)) for f in image_files]

# ✅ Create DataFrame
# One row per image: absolute path in "filename", class name in "label".
data_df = pd.DataFrame({"filename": image_files, "label": labels})
print(f"Total Images Found: {len(data_df)}")
# Sorted list of distinct labels; its length sets the model's output width.
class_names = sorted(data_df["label"].unique())

# ✅ Define Custom VGG16-based Model
def build_custom_vgg16(input_shape=(128, 128, 3), num_classes=5):
    """Build and compile a VGG16-backed image classifier.

    The network is a Sequential stack made of:
      1. a VGG16 convolutional base created with ``include_top=False`` and
         ``weights=None`` (no pretrained weights are loaded),
      2. a Flatten layer,
      3. two hidden blocks (128 then 64 units), each being
         Dense -> BatchNormalization -> LeakyReLU(0.01) -> Dropout(0.5),
      4. a softmax Dense layer with ``num_classes`` outputs.

    Args:
        input_shape: Shape of one input image, passed to VGG16.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The Sequential model, compiled with Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    backbone = VGG16(include_top=False, weights=None, input_shape=input_shape)

    net = Sequential()
    net.add(backbone)
    net.add(Flatten())

    # Both hidden blocks share the same layout and differ only in width.
    for units in (128, 64):
        net.add(Dense(units))
        net.add(BatchNormalization())
        net.add(LeakyReLU(alpha=0.01))
        net.add(Dropout(0.5))

    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# ✅ K-Fold Cross Validation
# Shuffled K-fold split over the rows of data_df, seeded with SEED.
kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=SEED)
results = []

# ✅ Data Augmentation
# The generators hold no per-fold state, so they are created once and reused.
# Training images are rescaled to [0, 1] and randomly rotated/flipped;
# validation images are only rescaled.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=20, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1./255)

# ✅ Training Loop
for fold, (train_idx, val_idx) in enumerate(kf.split(data_df)):
    print(f"\n========= 🏆 Fold {fold+1}/{NUM_FOLDS} =========")

    # Drop the graph/state left over from the previous fold's model so memory
    # does not keep growing as a fresh model is built for every fold.
    tf.keras.backend.clear_session()

    train_df, val_df = data_df.iloc[train_idx], data_df.iloc[val_idx]

    # `classes=class_names` pins the label -> index mapping to the full label
    # set. Without it each generator infers the mapping from its own subset,
    # so a fold missing a class would yield one-hot vectors that disagree
    # with the other generator and with the model's output width.
    train_gen = train_datagen.flow_from_dataframe(
        train_df, x_col="filename", y_col="label",
        target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode="categorical", classes=class_names,
        seed=SEED,  # reproducible shuffling and augmentation draws
    )
    val_gen = val_datagen.flow_from_dataframe(
        val_df, x_col="filename", y_col="label",
        target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode="categorical", classes=class_names,
        shuffle=False,  # evaluation does not need shuffling
    )

    # ✅ Build Model
    # A new, untrained model per fold; output width follows the label set.
    model = build_custom_vgg16(input_shape=(*IMG_SIZE, 3), num_classes=len(class_names))

    # ✅ Checkpointing
    # Keep only the weights of the epoch with the highest validation accuracy.
    fold_model_name = f"{MODEL_NAME}_fold_{fold+1}.keras"
    checkpoint_path = os.path.join(MODEL_SAVE_DIR, fold_model_name)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, monitor='val_accuracy', mode='max'
    )

    # ✅ Train Model
    history = model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS, callbacks=[checkpoint])

    # ✅ Best Results
    # NOTE: these are the best values over all epochs taken independently, so
    # the reported loss may come from a different epoch than the accuracy
    # (and therefore from a different epoch than the saved checkpoint).
    best_val_acc = max(history.history['val_accuracy'])
    best_val_loss = min(history.history['val_loss'])

    results.append({
        "Model": MODEL_NAME,
        "Fold": fold + 1,
        "Best Val Accuracy": best_val_acc,
        "Best Val Loss": best_val_loss
    })

# ✅ Save Results CSV
# One row per fold with the best validation accuracy and loss.
results_df = pd.DataFrame(results)
results_df.to_csv(RESULTS_CSV_PATH, index=False)
print(f"\n📊 Training results saved to: {RESULTS_CSV_PATH}")
print(f"🎯 Best models saved at: {MODEL_SAVE_DIR}")

# ✅ Zip and Provide Download Link
# Archive the whole output directory next to it as "<MODEL_SAVE_DIR>.zip".
shutil.make_archive(MODEL_SAVE_DIR, 'zip', MODEL_SAVE_DIR)
# `display` is only an implicit builtin inside notebooks; import it explicitly
# so the name is always defined wherever IPython is installed.
from IPython.display import display
display(FileLink(f"{MODEL_SAVE_DIR}.zip"))


Found 3493 image files.
Total Images Found: 3493

Found 2794 validated image filenames belonging to 5 classes.
Found 699 validated image filenames belonging to 5 classes.




Epoch 1/75


  self._warn_if_super_not_called()


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 585ms/step - accuracy: 0.2760 - loss: 1.9636 - val_accuracy: 0.4664 - val_loss: 1.2686
Epoch 2/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 218ms/step - accuracy: 0.5942 - loss: 1.0658 - val_accuracy: 0.6853 - val_loss: 0.8579
Epoch 3/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 204ms/step - accuracy: 0.6721 - loss: 0.9715 - val_accuracy: 0.6481 - val_loss: 1.0123
Epoch 4/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 213ms/step - accuracy: 0.6869 - loss: 0.8726 - val_accuracy: 0.6767 - val_loss: 1.6276
Epoch 5/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 207ms/step - accuracy: 0.7234 - loss: 0.8175 - val_accuracy: 0.6767 - val_loss: 0.8530
Epoch 6/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 211ms/step - accuracy: 0.7273 - loss: 0.7887 - val_accuracy: 0.5494 - val_loss: 1.0234
Epoch 7/75
[1m88/88[0m [32m━━━



Epoch 1/75


  self._warn_if_super_not_called()


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 262ms/step - accuracy: 0.3321 - loss: 1.7565 - val_accuracy: 0.5823 - val_loss: 2.8040
Epoch 2/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 208ms/step - accuracy: 0.5959 - loss: 1.0789 - val_accuracy: 0.5064 - val_loss: 1.1510
Epoch 3/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 204ms/step - accuracy: 0.6852 - loss: 0.8945 - val_accuracy: 0.5064 - val_loss: 1.3013
Epoch 4/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 214ms/step - accuracy: 0.7151 - loss: 0.8392 - val_accuracy: 0.5994 - val_loss: 1.1724
Epoch 5/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 221ms/step - accuracy: 0.7030 - loss: 0.8047 - val_accuracy: 0.6824 - val_loss: 1.1152
Epoch 6/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 263ms/step - accuracy: 0.7276 - loss: 0.7836 - val_accuracy: 0.7210 - val_loss: 0.8590
Epoch 7/75
[1m88/88[0m [32m━━━



Epoch 1/75


  self._warn_if_super_not_called()


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 517ms/step - accuracy: 0.3962 - loss: 1.5415 - val_accuracy: 0.6452 - val_loss: 1.9022
Epoch 2/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 522ms/step - accuracy: 0.6261 - loss: 1.0366 - val_accuracy: 0.7096 - val_loss: 0.8232
Epoch 3/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 253ms/step - accuracy: 0.6903 - loss: 0.8665 - val_accuracy: 0.5336 - val_loss: 1.4858
Epoch 4/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 218ms/step - accuracy: 0.6789 - loss: 0.8530 - val_accuracy: 0.6152 - val_loss: 0.8326
Epoch 5/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 250ms/step - accuracy: 0.6754 - loss: 0.8614 - val_accuracy: 0.7382 - val_loss: 0.7927
Epoch 6/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 256ms/step - accuracy: 0.7144 - loss: 0.8124 - val_accuracy: 0.5336 - val_loss: 1.7312
Epoch 7/75
[1m88/88[0m [32m━━━



Epoch 1/75


  self._warn_if_super_not_called()


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 468ms/step - accuracy: 0.2950 - loss: 1.8887 - val_accuracy: 0.6991 - val_loss: 1.2140
Epoch 2/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 225ms/step - accuracy: 0.6374 - loss: 1.0376 - val_accuracy: 0.6777 - val_loss: 1.0415
Epoch 3/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 216ms/step - accuracy: 0.6636 - loss: 0.9240 - val_accuracy: 0.7135 - val_loss: 0.8394
Epoch 4/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 263ms/step - accuracy: 0.7248 - loss: 0.7995 - val_accuracy: 0.7264 - val_loss: 0.8144
Epoch 5/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 363ms/step - accuracy: 0.6988 - loss: 0.8208 - val_accuracy: 0.7006 - val_loss: 0.8535
Epoch 6/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 218ms/step - accuracy: 0.7257 - loss: 0.7872 - val_accuracy: 0.5874 - val_loss: 1.0239
Epoch 7/75
[1m88/88[0m [32m━━━



Epoch 1/75


  self._warn_if_super_not_called()


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 248ms/step - accuracy: 0.3533 - loss: 1.7491 - val_accuracy: 0.5258 - val_loss: 1.4044
Epoch 2/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 228ms/step - accuracy: 0.6247 - loss: 1.0464 - val_accuracy: 0.7235 - val_loss: 0.8726
Epoch 3/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 210ms/step - accuracy: 0.6721 - loss: 0.9087 - val_accuracy: 0.4900 - val_loss: 1.7977
Epoch 4/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 203ms/step - accuracy: 0.6822 - loss: 0.8695 - val_accuracy: 0.5244 - val_loss: 1.4636
Epoch 5/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 209ms/step - accuracy: 0.6910 - loss: 0.8641 - val_accuracy: 0.6490 - val_loss: 0.9406
Epoch 6/75
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 214ms/step - accuracy: 0.7083 - loss: 0.8432 - val_accuracy: 0.7292 - val_loss: 0.7972
Epoch 7/75
[1m88/88[0m [32m━━━