In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/eyepacs/trainLabels.csv/trainLabels.csv
/kaggle/input/eyepacs/data/data/81_right.jpeg
/kaggle/input/eyepacs/data/data/17169_left.jpeg
/kaggle/input/eyepacs/data/data/38111_right.jpeg
/kaggle/input/eyepacs/data/data/23964_left.jpeg
/kaggle/input/eyepacs/data/data/13170_left.jpeg
/kaggle/input/eyepacs/data/data/23309_left.jpeg
/kaggle/input/eyepacs/data/data/18069_right.jpeg
/kaggle/input/eyepacs/data/data/40447_right.jpeg
/kaggle/input/eyepacs/data/data/29064_right.jpeg
/kaggle/input/eyepacs/data/data/12589_left.jpeg
/kaggle/input/eyepacs/data/data/39457_left.jpeg
/kaggle/input/eyepacs/data/data/17441_left.jpeg
/kaggle/input/eyepacs/data/data/36055_right.jpeg
/kaggle/input/eyepacs/data/data/31624_right.jpeg
/kaggle/input/eyepacs/data/data/43894_right.jpeg
/kaggle/input/eyepacs/data/data/21649_right.jpeg
/kaggle/input/eyepacs/data/data/21026_right.jpeg
/kaggle/input/eyepacs/data/data/20412_right.jpeg
/kaggle/input/eyepacs/data/data/3473_right.jpeg
/kaggle/input/eyepacs/data

In [3]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from sklearn.model_selection import train_test_split

# Enable mixed precision for faster training
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

# Load and preprocess the dataset
LABELS_PATH = '/kaggle/input/eyepacs/trainLabels.csv/trainLabels.csv'
IMAGES_PATH = '/kaggle/input/eyepacs/data/data/'

labels_df = pd.read_csv(LABELS_PATH)
labels_df['image_path'] = labels_df['image'].apply(lambda x: os.path.join(IMAGES_PATH, f"{x}.jpeg"))

# Split the dataset into training, validation, and test sets
train_df, val_test_df = train_test_split(labels_df, test_size=0.35, stratify=labels_df['level'], random_state=42)
val_df, test_df = train_test_split(val_test_df, test_size=0.5, stratify=val_test_df['level'], random_state=42)

# Function to preprocess the images
def preprocess_image(image, size=128):
    image = tf.image.resize(image, (size, size))  # Resize for training
    image = image / 255.0  # Normalize pixel values
    return image

def load_image(path, label, size=128):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = preprocess_image(image, size=size)
    return image, label

# Function to create dataset from dataframe
def create_dataset(df, batch_size, image_size=128):
    image_paths = df['image_path'].values
    labels = df['level'].values

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(lambda x, y: load_image(x, y, size=image_size), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Initialize datasets for 128x128 images
batch_size = 32
train_dataset = create_dataset(train_df, batch_size)
val_dataset = create_dataset(val_df, batch_size)
test_dataset = create_dataset(test_df, batch_size)

# Load the pre-trained EfficientNetB7 model
pretrained_weights_path = '/kaggle/input/pretrained-efficientnetb7/efficientnetb7_notop.h5'
base_model = EfficientNetB7(weights=pretrained_weights_path if os.path.exists(pretrained_weights_path) else None, include_top=False, input_shape=(128, 128, 3))

# Build and compile the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax')  # 5 classes for DR levels
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
checkpoint = ModelCheckpoint('/kaggle/working/best_weights_128x128.weights.h5', save_weights_only=True, monitor='val_loss', save_best_only=True, verbose=1)
tensorboard_callback = TensorBoard(log_dir='/kaggle/working/logs', histogram_freq=1)

# Calculate steps per epoch and validation steps based on dataset size
steps_per_epoch = len(train_df) // batch_size
validation_steps = len(val_df) // batch_size

# Train the model with 128x128 images
history_128 = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, checkpoint, tensorboard_callback]
)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy with 128x128 images: {test_accuracy * 100:.2f}%")

# Save the model
model.save('/kaggle/working/final_model_128x128.keras')

Epoch 1/10
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 604ms/step - accuracy: 0.6779 - loss: 1.0711
Epoch 1: val_loss improved from inf to 0.87231, saving model to /kaggle/working/best_weights_128x128.weights.h5
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m912s[0m 827ms/step - accuracy: 0.6780 - loss: 1.0710 - val_accuracy: 0.7350 - val_loss: 0.8723
Epoch 2/10
[1m  1/713[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m38:12:40[0m 193s/step - accuracy: 0.6000 - loss: 1.2505

  self.gen.throw(typ, value, traceback)



Epoch 2: val_loss did not improve from 0.87231
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 36ms/step - accuracy: 0.6000 - loss: 1.2505 - val_accuracy: 0.3333 - val_loss: 2.1633
Epoch 3/10
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 650ms/step - accuracy: 0.7278 - loss: 1.0686
Epoch 3: val_loss did not improve from 0.87231
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m622s[0m 845ms/step - accuracy: 0.7278 - loss: 1.0687 - val_accuracy: 0.7349 - val_loss: 1.9260
Epoch 4/10
[1m  1/713[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:41[0m 227ms/step - accuracy: 0.8000 - loss: 0.8075
Epoch 4: val_loss improved from 0.87231 to 0.71110, saving model to /kaggle/working/best_weights_128x128.weights.h5
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 29ms/step - accuracy: 0.8000 - loss: 0.8075 - val_accuracy: 0.6667 - val_loss: 0.7111
Epoch 5/10
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 628ms/step - 

In [4]:
# Fine-tuning with 224x224 images
print("\nFine-tuning with 224x224 images...")

# Reload datasets with larger image size
train_dataset_224 = create_dataset(train_df, batch_size, image_size=224)
val_dataset_224 = create_dataset(val_df, batch_size, image_size=224)
test_dataset_224 = create_dataset(test_df, batch_size, image_size=224)

# Adjust the model for fine-tuning (if necessary)
base_model.trainable = True  # Unfreeze all layers for fine-tuning

# Fine-tune the model with larger images
history_224 = model.fit(
    train_dataset_224,
    validation_data=val_dataset_224,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, checkpoint, tensorboard_callback]
)

# Evaluate the fine-tuned model
test_loss_224, test_accuracy_224 = model.evaluate(test_dataset_224)
print(f"Test Accuracy with 224x224 images: {test_accuracy_224 * 100:.2f}%")

# Save the fine-tuned model
model.save('/kaggle/working/final_model_224x224.keras')


Fine-tuning with 224x224 images...
Epoch 1/10
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680ms/step - accuracy: 0.7318 - loss: 1.1035
Epoch 1: val_loss did not improve from 0.71110
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m912s[0m 906ms/step - accuracy: 0.7318 - loss: 1.1035 - val_accuracy: 0.6123 - val_loss: nan
Epoch 2/10
[1m  1/713[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m39:12:27[0m 198s/step - accuracy: 0.7333 - loss: 2.7491
Epoch 2: val_loss did not improve from 0.71110
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 35ms/step - accuracy: 0.7333 - loss: 2.7491 - val_accuracy: 0.3333 - val_loss: 6.2064
Epoch 3/10
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 675ms/step - accuracy: 0.7215 - loss: 1.2666
Epoch 3: val_loss did not improve from 0.71110
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m640s[0m 871ms/step - accuracy: 0.7215 - loss: 1.2664 - val_accuracy: 0.7347 - val_loss: 0.9904
