In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; later cells read files from under
# this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import os
import json
import shutil

# Location of the Kaggle API token on the mounted Google Drive.
kaggle_json_path = '/content/drive/Othercomputers/My laptop/Current_Project_Code_Files/kaggle.json'

# Directory the token is installed into, and the token's final path.
kaggle_config_dir = '/root/.config/kaggle'
kaggle_json_dest = os.path.join(kaggle_config_dir, 'kaggle.json')

# Create the config directory if needed and install the token there.
os.makedirs(kaggle_config_dir, exist_ok=True)
shutil.copy(kaggle_json_path, kaggle_json_dest)

# Restrict the token to owner read/write. The mode has to be an octal literal:
# decimal 600 is 0o1130 (sticky bit, owner execute-only, group write+execute),
# which is not the intended rw------- permission set.
os.chmod(kaggle_json_dest, 0o600)

# Imported here rather than at the top of the cell so that the credentials
# file is already in place when the package is loaded.
import kaggle

# Download the OCT dataset archive into the current working directory and
# extract it there.
kaggle.api.dataset_download_files('paultimothymooney/kermany2018', path='.', unzip=True)

print("Dataset downloaded successfully!")

Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/kermany2018
Dataset downloaded successfully!


In [5]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model

# ImageNet-pretrained ResNet50 used purely as a feature extractor: the
# classification head is dropped and inputs are 224x224 RGB.
# NOTE(review): Keras ships a ResNet50-specific `preprocess_input`; confirm
# that the pixel scaling applied when images are loaded matches what these
# pretrained weights expect.
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(224, 224, 3))

# Keep every backbone layer fixed so only the new head is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# New head: global average pooling, two ReLU dense layers, 4-way softmax.
x = GlobalAveragePooling2D()(base_model.output)
for hidden_units in (256, 128):
    x = Dense(hidden_units, activation='relu')(x)
outputs = Dense(4, activation='softmax')(x)

# Assemble backbone + head and compile for one-hot encoded targets.
model = Model(inputs=base_model.input, outputs=outputs)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [20]:
!pip install imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.12.3-py3-none-any.whl.metadata (8.3 kB)
Downloading imbalanced_learn-0.12.3-py3-none-any.whl (258 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.3/258.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.12.3


In [22]:

import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils import shuffle

# File extensions treated as images by load_image_data (compared lower-cased).
_IMAGE_EXTENSIONS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp')


def load_image_data(data_dir, class_names, img_size=(224, 224), max_images_per_class=None):
    """Load images from one sub-directory per class into NumPy arrays.

    Args:
        data_dir: Directory that contains one sub-directory per class.
        class_names: Sub-directory names; the position of a name in this
            sequence is the integer label assigned to its images.
        img_size: ``(height, width)`` every image is resized to.
        max_images_per_class: Maximum number of images taken from each class.
            ``None`` means no limit; ``0`` loads nothing.

    Returns:
        ``(images, labels)``: ``images`` holds the pixel data divided by 255,
        stacked along the first axis; ``labels`` holds the matching integer
        class indices. When nothing is loaded, ``images`` has shape
        ``(0, height, width, 3)`` so callers can still unpack four dimensions.

    Raises:
        FileNotFoundError: If a class sub-directory does not exist.
    """
    images = []
    labels = []
    for class_index, class_name in enumerate(class_names):
        class_dir = os.path.join(data_dir, class_name)
        image_count = 0
        # Sorted so that the subset picked under `max_images_per_class` is the
        # same on every run; os.listdir returns entries in arbitrary order.
        for img_name in sorted(os.listdir(class_dir)):
            # Explicit None check: a cap of 0 must not be read as "no cap".
            if max_images_per_class is not None and image_count >= max_images_per_class:
                break
            img_path = os.path.join(class_dir, img_name)
            # Skip sub-directories and stray non-image files instead of
            # handing them to the image decoder.
            extension = os.path.splitext(img_name)[1].lower()
            if extension not in _IMAGE_EXTENSIONS or not os.path.isfile(img_path):
                continue
            # Load and resize the image
            img = load_img(img_path, target_size=img_size)
            # Convert the image to a numerical array and scale pixels by 1/255
            img_array = img_to_array(img) / 255.0
            images.append(img_array)
            labels.append(class_index)
            image_count += 1

    if not images:
        # Keep the rank of the image array stable for empty results.
        # assumes 3 channels, i.e. load_img's default colour mode — TODO confirm
        empty_images = np.empty((0, img_size[0], img_size[1], 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels)


# Class order fixes the integer label of each class (index in this list).
class_names = ['NORMAL', 'CNV', 'DME', 'DRUSEN']

# Train / validation folders. The space after "OCT2017" is part of the path
# as written here; do not strip it.
data_dir = '/content/OCT2017 /train'
data_dir2 = '/content/OCT2017 /val'

# Load a capped number of images per class for each split.
training_imgs, img_labels = load_image_data(
    data_dir, class_names, max_images_per_class=2000
)
val_imgs, v_img_labels = load_image_data(
    data_dir2, class_names, max_images_per_class=10
)

# One-hot encode the integer labels of both splits.
y_train, y_val = (
    to_categorical(split_labels, num_classes=4)
    for split_labels in (img_labels, v_img_labels)
)

# Integer labels again, as required by the oversampler.
y_train_int = y_train.argmax(axis=1)

# The oversampler works on 2D input, so flatten each image to one row.
n_samples, height, width, channels = training_imgs.shape
X_reshaped = training_imgs.reshape(n_samples, -1)

# Randomly duplicate samples until the classes are balanced.
oversample = RandomOverSampler(random_state=42, sampling_strategy='auto')
X_resampled, y_resampled = oversample.fit_resample(X_reshaped, y_train_int)

# Restore the image layout and the one-hot labels, then shuffle both together.
X_resampled = X_resampled.reshape(-1, height, width, channels)
y_resampled = to_categorical(y_resampled, num_classes=4)
X_resampled, y_resampled = shuffle(X_resampled, y_resampled, random_state=42)


# Summary of what was loaded (before oversampling).
print(f"Total images loaded: {len(training_imgs)}")
print(f"Number of classes: {len(class_names)}")
for i, class_name in enumerate(class_names):
    class_count = (img_labels == i).sum()
    print(f"  {class_name}: {class_count} images")

Total images loaded: 4000
Number of classes: 4
  NORMAL: 1000 images
  CNV: 1000 images
  DME: 1000 images
  DRUSEN: 1000 images


In [14]:
# Number of regular files in each class folder of the training split.
class_counts = {}

for class_name in class_names:
    class_dir = os.path.join(data_dir, class_name)
    # Each True from isfile counts as 1; directories are not counted.
    image_count = sum(
        os.path.isfile(os.path.join(class_dir, entry))
        for entry in os.listdir(class_dir)
    )
    class_counts[class_name] = image_count

print("Total images in each class:")
for class_name, count in class_counts.items():
    print(f"  {class_name}: {count} images")

Total images in each class:
  NORMAL: 26315 images
  CNV: 37205 images
  DME: 11348 images
  DRUSEN: 8616 images


In [25]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight


# Class weights are derived from the labels the model is actually fitted on
# (the oversampled set). Deriving them from the pre-oversampling labels would
# compensate for class imbalance twice: once by duplicating samples and once
# more through the loss weights.
train_label_ids = np.argmax(y_resampled, axis=1)
present_classes = np.unique(train_label_ids)
class_weights = compute_class_weight('balanced', classes=present_classes, y=train_label_ids)
# Key each weight by its class label rather than by its position in the
# weight array, so a class with no samples cannot shift the mapping.
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(present_classes, class_weights)
}

# Keep the weights with the best validation accuracy on disk, and stop early
# (restoring the best weights) if validation loss stalls for 5 epochs.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_accuracy', mode='max')
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Alternative kept for reference: fit on `training_imgs` / `y_train` directly
# (no oversampling) for 5 epochs, with class weights computed from
# `img_labels`.

# Train the model with the resampled data
history = model.fit(
    X_resampled,
    y_resampled,
    validation_data=(val_imgs, y_val),
    epochs=6,
    batch_size=124,
    callbacks=[checkpoint, early_stop],
    class_weight=class_weight_dict
)
# Predict on the validation images and reduce both predictions and one-hot
# targets to integer class indices.
# NOTE(review): this is the same data passed as `validation_data` during
# training, so the report is not an independent test-set estimate.
y_pred = model.predict(val_imgs)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_val, axis=1)

# Generate and print the classification report
class_names = ['NORMAL', 'CNV', 'DME', 'DRUSEN']
# `labels` is passed explicitly so the report always has one row per class.
# Without it, a class absent from both y_true and y_pred makes the number of
# observed labels differ from len(target_names) and the call raises.
report = classification_report(
    y_true,
    y_pred_classes,
    labels=np.arange(len(class_names)),
    target_names=class_names,
)
print("\nClassification Report:")
print(report)

Epoch 1/6

  saving_api.save_model(


Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6

Classification Report:
              precision    recall  f1-score   support

      NORMAL       0.89      1.00      0.94         8
         CNV       0.67      1.00      0.80         8
         DME       1.00      0.38      0.55         8
      DRUSEN       0.62      0.62      0.62         8

    accuracy                           0.75        32
   macro avg       0.80      0.75      0.73        32
weighted avg       0.80      0.75      0.73        32

