In [None]:
!unzip /content/Glaucoma.zip

In [None]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action='ignore')

In [None]:
import os

file_path = "/content/ORIGA/ORIGA/Images"

# Report how many files with a .jpg extension (any letter case) sit in the
# image directory, or report the path when it is missing / not a directory.
if not (os.path.exists(file_path) and os.path.isdir(file_path)):
    print("Directory not found:", file_path)
else:
    jpg_files = []
    for entry in os.listdir(file_path):
        if entry.lower().endswith(".jpg"):
            jpg_files.append(entry)
    print("Number of images:", len(jpg_files))

In [None]:
import os
import pandas as pd

file_path = "/content/glaucoma.csv"

# Read the label CSV and summarize it; when the file is absent only a message
# is printed and `data` is not assigned by this cell.
if not os.path.exists(file_path):
    print(f"File not found: {file_path}")
else:
    data = pd.read_csv(file_path)
    print(f"Number of rows: {data.shape[0]}")
    print(f"Number of columns: {data.shape[1]}")
    print("Column names:", data.columns.tolist())

    # Rich preview of the first rows (display is provided by IPython)
    display(data.head())

In [None]:
# For every column, print the number of distinct values and the values
# themselves; columns with more than ten distinct values show only the first ten.
for column in data.columns:
    unique_values = data[column].unique()
    num_unique = len(unique_values)
    print(f"Column: '{column}' | Unique Values: {num_unique}")

    if num_unique > 10:
        print("First 10 unique values:", unique_values[:10], "...\n")
    else:
        print("Values:", unique_values, "\n")

In [None]:
# Column dtypes / non-null counts, followed by the number of missing entries per column.
data.info()
missing_per_column = data.isnull().sum()  # data.isna().sum() is equivalent
print("\nMissing Values Per Column:\n")
print(missing_per_column)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Show the class balance of the 'Glaucoma' label as counts and as a bar chart.
# Without that column there is nothing to plot, so only a message is printed.
if 'Glaucoma' not in data.columns:
    print("Column 'Glaucoma' not found in dataset!")
else:
    glaucoma_counts = data['Glaucoma'].value_counts()

    # Raw counts first, to make any imbalance visible in text form
    print("Glaucoma distribution:\n", glaucoma_counts)

    plt.figure(figsize=(6, 4))
    sns.barplot(
        x=glaucoma_counts.index,
        y=glaucoma_counts.values,
        palette="viridis",
    )
    plt.title('Distribution of Glaucoma Classes')
    plt.xlabel('Glaucoma (0: No, 1: Yes)')
    plt.ylabel('Number of Samples')
    plt.xticks(rotation=0)
    plt.show()

In [None]:
import os
import shutil
import pandas as pd

# Input and output locations
base_dir = "/content/ORIGA/ORIGA"
csv_file_path = "/content/glaucoma.csv"
image_dir = os.path.join(base_dir, "Images")
output_dir = "/content/organized_data"

# Stop immediately when the label CSV is missing; otherwise load it
if not os.path.exists(csv_file_path):
    raise FileNotFoundError(f"CSV file not found at: {csv_file_path}")
data = pd.read_csv(csv_file_path)
print(f"CSV file loaded successfully! Shape: {data.shape}")

# The class label and the image file name are both needed below
required_columns = {"Glaucoma", "Filename"}
if not required_columns.issubset(data.columns):
    raise KeyError(f"Missing required columns! Expected {required_columns}, but found {set(data.columns)}")

# One "images" and one "csv" folder per class under the output root
categories = ["yes", "no"]
subfolders = ["images", "csv"]

for category in categories:
    for subfolder in subfolders:
        target_folder = os.path.join(output_dir, category, subfolder)
        os.makedirs(target_folder, exist_ok=True)

# Rows labelled 1 form the "yes" subset, rows labelled 0 the "no" subset
glaucoma_flag = data["Glaucoma"]
yes_data = data[glaucoma_flag == 1]
no_data = data[glaucoma_flag == 0]

# Function to organize images and save CSVs
def organize_data(subset_data, category, output_root=None, source_dir=None):
    """Copy one class's images into its folder and write that class's CSV beside them.

    Args:
        subset_data: DataFrame with a "Filename" column, one row per image.
        category: Name of the class folder under the output root
            (the notebook calls this with "yes" and "no").
        output_root: Root folder to organize into. When omitted, the
            notebook-level ``output_dir`` is looked up at call time. Pass it
            explicitly to stay independent of later reassignments of that global.
        source_dir: Folder holding the source images. When omitted, the
            notebook-level ``image_dir`` is used.

    Side effects:
        Creates ``<root>/<category>/images`` and ``<root>/<category>/csv`` if
        they do not exist, copies every listed image that exists in the source
        folder, prints a warning for each listed image that does not, writes
        all rows of ``subset_data`` to ``csv/data.csv`` and prints a summary.
    """
    if output_root is None:
        output_root = output_dir
    if source_dir is None:
        source_dir = image_dir

    images_folder = os.path.join(output_root, category, "images")
    csv_folder = os.path.join(output_root, category, "csv")
    # Do not rely on another cell having created the target folders
    os.makedirs(images_folder, exist_ok=True)
    os.makedirs(csv_folder, exist_ok=True)

    copied_files = 0  # Track successful copies

    # Only the file name is needed per row, so iterate the column directly
    for filename in subset_data["Filename"]:
        src_path = os.path.join(source_dir, filename)

        # Copy image only if it exists
        if os.path.exists(src_path):
            shutil.copy(src_path, os.path.join(images_folder, filename))
            copied_files += 1
        else:
            print(f"Warning: File not found - {src_path}")

    # The CSV keeps every row of the subset, including rows whose image was missing
    csv_path = os.path.join(csv_folder, "data.csv")
    subset_data.to_csv(csv_path, index=False)

    print(f"{category.capitalize()} Data: {copied_files} images copied, CSV saved at {csv_path}")

# Organize the glaucoma-positive subset first, then the negative one
for subset, class_folder in ((yes_data, "yes"), (no_data, "no")):
    organize_data(subset, class_folder)

print("\n Data organization complete! Images and CSVs are stored in 'yes' and 'no' folders.")

In [None]:
import os

# Image folders of the two organized classes
yes_path = "/content/organized_data/yes/images"
no_path = "/content/organized_data/no/images"

# Function to count JPG images in a directory
def count_images(directory, label):
    """Print how many .jpg files (any letter case) are directly inside ``directory``.

    Args:
        directory: Path of the folder to inspect.
        label: Name used for the folder in the printed message.

    Prints a warning instead when ``directory`` is not an existing directory.
    This includes the case where the path points at a regular file, which
    ``os.listdir`` would otherwise reject with NotADirectoryError.
    """
    if not os.path.isdir(directory):
        print(f"Warning: Directory not found - {directory}")
        return

    jpg_count = sum(1 for name in os.listdir(directory) if name.lower().endswith(".jpg"))
    print(f"Number of images in {label} directory: {jpg_count}")

# Report the image count of the negative class, then of the positive class
count_images(directory=no_path, label="no")
count_images(directory=yes_path, label="yes")

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Paths
yes_images_folder = "/content/organized_data/yes/images"
output_dir = "/content/organized_data/aug_yes"
aug_images_folder = os.path.join(output_dir, "images")
aug_csv_folder = os.path.join(output_dir, "csv")
yes_csv_path = "/content/organized_data/yes/csv/data.csv"

# Number of augmented variants written per source image (suffixes _1 ... _N)
AUGMENTATIONS_PER_IMAGE = 2

# Load the CSV describing the glaucoma-positive images
yes_data = pd.read_csv(yes_csv_path)

# Create output directories for augmented data
os.makedirs(aug_images_folder, exist_ok=True)
os.makedirs(aug_csv_folder, exist_ok=True)

# Random geometric transformations applied to each source image
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest"
)

# One entry per written image (original or augmented); becomes the new CSV
augmented_rows = []

# NOTE(review): these variants are written before the notebook later splits the
# combined CSV row-by-row into train/validation/test, so variants of one source
# image can end up in different splits. Confirm whether that is intended.
for _, row in yes_data.iterrows():
    filename = row["Filename"]
    src_path = os.path.join(yes_images_folder, filename)

    # Rows whose image file is missing produce no image and no CSV row
    if not os.path.exists(src_path):
        continue

    # The context manager releases the file handle right away; convert()
    # returns an independent, fully loaded 3-channel copy that outlives it.
    with Image.open(src_path) as source_image:
        img = source_image.convert("RGB")
    img_array = np.expand_dims(np.array(img), axis=0)

    # Save the (re-encoded) original image to the new folder
    original_path = os.path.join(aug_images_folder, filename)
    img.save(original_path)

    # Keep the original image's row in the new CSV
    new_row = row.copy()
    new_row["Filename"] = filename
    augmented_rows.append(new_row)

    # datagen.flow() yields batches endlessly; zipping it with a finite range
    # stops after exactly AUGMENTATIONS_PER_IMAGE batches.
    stem = os.path.splitext(filename)[0]
    variant_batches = zip(
        range(1, AUGMENTATIONS_PER_IMAGE + 1),
        datagen.flow(img_array, batch_size=1),
    )
    for i, batch in variant_batches:
        augmented_filename = f"{stem}_{i}.jpg"
        augmented_image_path = os.path.join(aug_images_folder, augmented_filename)

        # Convert the float batch entry back to an 8-bit image and save it
        augmented_img = Image.fromarray(batch[0].astype("uint8"))
        augmented_img.save(augmented_image_path)

        # Same metadata as the source row, pointing at the augmented file
        new_augmented_row = row.copy()
        new_augmented_row["Filename"] = augmented_filename
        augmented_rows.append(new_augmented_row)

# Save updated CSV
augmented_data = pd.DataFrame(augmented_rows)
aug_csv_path = os.path.join(aug_csv_folder, "data.csv")
augmented_data.to_csv(aug_csv_path, index=False)

print(f"Augmented images saved to: {aug_images_folder}")
print(f"Updated CSV saved to: {aug_csv_path}")

In [None]:
import os

# Folder holding the original and augmented glaucoma-positive images
file_path = "/content/organized_data/aug_yes/images"

# Count the .jpg files (any letter case) in that folder, or report that it is missing
if not (os.path.exists(file_path) and os.path.isdir(file_path)):
    print(f"Error: The directory '{file_path}' does not exist!")
else:
    all_files = os.listdir(file_path)

    aug_files = []
    for entry in all_files:
        if entry.lower().endswith(".jpg"):
            aug_files.append(entry)

    print(f"Number of total augmented images: {len(aug_files)}")

In [None]:
import os
import pandas as pd

# CSV written by the augmentation step
file_path = "/content/organized_data/aug_yes/csv/data.csv"

# Load the CSV and print its dimensions; any read failure is reported, not raised
if not (os.path.exists(file_path) and os.path.isfile(file_path)):
    print(f" Error: The file '{file_path}' does not exist!")
else:
    try:
        df = pd.read_csv(file_path)

        if not df.empty:
            print(f"Successfully loaded '{file_path}'")
            print("Number of rows:", df.shape[0])
            print("Number of columns:", df.shape[1])
            print("Column names:", df.columns.tolist())
        else:
            print(f"Warning: The CSV file '{file_path}' is empty.")

    except Exception as e:
        print(f" Error reading the CSV file: {e}")

In [None]:
import os
import matplotlib.pyplot as plt

# Image folders compared below: negative class vs. augmented positive class
no_images_folder = "/content/organized_data/no/images"
aug_yes_images_folder = "/content/organized_data/aug_yes/images"

# Function to count images in a folder
def count_images(folder_path):
    """Return the number of .jpg files (any letter case) directly inside ``folder_path``.

    When ``folder_path`` is not an existing directory a warning is printed
    and 0 is returned.
    """
    if not (os.path.exists(folder_path) and os.path.isdir(folder_path)):
        print(f"Warning: The folder '{folder_path}' does not exist!")
        return 0

    jpg_total = 0
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(".jpg"):
            jpg_total += 1
    return jpg_total

# Number of .jpg files in each folder
num_aug_yes_images = count_images(aug_yes_images_folder)
num_no_images = count_images(no_images_folder)

# Bar labels and heights, in matching order
categories = ["Augmented Yes", "No"]
counts = [num_aug_yes_images, num_no_images]

# Draw the comparison only when at least one image was found
if sum(counts) != 0:
    plt.figure(figsize=(8, 5))
    plt.bar(categories, counts, color=["blue", "green"])
    plt.title("Comparison of Image Counts Between Augmented Yes and No Folders")
    plt.xlabel("Category")
    plt.ylabel("Number of Images")
    plt.ylim(0, max(counts) + 10)  # Leave headroom above the tallest bar
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()
else:
    print("No images found in both folders. Skipping the plot.")

In [None]:
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.utils import to_categorical

# CSV and image locations of the augmented "yes" class and the "no" class
aug_yes_csv_path = "/content/organized_data/aug_yes/csv/data.csv"
aug_yes_images_folder = "/content/organized_data/aug_yes/images"

no_csv_path = "/content/organized_data/no/csv/data.csv"
no_images_folder = "/content/organized_data/no/images"

# Read each class CSV and tag its rows: 1 for "yes", 0 for "no"
data_yes = pd.read_csv(aug_yes_csv_path)
data_yes['label'] = 1

data_no = pd.read_csv(no_csv_path)
data_no['label'] = 0

# Stack both classes, then shuffle the rows with a fixed seed
data = (
    pd.concat([data_yes, data_no], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# 70% train, then the remaining 30% halved into validation and test;
# both splits are stratified on the label.
# NOTE(review): the "yes" CSV also lists augmented variants of each source image
# (files named <stem>_<n>.jpg by the augmentation cell). This row-level split can
# place variants of one source image in different splits; confirm that is acceptable.
train_data, temp_data = train_test_split(
    data,
    test_size=0.3,
    stratify=data['label'],
    random_state=42,
)
valid_data, test_data = train_test_split(
    temp_data,
    test_size=0.5,
    stratify=temp_data['label'],
    random_state=42,
)

# Function to preprocess images and create data lists
def preprocess_and_load_data(data_subset, folder_paths):
    """Load the images listed in ``data_subset`` as ResNet50-preprocessed arrays.

    Args:
        data_subset: DataFrame with "Filename" and "label" columns.
        folder_paths: Dict with keys 'yes' and 'no' giving the image folder
            for label 1 and for any other label, respectively.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the rows whose
        image file exists; rows with a missing file are skipped silently.
    """
    loaded_images, loaded_labels = [], []

    for _, record in data_subset.iterrows():
        filename = record["Filename"]
        label = record["label"]

        # Label 1 lives in the 'yes' folder, everything else in the 'no' folder
        folder_key = 'yes' if label == 1 else 'no'
        image_path = os.path.join(folder_paths[folder_key], filename)

        if not os.path.exists(image_path):
            continue

        # Resize to 224x224 on load, then apply ResNet50's input preprocessing
        pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
        loaded_images.append(preprocess_input(pixels))
        loaded_labels.append(label)

    return np.array(loaded_images), np.array(loaded_labels)

# Image folder for each class key used by preprocess_and_load_data
folder_paths = {"yes": aug_yes_images_folder, "no": no_images_folder}

# Load and preprocess the images of every split
train_images, train_labels = preprocess_and_load_data(train_data, folder_paths)
valid_images, valid_labels = preprocess_and_load_data(valid_data, folder_paths)
test_images, test_labels = preprocess_and_load_data(test_data, folder_paths)

# One-hot encode the integer labels (two classes: 0 and 1)
num_classes = 2
train_labels, valid_labels, test_labels = (
    to_categorical(split_labels, num_classes=num_classes)
    for split_labels in (train_labels, valid_labels, test_labels)
)

# Array shapes of every split
print(f"Train images shape: {train_images.shape}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Validation images shape: {valid_images.shape}")
print(f"Validation labels shape: {valid_labels.shape}")
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")

# Column sums of the one-hot labels give the per-class sample counts
print("Class distribution in Train:", np.sum(train_labels, axis=0))
print("Class distribution in Validation:", np.sum(valid_labels, axis=0))
print("Class distribution in Test:", np.sum(test_labels, axis=0))


In [None]:
# On-the-fly augmentation applied to training batches: small rotations and
# shifts plus horizontal flips; uncovered pixels are filled from the nearest edge.
train_augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
data_gen = ImageDataGenerator(**train_augmentation_options)

# Define ResNet50 model
def create_resnet50_model(input_shape, num_classes):
    """Build and compile a ResNet50 transfer-learning classifier.

    Args:
        input_shape: Shape of one input image, passed to ResNet50.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras Model: ImageNet-weighted ResNet50 without its top,
        all backbone layers frozen, followed by global average pooling, a
        256-unit ReLU layer and a softmax layer. Compiled with Adam (lr 1e-4),
        categorical cross-entropy and the accuracy metric.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # Keep the pre-trained backbone fixed; only the new head is trainable
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head stacked on the backbone output
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu')(pooled)
    class_probabilities = Dense(num_classes, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.input, outputs=class_probabilities)
    classifier.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss=tf.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    return classifier

In [None]:
def lr_schedule(epoch, lr):
    """Return the learning rate for ``epoch`` given the current rate ``lr``.

    The rate is multiplied by 0.9 on every fifth epoch (5, 10, 15, ...) and
    left unchanged otherwise, including epoch 0. The result is always a float.
    """
    is_decay_epoch = epoch > 0 and epoch % 5 == 0
    if is_decay_epoch:
        return float(lr * 0.9)
    return float(lr)

# Apply lr_schedule at the start of every epoch
learning_rate_scheduler = LearningRateScheduler(lr_schedule)

# Stop once val_loss has not improved for 7 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
)

In [None]:
# Build the ResNet50 classifier for 224x224 RGB inputs and two classes (Yes / No)
input_shape = (224, 224, 3)
num_classes = 2
model = create_resnet50_model(input_shape, num_classes)

# Train on augmented batches of 32; validation uses the un-augmented arrays
train_batches = data_gen.flow(train_images, train_labels, batch_size=32)
history = model.fit(
    train_batches,
    epochs=50,
    validation_data=(valid_images, valid_labels),
    callbacks=[early_stopping, learning_rate_scheduler],
)


In [None]:
# Save under /content with absolute paths. The evaluation cell loads
# "/content/resnet50_model.h5" and the VGG16 artifacts are saved under /content
# too, so a relative "./" path would break whenever the working directory differs.
# (The earlier '/content/drive/MyDrive' folder was created but never written to.)
save_dir = '/content'
os.makedirs(save_dir, exist_ok=True)

weights_path = os.path.join(save_dir, 'resnet50_model_weights.weights.h5')
model_path = os.path.join(save_dir, 'resnet50_model.h5')

# Show where the artifacts are going
print(f"Saving weights to: {weights_path}")
print(f"Saving model to: {model_path}")

# Save the weights alone and the full model
model.save_weights(weights_path)
model.save(model_path)

print("Model and weights saved successfully!")

In [None]:
# Loss and accuracy of the trained model on the validation arrays
results = model.evaluate(valid_images, valid_labels, batch_size=32)
validation_loss, validation_accuracy = results[0], results[1]
print(f"Validation Loss: {validation_loss}, Validation Accuracy: {validation_accuracy}")

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Sanity check: shapes of the test arrays and the first few labels
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")
print(f"Test labels content: {test_labels[:10]}")  # Sample labels

# One-hot rows are collapsed to class indices; 1-D label arrays are used unchanged
if len(test_labels.shape) == 1:
    test_labels_int = test_labels
else:
    test_labels_int = np.argmax(test_labels, axis=1)

# Reload the saved ResNet50 model from disk
model = tf.keras.models.load_model("/content/resnet50_model.h5")  # Path to your trained model

# Class probabilities for every test image, then the most probable class
predictions = model.predict(test_images)
predicted_classes = np.argmax(predictions, axis=1)

# Overall accuracy
accuracy = accuracy_score(test_labels_int, predicted_classes)
print(f"Manual Test Accuracy: {accuracy * 100:.2f}%")

# Display names derived from the classes present in the true labels
unique_classes = np.unique(test_labels_int)
target_names = []
for cls in unique_classes:
    target_names.append(f"Class {cls}")

print("\nClassification Report:")
print(classification_report(test_labels_int, predicted_classes, target_names=target_names))

# Confusion matrix as text ...
print("\nConfusion Matrix:")
conf_matrix = confusion_matrix(test_labels_int, predicted_classes)
print(conf_matrix)

# ... and as a heatmap
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=target_names,
    yticklabels=target_names,
)
plt.title("Confusion Matrix (RESNET50)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# True vs. predicted class of every test image, numbered from 1
for image_number, (true_label, pred_label) in enumerate(zip(test_labels_int, predicted_classes), start=1):
    print(f"Image {image_number}: True Label: {true_label}, Predicted Label: {pred_label}")

In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# ROC curve from the predicted probability of class index 1
# (treated here as the glaucoma class; adjust the column if the encoding differs).
positive_scores = predictions[:, 1]
fpr, tpr, thresholds = roc_curve(test_labels_int, positive_scores)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Receiver Operating Characteristic (ROC) Curve (ResNet50)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()


In [None]:
import matplotlib.pyplot as plt
# Training curves of the ResNet50 run: accuracy on the left, loss on the right.
# Each entry: (train key, validation key, panel title, y-axis label).
resnet50_panels = [
    ('accuracy', 'val_accuracy', 'Model accuracy (ResNet50)', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss (ResNet50)', 'Loss'),
]

plt.figure(figsize=(10, 5))
for panel_index, (train_key, val_key, panel_title, y_label) in enumerate(resnet50_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D

# Define VGG16 model
def create_vgg16_model(input_shape, num_classes):
    """Build and compile a VGG16 transfer-learning classifier.

    Args:
        input_shape: Shape of one input image, passed to VGG16.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled Keras Model: ImageNet-weighted VGG16 without its top, all
        backbone layers frozen, followed by Flatten, a 256-unit ReLU layer,
        Dropout(0.5) and a softmax layer. Compiled with Adam (lr 1e-4),
        categorical cross-entropy and the accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # Keep the pre-trained backbone fixed; only the new head is trainable
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head stacked on the backbone output
    flattened = Flatten()(backbone.output)
    hidden = Dense(256, activation='relu')(flattened)
    regularized = Dropout(0.5)(hidden)
    class_probabilities = Dense(num_classes, activation='softmax')(regularized)

    classifier = Model(inputs=backbone.input, outputs=class_probabilities)
    classifier.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
# Fresh callback instances for the VGG16 run: stop after 7 epochs without
# val_loss improvement (restoring the best weights) and apply lr_schedule each epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
)
learning_rate_scheduler = LearningRateScheduler(lr_schedule)

In [None]:
# Build the VGG16 classifier for 224x224 RGB inputs and two classes (Yes / No)
input_shape = (224, 224, 3)
num_classes = 2
vgg16_model = create_vgg16_model(input_shape, num_classes)

# Train directly on the preprocessed arrays with one-hot labels, in batches of 32
vgg16_fit_options = dict(
    batch_size=32,
    epochs=25,
    validation_data=(valid_images, valid_labels),
    callbacks=[early_stopping, learning_rate_scheduler],
)
history_vgg16 = vgg16_model.fit(train_images, train_labels, **vgg16_fit_options)

In [None]:
# Persist the trained VGG16: the full model and, separately, its weights
vgg16_model_path = '/content/vgg16_model.h5'
vgg16_weights_path = '/content/vgg16_model_weights.weights.h5'

vgg16_model.save_weights(vgg16_weights_path)
vgg16_model.save(vgg16_model_path)

print("VGG16 model and weights saved successfully!")

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import load_model

# Reload the saved VGG16 model from disk
vgg16_model = load_model('/content/vgg16_model.h5')  # Path to the saved VGG16 model

# Sanity check: shapes of the test arrays and the first few labels
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")
print(f"Sample test labels (one-hot): {test_labels[:10]}")

# One-hot rows are collapsed to class indices; 1-D label arrays are used unchanged
if len(test_labels.shape) == 1:
    test_labels_int = test_labels
else:
    test_labels_int = np.argmax(test_labels, axis=1)

# Class probabilities for every test image, then the most probable class
vgg16_predictions = vgg16_model.predict(test_images)
vgg16_predicted_classes = np.argmax(vgg16_predictions, axis=1)

# Overall accuracy
vgg16_test_accuracy = accuracy_score(test_labels_int, vgg16_predicted_classes)
print(f"VGG16 Test Accuracy: {vgg16_test_accuracy * 100:.2f}%")

# Per-class precision / recall / F1, named after the classes present in the true labels
print("\nClassification Report (VGG16):")
target_names = []
for cls in np.unique(test_labels_int):
    target_names.append(f"Class {cls}")
print(classification_report(test_labels_int, vgg16_predicted_classes, target_names=target_names))

# Confusion matrix as text ...
print("\nConfusion Matrix (VGG16):")
vgg16_conf_matrix = confusion_matrix(test_labels_int, vgg16_predicted_classes)
print(vgg16_conf_matrix)

# ... and as a heatmap
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
sns.heatmap(
    vgg16_conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=target_names,
    yticklabels=target_names,
)
plt.title("Confusion Matrix (VGG16)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# True vs. predicted class of every test image, numbered from 1
for image_number, (true_label, pred_label) in enumerate(zip(test_labels_int, vgg16_predicted_classes), start=1):
    print(f"Image {image_number}: True Label: {true_label}, Predicted Label: {pred_label}")


In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# ROC curve for VGG16 from the predicted probability of class index 1
# (treated here as the glaucoma class; adjust the column if the encoding differs).
# `test_labels_int` holds the true class indices.
vgg16_positive_scores = vgg16_predictions[:, 1]
fpr, tpr, thresholds = roc_curve(test_labels_int, vgg16_positive_scores)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Receiver Operating Characteristic (ROC) Curve (VGG16)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()


In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Flatten, Dropout

# Training curves of the VGG16 run: accuracy on the left, loss on the right.
# Each entry: (train key, validation key, panel title, y-axis label).
vgg16_panels = [
    ('accuracy', 'val_accuracy', 'Model accuracy (VGG16)', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss (VGG16)', 'Loss'),
]

plt.figure(figsize=(10, 5))
for panel_index, (train_key, val_key, panel_title, y_label) in enumerate(vgg16_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history_vgg16.history[train_key])
    plt.plot(history_vgg16.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image
import timm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# CSV and image locations of the augmented "yes" class and the "no" class
aug_yes_csv_path = "/content/organized_data/aug_yes/csv/data.csv"
aug_yes_images_folder = "/content/organized_data/aug_yes/images"

no_csv_path = "/content/organized_data/no/csv/data.csv"
no_images_folder = "/content/organized_data/no/images"

# Read each class CSV and tag its rows: 1 = glaucoma detected, 0 = no glaucoma
data_yes = pd.read_csv(aug_yes_csv_path)
data_yes['label'] = 1

data_no = pd.read_csv(no_csv_path)
data_no['label'] = 0

# Stack both classes, then shuffle the rows with a fixed seed
data = (
    pd.concat([data_yes, data_no], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# 70% train, then the remaining 30% halved into validation and test;
# both splits are stratified on the label.
# (train_test_split is imported by an earlier cell of this notebook.)
train_data, temp_data = train_test_split(
    data,
    test_size=0.3,
    stratify=data['label'],
    random_state=42,
)
valid_data, test_data = train_test_split(
    temp_data,
    test_size=0.5,
    stratify=temp_data['label'],
    random_state=42,
)

# Resize, randomly flip / rotate, convert to a tensor, then normalize per channel
transform_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(transform_steps)

# Custom dataset class
class GlaucomaDataset(Dataset):
    """Dataset that serves (image, label) pairs described by a DataFrame.

    Each row supplies a "Filename" and a "label". The image is read from
    ``folder_paths['yes']`` when the label equals 1 and from
    ``folder_paths['no']`` otherwise, converted to RGB and, when a transform
    was given, passed through it.
    """

    def __init__(self, data, folder_paths, transform=None):
        self.data, self.folder_paths, self.transform = data, folder_paths, transform

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.data.iloc[idx]
        filename = record["Filename"]
        label = record["label"]

        folder_key = 'yes' if label == 1 else 'no'
        image_path = os.path.join(self.folder_paths[folder_key], filename)

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, label

# Image folder for each class key used by GlaucomaDataset
folder_paths = {"yes": aug_yes_images_folder, "no": no_images_folder}

# Validation and test images must be preprocessed deterministically. The shared
# `transform` contains RandomHorizontalFlip / RandomRotation, which would make the
# reported validation and test metrics change from one evaluation pass to the next.
# This pipeline keeps only the resize, tensor conversion and normalization steps.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Random augmentation for training only
train_dataset = GlaucomaDataset(train_data, folder_paths, transform=transform)
valid_dataset = GlaucomaDataset(valid_data, folder_paths, transform=eval_transform)
test_dataset = GlaucomaDataset(test_data, folder_paths, transform=eval_transform)

# Only the training loader shuffles; evaluation order stays fixed
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define Swin Transformer model
class SwinGlaucomaClassifier(nn.Module):
    """Thin wrapper around timm's pretrained "swin_tiny_patch4_window7_224" model.

    The timm model is created with ``num_classes`` outputs and stored as
    ``self.swin``; ``forward`` returns whatever that model returns for ``x``.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.swin = timm.create_model(
            "swin_tiny_patch4_window7_224",
            pretrained=True,
            num_classes=num_classes,
        )

    def forward(self, x):
        outputs = self.swin(x)
        return outputs

# Two-class Swin classifier, moved to the selected device
model = SwinGlaucomaClassifier(num_classes=2)
model = model.to(device)

# Cross-entropy over the class outputs, optimized with Adam at lr 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Multiply the learning rate by 0.9 every 5 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

# Training function
def train_model(model, train_loader, valid_loader, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Every epoch runs one optimization pass over ``train_loader`` and one
    gradient-free pass over ``valid_loader``, prints loss and accuracy for
    both, saves the state dict to "best_swin_model.pth" whenever validation
    accuracy exceeds the best seen so far, and then steps ``scheduler``.
    Batches are moved to the notebook-level ``device``.

    Returns:
        Tuple ``(train_losses, val_losses)``: per-epoch sample-weighted mean losses.
    """
    best_acc = 0.0
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}:")

        # ---- training pass ----
        model.train()
        loss_sum, n_correct, n_seen = 0.0, 0, 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()

            # Weight the batch loss by its size so the epoch mean is per sample
            loss_sum += loss.item() * images.size(0)
            predicted = outputs.max(1)[1]
            n_correct += predicted.eq(labels).sum().item()
            n_seen += labels.size(0)

        train_loss = loss_sum / n_seen
        train_acc = n_correct / n_seen
        train_losses.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss, n_correct, n_seen = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels.long())

                val_loss += loss.item() * images.size(0)
                predicted = outputs.max(1)[1]
                n_correct += predicted.eq(labels).sum().item()
                n_seen += labels.size(0)

        val_loss = val_loss / n_seen
        val_acc = n_correct / n_seen
        val_losses.append(val_loss)

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        # Checkpoint only on a strict improvement in validation accuracy
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_swin_model.pth")
            print("Best model saved!")

        scheduler.step()

    return train_losses, val_losses

# Run 25 epochs and keep the per-epoch loss curves
train_losses, val_losses = train_model(
    model,
    train_loader,
    valid_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs=25,
)

# Restore the checkpoint with the best validation accuracy
best_state = torch.load("best_swin_model.pth")
model.load_state_dict(best_state)

# Evaluation
def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and report the results.

    Collects true and predicted class indices over all batches (moved to the
    notebook-level ``device``), prints the accuracy and the classification
    report, and shows the confusion matrix as a heatmap labelled "No"/"Yes".
    Returns nothing.
    """
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            # Index of the largest output per row is the predicted class
            predicted = model(images).max(1)[1]

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    accuracy = accuracy_score(y_true, y_pred)
    print(f"\nTest Accuracy: {accuracy * 100:.2f}%")
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    # Confusion matrix heatmap
    class_names = ["No", "Yes"]
    conf_matrix = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title("Confusion Matrix (Swin Transformer)")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

# Report test-set metrics for the restored best checkpoint
evaluate_model(model=model, test_loader=test_loader)

In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

def evaluate_model_with_roc(model, test_loader, device):
    """Draw the ROC curve of `model` over `test_loader`, with the AUC in the legend.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier returning per-class logits along dim 1.
    test_loader : iterable
        Yields ``(images, labels)`` batches.
    device : torch.device or str
        Device the batches are moved to before the forward pass.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    model.eval()
    y_true, y_scores = [], []

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            class_probs = torch.softmax(model(batch_images), dim=1)
            # Column 1 is used as the positive-class score for the ROC.
            y_scores.extend(class_probs[:, 1].cpu().numpy())
            y_true.extend(batch_labels.cpu().numpy())

    fpr, tpr, _ = roc_curve(y_true, y_scores)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve (Swin Transformer)')
    ax.legend(loc="lower right")
    plt.show()

# Plot the ROC curve for `model` on the test loader. Unlike evaluate_model,
# this function takes `device` as an explicit argument.
evaluate_model_with_roc(model, test_loader, device)


In [None]:
import matplotlib.pyplot as plt

# Loss curves for the Swin Transformer run.
#
# Only losses are plotted: train_model returns just (train_losses, val_losses),
# so no Swin accuracy history is available in this cell. The earlier accuracy
# panel read `history.history[...]`, which train_model does not produce --
# presumably a Keras History left over from another model's fit. It therefore
# showed another model's curves under a Swin title, or raised NameError on a
# fresh kernel.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(train_losses, label='Train')
ax.plot(val_losses, label='Validation')
ax.set_title('Model loss (Swin Transformer)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
import numpy as np
import torch
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Number of output classes. The confusion-matrix cells in this notebook label
# the two classes "No" and "Yes".
num_classes = 2

In [None]:
# NOTE(review): the following cells build the Swin model with `timm`, not with
# this package -- confirm this install is still needed.
!pip install swin-transformer-pytorch

In [None]:
import torch
import timm  # Import timm for SwinTransformer

# Load the two Keras ensemble members from their saved .h5 files.
resnet_model = load_model("/content/resnet50_model.h5")
vgg_model = load_model("/content/vgg16_model.h5")

# Build the Swin architecture with timm; it must match the architecture that
# produced the checkpoint loaded below.
swin_model = timm.create_model("swin_tiny_patch4_window7_224", pretrained=False, num_classes=2)

# Read the checkpoint onto the CPU regardless of the device it was saved from.
state_dict = torch.load("best_swin_model.pth", map_location=torch.device("cpu"))

# The checkpoint keys carry a `swin.` prefix that the bare timm model does not
# use. Strip it only where it is actually present: slicing every key blindly
# with k[5:] would corrupt any key that lacks the prefix.
from collections import OrderedDict
SWIN_PREFIX = "swin."
new_state_dict = OrderedDict(
    (k[len(SWIN_PREFIX):] if k.startswith(SWIN_PREFIX) else k, v)
    for k, v in state_dict.items()
)
# load params
swin_model.load_state_dict(new_state_dict)


# Set to evaluation mode
swin_model.eval()

In [None]:
# Both frameworks are fed from the same `test_images` array (assumed to be
# preprocessed already). Keras gets it as-is; PyTorch gets a float32 tensor
# with axes reordered (0, 3, 1, 2).
# NOTE(review): presumably this is channels-last -> channels-first; confirm
# test_images is laid out as (N, H, W, C).
test_images_tf = test_images
test_images_torch = torch.tensor(
    np.transpose(test_images, (0, 3, 1, 2)),
    dtype=torch.float32,
)


In [None]:
# Keras members: predict() output is used directly as class probabilities.
# NOTE(review): assumes both Keras models end in a 2-way softmax -- confirm.
resnet_preds = resnet_model.predict(test_images_tf)
vgg_preds = vgg_model.predict(test_images_tf)

# Swin member: the model returns logits, so softmax over dim 1 turns them into
# probabilities before converting to NumPy.
with torch.no_grad():
    swin_preds = torch.softmax(swin_model(test_images_torch), dim=1).numpy()

# Copy each prediction set into its own NumPy array for the ensembling cells.
resnet_probs, vgg_probs, swin_probs = (
    np.array(preds) for preds in (resnet_preds, vgg_preds, swin_preds)
)

In [None]:
# Soft voting: unweighted mean of the three probability arrays, then the
# highest-probability class per sample.
# Note: `ensemble_preds` is reassigned by the majority-voting cell that
# follows; `ensemble_probs` is what the ROC cell uses.
prob_sum = resnet_probs + vgg_probs
prob_sum = prob_sum + swin_probs
ensemble_probs = prob_sum / 3
ensemble_preds = ensemble_probs.argmax(axis=1)

In [None]:
from scipy.stats import mode

# Hard labels from each member, stacked so that axis 0 indexes the model.
individual_preds = np.stack(
    [probs.argmax(axis=1) for probs in (resnet_probs, vgg_probs, swin_probs)]
)

# Majority vote across the three models for every sample. This replaces the
# soft-voting `ensemble_preds` computed in the previous cell.
ensemble_preds = mode(individual_preds, axis=0).mode.flatten()

In [None]:
# Collapse the label array to class indices.
# NOTE(review): argmax over axis 1 presumes `test_labels` is one-hot encoded
# with one column per class -- confirm.
true_labels = np.argmax(test_labels, axis=1)

print(f"Ensemble Test Accuracy: {accuracy_score(true_labels, ensemble_preds) * 100:.2f}%")
print("\nClassification Report:\n", classification_report(true_labels, ensemble_preds))

# Confusion Matrix
# Pin the label set so the matrix is always 2x2 and lines up with the two
# hard-coded tick labels, even if one class is absent from both arrays.
conf_matrix = confusion_matrix(true_labels, ensemble_preds, labels=[0, 1])
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=["No", "Yes"], yticklabels=["No", "Yes"])
plt.title("Confusion Matrix (Ensemble Model)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# `ensemble_probs` holds one column per class; column 1 is taken as the
# positive-class score. These are the averaged (soft-voting) probabilities,
# not the majority-vote labels in `ensemble_preds`.
fpr, tpr, thresholds = roc_curve(true_labels, ensemble_probs[:, 1])
roc_auc = auc(fpr, tpr)

# Draw the ROC curve with a dashed diagonal as a visual reference.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve (Ensemble Model)')
ax.legend(loc="lower right")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Plots the `accuracy`/`val_accuracy` and `loss`/`val_loss` series stored in
# `history.history`, side by side.
# NOTE(review): the ensemble is assembled from already-trained models and is
# never fit in the cells above, so `history` must come from an earlier cell
# (presumably one Keras model's fit). Confirm which model it belongs to and
# retitle these panels accordingly.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: accuracy per epoch.
acc_ax.plot(history.history['accuracy'])
acc_ax.plot(history.history['val_accuracy'])
acc_ax.set_title('Model accuracy (Ensemble Model)')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.legend(['Train', 'Validation'], loc='upper left')

# Right panel: loss per epoch.
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('Model loss (Ensemble Model)')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(['Train', 'Validation'], loc='upper left')
plt.show()


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import load_model
import torch
import timm
from collections import OrderedDict
from scipy.stats import mode

# Load the trained Keras models from their saved .h5 files.
resnet_model = load_model("/content/resnet50_model.h5")
vgg_model = load_model("/content/vgg16_model.h5")

# Build the Swin architecture with timm; it must match the architecture that
# produced the checkpoint loaded below.
swin_model = timm.create_model("swin_tiny_patch4_window7_224", pretrained=False, num_classes=2)

# Read the checkpoint onto the CPU regardless of the device it was saved from.
state_dict = torch.load("best_swin_model.pth", map_location=torch.device("cpu"))

# The checkpoint keys carry a `swin.` prefix that the bare timm model does not
# use. Strip it only where it is actually present: slicing every key blindly
# with k[5:] would corrupt any key that lacks the prefix.
SWIN_PREFIX = "swin."
new_state_dict = OrderedDict(
    (k[len(SWIN_PREFIX):] if k.startswith(SWIN_PREFIX) else k, v)
    for k, v in state_dict.items()
)
# load params
swin_model.load_state_dict(new_state_dict)
swin_model.eval()


def predict_glaucoma(image_path):
    """Classify a single image with the three-model averaging ensemble.

    Parameters
    ----------
    image_path : str
        Path of the image file; it is loaded and resized to 224x224.

    Returns
    -------
    numpy integer
        Index of the class with the highest averaged probability. The calling
        cells treat 1 as "Glaucoma detected" and anything else as "No Glaucoma".

    Notes
    -----
    Uses the notebook-level `resnet_model`, `vgg_model` and `swin_model`.
    """
    pil_image = load_img(image_path, target_size=(224, 224))
    # Add a leading batch axis so the array has shape (1, 224, 224, channels).
    img_array = img_to_array(pil_image)[np.newaxis, ...]

    # Statement order is preserved from the original: preprocess_input runs
    # BEFORE the torch tensor is built from `img_array`.
    # NOTE(review): Keras documents that preprocess_input may overwrite a float
    # NumPy input in place. If so, `img_array` is no longer raw pixels when the
    # Swin tensor is created -- confirm which input Swin was trained on.
    # Both Keras models (including VGG16) receive the resnet50 preprocess_input output.
    img_array_tf = preprocess_input(img_array)
    img_array_torch = torch.tensor(
        np.transpose(img_array, (0, 3, 1, 2)),
        dtype=torch.float32,
    )

    # Per-model class probabilities.
    resnet_pred = resnet_model.predict(img_array_tf)
    vgg_pred = vgg_model.predict(img_array_tf)
    with torch.no_grad():
        # Swin returns logits; softmax over dim 1 converts them to probabilities.
        swin_pred = torch.softmax(swin_model(img_array_torch), dim=1).numpy()

    # Unweighted average of the three probability arrays, then the top class
    # of the single image in the batch.
    keras_sum = resnet_pred + vgg_pred
    ensemble_prob = (keras_sum + swin_pred) / 3
    return ensemble_prob.argmax(axis=1)[0]

# Example: run the ensemble on one image from the `aug_yes` folder and print
# the verdict.
image_path = "/content/organized_data/aug_yes/images/037_2.jpg"
prediction = predict_glaucoma(image_path)

print("Glaucoma detected" if prediction == 1 else "No Glaucoma")


In [None]:
# Example: run the ensemble on one image from the `no` folder and print the
# verdict.
image_path = "/content/organized_data/no/images/007.jpg"
prediction = predict_glaucoma(image_path)

print("Glaucoma detected" if prediction == 1 else "No Glaucoma")