<a href="https://colab.research.google.com/github/jaya-rhupika/Predicton-of-DR-using-XAI-and-CNN/blob/main/Copy_of_DRprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive so that files stored in Drive can be
# read through the local filesystem by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Location of the dataset archive under the Google Drive mount point.
# Edit this value if archive.zip is stored elsewhere in your Drive.
zip_file_path = '/content/drive/My Drive/archive.zip'  # Adjust this path


In [3]:
import zipfile
import os

# Local directory that receives the contents of the archive
extract_dir = '/content/archive'

# Open the archive read-only and unpack every member into extract_dir;
# the context manager closes the archive even if extraction fails.
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

print("Unzipping successful!")


Unzipping successful!


In [4]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import shutil
import random


In [5]:
base_path = '/content/archive/dr_unified_v2/dr_unified_v2/train'

# Collect the names of the directories found directly under base_path;
# plain files at that level are ignored. Order is whatever os.listdir yields.
subfolders = []
for entry in os.listdir(base_path):
    if os.path.isdir(os.path.join(base_path, entry)):
        subfolders.append(entry)

print(f"Subfolders: {subfolders}")


Subfolders: ['0', '3', '4', '1', '2']


In [6]:
random.seed(42)

# Define split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# The test split is taken as "everything after the validation slice", so
# test_ratio is only honoured when the three ratios add up to 1.
# Fail fast here instead of silently producing a different test fraction.
ratio_total = train_ratio + val_ratio + test_ratio
if abs(ratio_total - 1.0) > 1e-9:
    raise ValueError(f"Split ratios must sum to 1, got {ratio_total}")

# Root directories of the train, val, and test sets
train_dir = '/content/archive/train'
val_dir = '/content/archive/val'
test_dir = '/content/archive/test'

# Create each split root and, inside it, one folder per class.
# exist_ok=True keeps this safe to re-run when the folders already exist.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)
    for subfolder in subfolders:
        os.makedirs(os.path.join(split_dir, subfolder), exist_ok=True)

# Function to move images to respective directories
def move_images(image_list, source_dir, target_dir):
    """Move the named files from one directory to another.

    Args:
        image_list: Iterable of file names (not full paths) to move.
        source_dir: Directory that currently contains the files.
        target_dir: Directory that receives the files under the same names.
    """
    for file_name in image_list:
        origin = os.path.join(source_dir, file_name)
        destination = os.path.join(target_dir, file_name)
        shutil.move(origin, destination)

# Process each subfolder: shuffle its files and split them into train/val/test
for subfolder in subfolders:
    subfolder_path = os.path.join(base_path, subfolder)

    # os.listdir() returns names in arbitrary order, so seeding the RNG alone
    # does not make the shuffle repeatable. Sorting pins the starting order.
    images = sorted(os.listdir(subfolder_path))

    # Shuffle with a generator private to this class, seeded with the same
    # value as the random.seed(42) call at the top of this cell. The result
    # then no longer depends on the (also arbitrary) order of `subfolders`,
    # and re-running on the same archive assigns every image to the same split.
    random.Random(42).shuffle(images)

    # Calculate split indices
    train_idx = int(train_ratio * len(images))
    val_idx = int((train_ratio + val_ratio) * len(images))

    # Split the data; the test set is everything after the validation slice
    train_images = images[:train_idx]
    val_images = images[train_idx:val_idx]
    test_images = images[val_idx:]

    print(f"Processing class '{subfolder}':")
    print(f"  Training set size: {len(train_images)}")
    print(f"  Validation set size: {len(val_images)}")
    print(f"  Testing set size: {len(test_images)}")

    # Move images to respective directories
    move_images(train_images, subfolder_path, os.path.join(train_dir, subfolder))
    move_images(val_images, subfolder_path, os.path.join(val_dir, subfolder))
    move_images(test_images, subfolder_path, os.path.join(test_dir, subfolder))

print("Images moved to respective directories.")


Processing class '0':
  Training set size: 38613
  Validation set size: 8274
  Testing set size: 8275
Processing class '3':
  Training set size: 704
  Validation set size: 151
  Testing set size: 151
Processing class '4':
  Training set size: 1401
  Validation set size: 300
  Testing set size: 301
Processing class '1':
  Training set size: 2594
  Validation set size: 556
  Testing set size: 556
Processing class '2':
  Training set size: 8483
  Validation set size: 1818
  Testing set size: 1818
Images moved to respective directories.


In [7]:
# Random transformations applied only to training images
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: rescale pixel values by 1/255 and augment
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation/testing generator: rescale only, no augmentation
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Build the iterators in train -> val -> test order
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

val_generator = val_test_datagen.flow_from_directory(val_dir, **flow_options)

# shuffle=False keeps the test samples in a fixed order for evaluation
test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options
)

# Show the class-name -> label-index mapping to confirm labels are as expected
print(train_generator.class_indices)


Found 51795 images belonging to 5 classes.
Found 11099 images belonging to 5 classes.
Found 11101 images belonging to 5 classes.
{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4}


In [8]:
import numpy as np

# Draw one batch from the training generator and unpack it into images and labels
batch = next(train_generator)
images, labels = batch

# Summarise the pixel values of that batch, one statistic per line
print("Batch statistics:")
for caption, reducer in (
    ("Min pixel value:", np.min),
    ("Max pixel value:", np.max),
    ("Mean pixel value:", np.mean),
    ("Standard deviation of pixel values:", np.std),
):
    print(caption, reducer(images))


Batch statistics:
Min pixel value: 0.0
Max pixel value: 1.0
Mean pixel value: 0.3070035
Standard deviation of pixel values: 0.24139117
