# Import Necessary Libraries

In [1]:
import pandas as pd
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Import CSV File with train labels

In [2]:
# Read the label table (one row per image: its name and its 'level').
csv_file = "trainLabels.csv"
df = pd.read_csv(csv_file)

# Preview the leading rows to see which columns the table exposes.
preview_rows = df.head()
print(preview_rows)

# Count how many images carry each value of 'level' (class-balance check).
level_counts = df["level"].value_counts()
print(level_counts)

      image  level
0   10_left      0
1  10_right      0
2   13_left      0
3  13_right      0
4   15_left      1
level
0    25810
2     5292
1     2443
3      873
4      708
Name: count, dtype: int64


In [3]:
# Names of the class sub-folders (presumably ordered by severity level;
# level 4 is mapped to "Proliferate_DR" further down in this notebook).
folders = ["No_DR", "Mild", "Moderate", "Severe", "Proliferate_DR"]

# Root directory that holds one sub-folder per class.
base_directory = "Dataset"

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of testing os.path.exists() first.
for folder in folders:
    folder_path = os.path.join(base_directory, folder)
    os.makedirs(folder_path, exist_ok=True)

## Arrange Images into Subfolders

In [None]:
import os
import pandas as pd
import shutil

# Load the CSV file that maps every image name to its 'level'
csv_file = "trainLabels.csv"
df = pd.read_csv(csv_file)

# Folder that holds the original "<image>.jpeg" files
source_directory = "train"

# Level to copy and the class folder that receives it. Level 4 goes to
# Proliferate_DR; the same procedure applies to the other classes by
# changing these two values.
level_to_copy = 4
target_directory = "Dataset/Proliferate_DR"

# Create the target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Select the matching image names once with a boolean mask instead of
# testing every row inside a Python-level loop.
selected_images = df.loc[df["level"] == level_to_copy, "image"]

missing_images = []
for image_name in selected_images:
    source_path = os.path.join(source_directory, f"{image_name}.jpeg")
    target_path = os.path.join(target_directory, f"{image_name}.jpeg")

    if not os.path.isfile(source_path):
        # A label without a file is collected and reported afterwards, so
        # one missing image does not abort the copy half-way through.
        missing_images.append(image_name)
        continue

    shutil.copy(source_path, target_path)

if missing_images:
    print(f"Skipped {len(missing_images)} image(s) listed in {csv_file} but not found in '{source_directory}'.")

print(
    f"Images with level {level_to_copy} have been copied to the "
    f"{os.path.basename(target_directory)} folder in the 'Dataset' directory."
)


# Resize all images to a fixed width and height of 224 pixels

In [None]:
import cv2
import os

# Target size passed to cv2.resize (224x224)
target_size = (224, 224)

# List of subfolders within the "Dataset" folder
subfolders = ["No_DR", "Mild", "Moderate", "Severe", "Proliferate_DR"]

# Resize every .jpeg in place, one class folder at a time
for subfolder in subfolders:
    subfolder_path = os.path.join("Dataset", subfolder)

    for filename in os.listdir(subfolder_path):
        if not filename.endswith(".jpeg"):
            continue
        image_path = os.path.join(subfolder_path, filename)

        # cv2.imread does not raise on an unreadable or corrupt file; it
        # returns None, so test for that explicitly.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error processing {image_path}: could not read image")
            continue

        try:
            resized_image = cv2.resize(image, target_size)

            # Overwrite the original file; cv2.imwrite reports failure
            # through its boolean return value rather than an exception.
            if not cv2.imwrite(image_path, resized_image):
                print(f"Error processing {image_path}: could not write image")
        except cv2.error as e:
            print(f"Error processing {image_path}: {str(e)}")

print("Images in subfolders have been resized to 224x224 using OpenCV.")


# Normalize all the images in the subfolders

In [None]:
import cv2
import os

# List of subfolders within the "Dataset" folder
subfolders = ["No_DR", "Mild", "Moderate", "Severe", "Proliferate_DR"]

# Define the directory to store the normalized images
normalized_folder = "Normalized_Dataset"

# Create the directory if it doesn't exist
os.makedirs(normalized_folder, exist_ok=True)

# Iterate through the subfolders and normalize the images
for subfolder in subfolders:
    subfolder_path = os.path.join("Dataset", subfolder)
    normalized_subfolder_path = os.path.join(normalized_folder, subfolder)
    os.makedirs(normalized_subfolder_path, exist_ok=True)

    for filename in os.listdir(subfolder_path):
        if not filename.endswith(".jpeg"):
            continue
        image_path = os.path.join(subfolder_path, filename)

        # cv2.imread returns None (no exception) when a file cannot be read
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error processing {image_path}: could not read image")
            continue

        try:
            # Normalize the pixel values to the range [0, 1]
            normalized_image = image / 255.0

            # Define the path to save the normalized image in the normalized folder
            normalized_image_path = os.path.join(normalized_subfolder_path, filename)

            # Image files hold 8-bit pixels, so scale back to [0, 255] before
            # saving. Round first (a bare cast truncates, so float error could
            # lower a pixel by one) and cast to uint8; astype(int) would give
            # a 32/64-bit integer array, which OpenCV's writers may reject.
            # NOTE: because the file is 8-bit again, the stored pixels equal
            # the input; the effective [0, 1] scaling is done by the model's
            # Rescaling(1.0/255) layer.
            restored_image = (normalized_image * 255.0).round().astype("uint8")

            if not cv2.imwrite(normalized_image_path, restored_image):
                print(f"Error processing {image_path}: could not write image")
        except cv2.error as e:
            print(f"Error processing {image_path}: {str(e)}")

print("Images in subfolders have been normalized and saved in the 'Normalized_Dataset' folder using OpenCV.")


# Balance the dataset through data augmentation

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Specify the path to the main folder containing subfolders
main_folder_path = "Datasets/APTOS2019 Dataset/Augmented_Balanced Dataset_2"

# Classes to top up ("No_DR" is not listed, so it is never augmented here)
subfolders = ["Mild", "Moderate", "Severe", "Proliferate_DR"]

# Desired number of images per class
desired_images_per_class = 1805

# Number of preview figures drawn per class. Drawing one figure for every
# generated image would open a figure per augmentation; a small sample is
# enough to eyeball the transforms.
previews_per_class = 1

# Augmentation configuration for ImageDataGenerator
datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    rotation_range=50,
    width_shift_range=0.2,
    fill_mode='nearest'
)

# Loop through each subfolder
for subfolder in subfolders:
    subfolder_path = os.path.join(main_folder_path, subfolder)

    # List all image files in the subfolder
    image_files = [f for f in os.listdir(subfolder_path) if f.endswith(".png")]  # Change the extension if needed

    # Check if there are images in the subfolder
    if not image_files:
        print(f"No images found in {subfolder}. Skipping augmentation.")
        continue

    # Calculate the number of images to augment
    num_images_to_augment = max(0, desired_images_per_class - len(image_files))

    # Track names already on disk so a re-run (e.g. after an interrupted
    # pass) never overwrites an augmented file produced earlier.
    existing_names = set(image_files)
    next_index = 1

    for i in range(num_images_to_augment):
        # Choose a random image file
        original_image_path = os.path.join(subfolder_path, np.random.choice(image_files))

        # Load the image and add a leading batch axis for the generator
        img = load_img(original_image_path)
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)

        # datagen.flow yields batches endlessly; the first batch holds the
        # single augmented image needed for this iteration.
        batch = next(iter(datagen.flow(x, batch_size=1)))
        augmented_image = array_to_img(batch[0])

        # Pick the next file name that is not taken yet
        augmented_name = f"augmented_image_{next_index}.png"
        while augmented_name in existing_names:
            next_index += 1
            augmented_name = f"augmented_image_{next_index}.png"
        existing_names.add(augmented_name)
        augmented_image_path = os.path.join(subfolder_path, augmented_name)

        # array_to_img gives RGB data; cv2.imwrite expects BGR channel order
        cv2.imwrite(augmented_image_path, cv2.cvtColor(np.array(augmented_image), cv2.COLOR_RGB2BGR))

        if i >= previews_per_class:
            continue

        # Plot the untouched source image ...
        plt.figure(figsize=(15, 5))
        plt.subplot(1, 6, 1)
        plt.imshow(img)
        plt.title("Original Image")
        plt.axis("off")

        # ... next to five fresh random transforms of it
        for k in range(5):
            plt.subplot(1, 6, k + 2)
            plt.imshow(array_to_img(datagen.random_transform(x[0])))
            plt.title(f"Augmentation {k + 1}")
            plt.axis("off")

        plt.show()

print("Augmentation and Saving completed.")


# Training Preparation

In [1]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt

In [83]:
# Training hyper-parameters, defined together:
#   IMAGE_SIZE - side length used for the (IMAGE_SIZE, IMAGE_SIZE) image size
#   BATCH_SIZE - batch size handed to the dataset loader
#   EPOCHS     - number of epochs passed to model.fit
IMAGE_SIZE, BATCH_SIZE, EPOCHS = 224, 40, 50

# Load the Balanced Dataset

In [None]:
# Build a shuffled, batched image dataset from the balanced dataset folder;
# images are loaded at IMAGE_SIZE x IMAGE_SIZE in batches of BATCH_SIZE.
dataset_dir = "Datasets/APTOS2019 Dataset/Augmented_Balanced Dataset"
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    dataset_dir,
    batch_size=BATCH_SIZE,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    shuffle=True,
)

In [None]:
# Class names exposed by the loaded dataset; indexed by integer label
# elsewhere in the notebook to obtain a display name.
class_names = dataset.class_names

# Visualize some images from the dataset

In [None]:
# Show the first 12 images of one batch in a 3x4 grid, each titled with the
# class name looked up from its label.
plt.figure(figsize=(10, 10))
for image_batch, label_batch in dataset.take(1):
    for i in range(12):
        ax = plt.subplot(3, 4, i + 1)
        pixels = image_batch[i].numpy().astype("uint8")
        plt.imshow(pixels)
        title_text = class_names[label_batch[i]]
        plt.title(title_text)
        plt.axis("Off")

# Function for splitting the dataset

In [79]:
def get_dataset_partitions(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    """Split a dataset into disjoint train / validation / test partitions.

    Sizes are counted in elements of ``ds`` (i.e. in batches when ``ds`` is
    already batched).

    Args:
        ds: Dataset-like object supporting ``len()``, ``shuffle()``,
            ``take()`` and ``skip()`` (e.g. a ``tf.data.Dataset``).
        train_split: Fraction of the elements used for training.
        val_split: Fraction of the elements used for validation.
        test_split: Kept for interface compatibility. The test partition
            receives every element left after the train and validation
            partitions, so nothing is lost to integer truncation.
        shuffle: Whether to shuffle ``ds`` before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of non-overlapping datasets.

    NOTE(review): if ``ds`` itself re-shuffles upstream on every iteration,
    elements can still move between partitions from one pass to the next;
    confirm how the input dataset was built.
    """
    ds_size = len(ds)
    if shuffle:
        # reshuffle_each_iteration=False keeps the shuffled order fixed across
        # passes. With the default, the order changes on every epoch before
        # take()/skip() run, so samples would migrate between partitions.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    # Skip past BOTH earlier partitions; skipping only train_size would
    # return the validation elements a second time.
    test_ds = ds.skip(train_size + val_size)
    return train_ds, val_ds, test_ds

In [80]:
# Split the loaded dataset using the function's default fractions and shuffling.
train_ds, val_ds, test_ds = get_dataset_partitions(dataset)

# Define the preprocessing layers of the neural network

In [81]:
# Preprocessing stack placed at the front of the model: resize each image to
# IMAGE_SIZE x IMAGE_SIZE, then multiply pixel values by 1/255.
_preprocessing = layers.experimental.preprocessing
resize_rescale = tf.keras.Sequential()
resize_rescale.add(_preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE))
resize_rescale.add(_preprocessing.Rescaling(1.0 / 255))

# Model Architecture

In [105]:
# CNN classifier: preprocessing, four convolutional blocks (two 3x3
# convolutions followed by max pooling each), two dense layers with dropout,
# and a 5-way softmax output.
model = models.Sequential([
    resize_rescale,
    # Convolutional Block 1
    # (no input_shape here: it is only honoured on the first layer of a
    # Sequential model; the input shape is supplied through model.build below)
    layers.Conv2D(64, (3, 3), 1, activation='relu'),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    # Convolutional Block 2
    layers.Conv2D(128, (3, 3), 1, activation='relu'),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    # Convolutional Block 3
    layers.Conv2D(64, (3, 3), 1, activation='relu'),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    # Convolutional Block 4
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    # Flatten and Fully Connected Layers
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),

    # Dropout Layer
    layers.Dropout(0.5),

    # Output Layer (one unit per class)
    layers.Dense(5, activation='softmax')
])
# The batch dimension is left as None so the built model is not tied to one
# batch size (the input pipeline batches with BATCH_SIZE).
model.build(input_shape=(None, IMAGE_SIZE, IMAGE_SIZE, 3))


In [None]:
# Print the layer-by-layer summary of the built model.
model.summary()

# Model Compilation

In [84]:
# Configure training: Adam optimizer, sparse categorical cross-entropy on
# probabilities (from_logits=False, matching the softmax output layer), and
# accuracy as the tracked metric.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model.compile(optimizer='Adam', loss=loss_fn, metrics=['accuracy'])

# Model Training

In [None]:
# Train on the training partition and validate after every epoch.
# batch_size is deliberately not passed: train_ds is already batched by the
# dataset loader, and Keras documents that batch_size must not be specified
# for dataset inputs.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    verbose=1,
)

# Model Evaluation

In [None]:
# Evaluate the trained model on the test partition; the result holds the loss
# and the metrics configured in model.compile.
scores = model.evaluate(test_ds)

In [53]:
# Pull the per-epoch training and validation curves out of the fit history.
training_log = history.history
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']

In [None]:
# Use the number of recorded epochs for the x-axis rather than EPOCHS, so the
# plot still works when training ended early or EPOCHS was changed after the
# run that produced these curves.
epochs_range = range(len(acc))

# Accuracy
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

In [56]:
def predict(model, img):
    """Classify a single image and report the winning class with its confidence.

    Args:
        model: Object with a ``predict(batch)`` method returning one row of
            class scores per input image (e.g. the trained Keras model).
        img: One image as an array-like (height, width, channels are
            presumably expected by the model; confirm against the caller).

    Returns:
        ``(predicted_class, confidence)`` where ``predicted_class`` is the
        entry of the module-level ``class_names`` with the highest score and
        ``confidence`` is that score as a percentage rounded to 2 decimals.
    """
    # Work on the `img` argument itself (not on notebook globals), so the
    # result is correct from any call site. A leading axis turns the single
    # image into a batch of one.
    img_array = np.expand_dims(np.asarray(img, dtype="float32"), 0)

    predictions = model.predict(img_array)
    class_scores = predictions[0]
    predicted_class = class_names[int(np.argmax(class_scores))]
    confidence = round(100 * float(np.max(class_scores)), 2)
    return predicted_class, confidence

In [None]:
# Show the first 9 images of one test batch in a 3x3 grid, each captioned
# with its actual class, predicted class and prediction confidence.
plt.figure(figsize=(15, 15))
for images, labels in test_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        sample = images[i].numpy()
        plt.imshow(sample.astype("uint8"))
        predicted_class, confidence = predict(model, sample)
        actual_class = class_names[labels[i]]
        caption = f"Actual: {actual_class}. \n Predicted: {predicted_class}. \n Confidence: {confidence} %"
        plt.title(caption)
        plt.axis("off")

In [None]:
# Persist the trained model (plain string: the path has no placeholders, so
# no f-string is needed).
model.save("Models/APTOS95")

# Classification Report Generation

In [59]:
# Collect the true and predicted class index of EVERY test image.
true_ = []
pred_ = []
for images, labels in test_ds:
    # Score the whole batch in one call; each row of the result holds the
    # class probabilities of one image.
    batch_predictions = model.predict(images, verbose=0)
    pred_.extend(np.argmax(batch_predictions, axis=1).tolist())
    true_.extend(labels.numpy().tolist())

In [None]:
# Convert the collected true labels to a NumPy array for the metrics below.
true_= np.array(true_)

In [None]:
# Bare expression: in a notebook this displays the collected predicted class indices.
pred_

In [None]:
# Per-class precision / recall / F1. `labels` pins the rows to every class
# index and `target_names` shows them by class name, so the report lines up
# with the confusion-matrix axes even if a class is absent from the test data.
label_ids = list(range(len(class_names)))
report = classification_report(true_, pred_, labels=label_ids, target_names=class_names)
print(report)

# Generate Confusion Matrix

In [None]:
# Pin the matrix to one row/column per class index. Without `labels`, a class
# missing from both the true and predicted values would shrink the matrix and
# no longer match the class_names tick labels.
confusion_mat = confusion_matrix(true_, pred_, labels=list(range(len(class_names))))

# Create a heatmap to visualize the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_mat, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()