In [1]:
#code written by Matthew Miller (adapted from chatGPT and prior code from class)

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models

import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np

## Set the image folder and read the CSV file (the CSV files and the images must be in a single folder)

In [3]:
# Folder containing the image files and the corresponding CSV file
folder_path = "Glaucoma Dataset/train_0_first_50"
csv_file = "JustRAIGS_Train_labels_first50.csv"
csv_path = os.path.join(folder_path,csv_file)
print(csv_path)

# Read the CSV file containing the image filenames and classifications
data = pd.read_csv(csv_path)
#print(data)

Glaucoma Dataset/train_0_first_50\JustRAIGS_Train_labels_first50.csv


In [8]:
# Define empty lists to store the image data and corresponding classifications
images = []
classifications = []

# Define the target size for resizing the images
## This step is limiting our resolution, probably way too aggressive eventually, but fine for testing
target_size = (100, 100)  # Adjust the size as needed

#target_size = (1944, 1944)  # this should make the images square initally and size down to the most common
#smallest dimension for a few of the images.
#Going larger may force interpolation, which wouldn't be ideal.
# this method is also distorting the aspect ratio, but that won't matter later since the optic disk is what we care about,
#and we can define a constant size for that after preprocessing



# Iterate over each row in the CSV file
for index, row in data.iterrows():
    # Read the image file
    image_filename = row['Eye ID']  # The column containing the image filenames
    #print(image_filename)
    image_path = os.path.join(folder_path, image_filename)
    #print(image_path)
    image = cv2.imread(image_path)  # Use cv2.imread for reading images
    
    
    
    # Preprocess the image as necessary (e.g., resizing, hist eq, normalization)  
    # Preprocess the image by resizing it to the target size
    image_resized = cv2.resize(image, target_size)
    
    
    b, g, r = cv2.split(image_resized)
    # Perform histogram equalization and normalization on each channel
    b_eq = cv2.equalizeHist(b)
    g_eq = cv2.equalizeHist(g)
    r_eq = cv2.equalizeHist(r)
    # Merge the equalized channels back into a color image
    image_eq = cv2.merge((b_eq, g_eq, r_eq))
    
    # Normalize the image intensity values
    image_eq = cv2.normalize(image_eq, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    
   
    # Add the preprocessed image to the images list
    images.append(image_eq)
    
    # Display the image if testing is necessary(RGB stuff is so the images display with right colors (BGR to RGB))
    #image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
    #plt.imshow(image_rgb)
    #plt.show()
    
    # Get the classification label
    classification = row['Final Label']  # Assuming 'Classification' is the column containing the classifications
    classifications.append(classification)

In [9]:
# Convert the lists to numpy arrays for further processing
images = np.array(images)
#print(images)

#This next line works correctly for reading the classifications
classifications = np.array(classifications)
#print(classifications)
# Now you can use the images and classifications for training your model

## Need to split the data now into training and validation

In [10]:
from sklearn.model_selection import train_test_split

# Split the data into training and validation sets
#0.2 is setting the validation data size to 20% of the total (80/20 split) and the random state is the seed
X_train, X_val, y_train, y_val = train_test_split(images, classifications, test_size=0.2, random_state=42)

# Print the sizes of the training and validation sets
print("Number of training samples:", len(X_train))
print("Number of validation samples:", len(X_val))


Number of training samples: 40
Number of validation samples: 10


## This is our neural network, the parameters and network architecture are just an example

In [11]:

# Step 3: Build the Neural Network Model
#change the image height and width (100x100 for this example to fit the output of the preprocessed images above)
image_height = 100
image_width = 100

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Step 4: Compile the Model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Step 5: Train the Model
#history = model.fit(train_images, train_labels, epochs=10, validation_data=(val_images, val_labels))

history = model.fit(X_train, y_train, epochs=1, validation_data=(X_val, y_val))


# Step 6: Evaluate the Model
#test_loss, test_acc = model.evaluate(test_images, test_labels)
#print('Test accuracy:', test_acc)

loss, accuracy = model.evaluate(X_val, y_val)
print("Validation accuracy:", accuracy)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 268ms/step - accuracy: 0.6583 - loss: 29.0725 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Validation accuracy: 1.0
