### Import Libraries

In [1]:
import cv2
import pickle
import numpy as np 
import os
import pathlib

### Important Declarations

In [2]:
# Base folder that save_pickle() joins its output paths onto.
save_path = './'

# Integer label assigned to each class name (insertion order matters:
# the path dictionaries below are built by iterating over these keys).
classes_labels = dict(CNV=0, DME=1, DRUSEN=2, NORMAL=3)

# Training images: class name -> list of paths globbed from "<class>/*".
data_dir = pathlib.Path("./dataset/train/train")
classes_dic = {
    name: list(data_dir.glob(f"{name}/*")) for name in classes_labels
}

# Validation images, globbed with the same "<class>/*" pattern.
test_dir = pathlib.Path("./dataset/validation/validation")
test_dic = {
    name: list(test_dir.glob(f"{name}/*")) for name in classes_labels
}

### Dealing with Data

In [3]:
def process_and_resize(img, func):
    """Apply ``func`` to ``img`` and scale the result to 224x224.

    A two-dimensional (single-channel) result is converted with
    ``cv2.COLOR_GRAY2RGB``; any other result is returned as resized.
    """
    scaled = cv2.resize(func(img), (224, 224))
    if len(scaled.shape) != 2:
        return scaled
    return cv2.cvtColor(scaled, cv2.COLOR_GRAY2RGB)



def get_data(dic, func):
    """Load, filter and preprocess every image listed in ``dic``.

    Parameters
    ----------
    dic : dict
        Maps a class name (a key of the module-level ``classes_labels``) to
        an iterable of image paths.
    func : callable
        Preprocessing step handed to ``process_and_resize`` for each image.

    Returns
    -------
    tuple of numpy.ndarray
        ``(image_data, label)``; ``label[i]`` is the integer class of
        ``image_data[i]``.

    Notes
    -----
    Only images that are 512 pixels wide and 496 or 512 pixels high are
    kept. Files that ``cv2.imread`` cannot decode (it returns ``None``) are
    reported and skipped instead of aborting the whole run.
    """
    image_data, label = [], []

    for disease, images in dic.items():
        for image in images:
            img = cv2.imread(str(image))
            if img is None:
                # imread signals failure with None rather than raising.
                print(f"Skipping unreadable image: {image}")
                continue

            # shape is (rows, cols[, channels]): rows = height, cols = width.
            height, width = img.shape[:2]
            if width == 512 and height in (496, 512):
                image_data.append(process_and_resize(img, func))
                label.append(classes_labels[disease])

    return np.array(image_data), np.array(label)

def balance_dataset(count, image_data, label):
    """Undersample every class to a common quota.

    Parameters
    ----------
    count : int
        Target total number of samples. Each class contributes at most
        ``count // number_of_classes`` randomly chosen samples (for the four
        classes used in this notebook that is ``count // 4``).
    image_data : numpy.ndarray
        Samples, indexed along the first axis.
    label : numpy.ndarray
        Class label of each sample, aligned with ``image_data``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(balanced_data, balanced_label)``, grouped by class in ascending
        label order. The labels are taken from the samples actually selected,
        so the two arrays always have the same length, even when a class has
        fewer samples than the quota.

    Notes
    -----
    Selection uses NumPy's global random state (``np.random.shuffle``).
    The per-class counts of the result are printed.
    """
    image_data = np.asarray(image_data)
    label = np.asarray(label)

    classes = np.unique(label)
    if classes.size == 0:
        # Nothing to balance; hand back the (empty) inputs unchanged.
        return image_data, label

    per_class = int(count) // len(classes)

    # Collect the indices to keep, class by class.
    picked = []
    for class_ in classes:
        class_indices = np.where(label == class_)[0]
        np.random.shuffle(class_indices)
        picked.append(class_indices[:per_class])
    picked = np.concatenate(picked)

    # Index data and labels with the same indices so they cannot drift apart.
    balanced_data = image_data[picked]
    balanced_label = label[picked]

    # Print class counts
    kept_classes, kept_counts = np.unique(balanced_label, return_counts=True)
    for class_, n_kept in zip(kept_classes, kept_counts):
        print(f"Class {class_}: {n_kept} instances")
    return balanced_data, balanced_label


def shuffle_data(image_data, labels):
    """Reorder samples and labels with one shared random permutation.

    Both inputs are indexed with the same index array, so sample/label
    pairs stay together. New arrays are returned; the inputs are not
    modified. The permutation is drawn from NumPy's global random state.
    """
    order = np.arange(len(image_data))
    np.random.shuffle(order)  # shuffles the index array in place
    return image_data[order], labels[order]


def save_pickle(folder, name, image_data, labels):
    """Pickle ``(image_data, labels)`` to ``<save_path>/<folder>/<name>.pickle``.

    Parameters
    ----------
    folder : str
        Sub-folder of the module-level ``save_path`` to write into. It is
        created (including parents) when it does not exist yet.
    name : str
        File name without the ``.pickle`` extension.
    image_data, labels
        Objects stored together as a single 2-tuple.
    """
    target = os.path.join(save_path, f"./{folder}/{name}.pickle")
    # open() does not create missing directories, so make sure they exist.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as f:  # 'wb': binary write mode
        pickle.dump((image_data, labels), f)

### Image processing Functions

##### functions

In [4]:
def lab(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2LAB`` code."""
    converted = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    return converted

def hsv(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2HSV`` code."""
    converted = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    return converted

def median(image):
    """Return ``image`` smoothed by ``cv2.medianBlur`` with aperture size 3."""
    aperture = 3
    return cv2.medianBlur(image, aperture)

def clahecc(image):
    """Apply CLAHE and then a fixed linear contrast stretch.

    A three-channel input is first reduced with ``cv2.COLOR_BGR2GRAY``;
    anything else is passed to CLAHE as is. The CLAHE output ``v`` is mapped
    to ``2.5 * v - 128``, clipped to ``[0, 255]`` and returned as ``uint8``.
    """
    is_three_channel = len(image.shape) == 3 and image.shape[2] == 3
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if is_three_channel else image

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(gray)

    # Contrast correction: linear stretch, saturated to the 8-bit range.
    stretched = np.clip(2.5 * equalized - 128, 0, 255)
    return stretched.astype(np.uint8)

def he(image):
    """Histogram-equalise ``image`` with ``cv2.equalizeHist``.

    A three-channel input is reduced with ``cv2.COLOR_BGR2GRAY`` first;
    any other input is equalised directly.
    """
    if len(image.shape) == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.equalizeHist(image)

def threshold(image):
    """Binarise ``image`` with mean-based adaptive thresholding.

    A three-channel input is reduced with ``cv2.COLOR_BGR2GRAY`` first.
    The call uses a maximum value of 255, ``ADAPTIVE_THRESH_MEAN_C``,
    ``THRESH_BINARY``, a block size of 5 and a constant of 1.
    """
    if len(image.shape) == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    max_value, block_size, offset = 255, 5, 1
    return cv2.adaptiveThreshold(
        image,
        max_value,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )

def gabor(image):
    """Filter ``image`` with one fixed Gabor kernel and return 8-bit output.

    The kernel is built by ``cv2.getGaborKernel`` from the constants below
    and applied with ``cv2.filter2D``. The requested output depth is
    ``cv2.CV_8U``.
    """
    # Gabor kernel parameters
    ksize = 3           # Kernel size (ksize x ksize)
    sigma = 10          # Standard deviation of the Gaussian envelope
    theta = np.pi / 4   # Orientation of the normal to the parallel stripes
    lambda_ = 4         # Wavelength of the sinusoidal factor
    gamma = 0.25        # Spatial aspect ratio
    phi = 1             # Phase offset of the sinusoidal factor

    # Generate Gabor filter
    kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lambda_, gamma, phi, ktype=cv2.CV_32F)

    # filter2D's second argument is a *depth* (CV_8U, CV_32F, ... or -1),
    # not a full type such as CV_8UC3; the channel count follows the input.
    filtered_image = cv2.filter2D(image, cv2.CV_8U, kernel)
    return filtered_image

def canny(image):
    """Return the Canny edge map of ``image``.

    A three-channel input is reduced with ``cv2.COLOR_BGR2GRAY``. The
    single-channel image is smoothed with a 5x5 Gaussian blur (sigma
    argument 0) and passed to ``cv2.Canny`` with thresholds 50 and 150.
    """
    if len(image.shape) == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    smoothed = cv2.GaussianBlur(image, (5, 5), 0)

    low, high = 50, 150  # hysteresis thresholds; adjust here if needed
    return cv2.Canny(smoothed, low, high)




##### 1. lab + hsv

In [5]:
def first(image):
    """Pipeline 1: run ``lab`` on the image, then ``hsv`` on that result."""
    return hsv(lab(image))

##### 2. lab + clahecc

In [6]:
def second(image):
    """Pipeline 2: run ``lab`` on the image, then ``clahecc`` on that result."""
    return clahecc(lab(image))

##### 3. he

In [34]:
def third(image):
    """Pipeline 3: histogram equalisation via ``he``."""
    equalized = he(image)
    return equalized

##### 4. threshold

In [35]:
def fourth(image):
    """Pipeline 4: adaptive thresholding via ``threshold``."""
    binary = threshold(image)
    return binary

##### 5. Gabor filter

In [36]:
def fifth(image):
    """Pipeline 5: ``hsv``, then ``median``, then ``gabor``."""
    return gabor(median(hsv(image)))

##### 6. Canny

In [37]:
def sixth(image):
    """Pipeline 6: edge detection via ``canny``."""
    edges = canny(image)
    return edges

### Data conversion

##### 1. First

In [38]:
# Training split: load with `first` -> balance to 3200 -> shuffle -> save.
print("________________________________________________________")
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, first))
)
save_pickle("first", "train_first", train_data, train_labels)
print("________________________________________________________")

# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, first))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("first", "test_first", test_data, test_labels)
print("________________________________________________________")

________________________________________________________


Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________


##### 2. Second

In [7]:
# Training split: load with `second` -> balance to 3200 -> shuffle -> save.
print("________________________________________________________")
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, second))
)
save_pickle("second", "train_second", train_data, train_labels)

# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
print("________________________________________________________")
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, second))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("second", "test_second", test_data, test_labels)
print("________________________________________________________")

________________________________________________________


Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________


##### 3. Third

In [40]:
print("________________________________________________________")
# Training split: load with `third` -> balance to 3200 -> shuffle -> save.
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, third))
)
save_pickle("third", "train_third", train_data, train_labels)

print("________________________________________________________")
# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, third))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("third", "test_third", test_data, test_labels)
print("________________________________________________________")

________________________________________________________
Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________


##### 4. Fourth

In [41]:
# Training split: load with `fourth` -> balance to 3200 -> shuffle -> save.
print("________________________________________________________")
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, fourth))
)
save_pickle("fourth", "train_fourth", train_data, train_labels)

print("________________________________________________________")
# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, fourth))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("fourth", "test_fourth", test_data, test_labels)
print("________________________________________________________")

________________________________________________________
Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________


##### 5. Fifth

In [42]:
# Training split: load with `fifth` -> balance to 3200 -> shuffle -> save.
print("________________________________________________________")
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, fifth))
)
save_pickle("fifth", "train_fifth", train_data, train_labels)

# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
print("________________________________________________________")
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, fifth))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("fifth", "test_fifth", test_data, test_labels)
print("________________________________________________________")

________________________________________________________


Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________


##### 6. Sixth

In [43]:
# Training split: load with `sixth` -> balance to 3200 -> shuffle -> save.
print("________________________________________________________")
train_data, train_labels = shuffle_data(
    *balance_dataset(3200, *get_data(classes_dic, sixth))
)
save_pickle("sixth", "train_sixth", train_data, train_labels)

# Validation split: same steps with a 1000-sample target, capped at 1000 rows.
print("________________________________________________________")
test_data, test_labels = shuffle_data(
    *balance_dataset(1000, *get_data(test_dic, sixth))
)
test_data, test_labels = test_data[:1000], test_labels[:1000]
save_pickle("sixth", "test_sixth", test_data, test_labels)
print("________________________________________________________")

________________________________________________________


Class 0: 800 instances
Class 1: 800 instances
Class 2: 800 instances
Class 3: 800 instances
________________________________________________________
Class 0: 250 instances
Class 1: 250 instances
Class 2: 250 instances
Class 3: 250 instances
________________________________________________________
