In [None]:
# Mount Google Drive into the Colab runtime at /content/drive/ so the project
# files are reachable; force_remount=True is passed so re-running this cell
# mounts again instead of reusing an existing mount.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [None]:

# Project directory on the mounted drive; printed as a quick sanity check.
folder_path = '/content/drive/MyDrive/FML_Project'
print(folder_path)

/content/drive/MyDrive/FML_Project


In [None]:
import os

# Only list the directory when it actually exists: calling os.listdir on a
# missing path would raise FileNotFoundError right after the "doesn't exist"
# message had been printed.
if os.path.isdir(folder_path):
    print("Folder exists")
    # List all files in the folder
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        print(file_path)
else:
    print("Folder doesn't exist")


Folder exists
/content/drive/MyDrive/FML_Project/df_prime_test.csv
/content/drive/MyDrive/FML_Project/df_prime_train.csv
/content/drive/MyDrive/FML_Project/Prime_FULL
/content/drive/MyDrive/FML_Project/resnet50_features.pkl
/content/drive/MyDrive/FML_Project/resnet50_features_testfix.pkl
/content/drive/MyDrive/FML_Project/resnet50_features_final.pkl


In [None]:
class NotebookArgs:
    """Plain attribute container that stands in for parsed command-line arguments.

    Attributes:
        annot_train_prime: path of the training annotation CSV.
        annot_test_prime: path of the test annotation CSV.
        data_root: root directory that image paths are resolved against.
    """

    def __init__(
        self,
        annot_test_prime="/content/drive/MyDrive/FML_Project/df_prime_test.csv",
        annot_train_prime="/content/drive/MyDrive/FML_Project/df_prime_train.csv",
        data_root="/content/drive/MyDrive/FML_Project",
    ):
        self.data_root = data_root
        self.annot_test_prime = annot_test_prime
        self.annot_train_prime = annot_train_prime


# Default configuration shared by the cells below.
args = NotebookArgs()

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset,DataLoader
from tqdm import tqdm
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import argparse
import os
import copy
import torch
from sklearn.model_selection import GridSearchCV
import cv2

In [None]:
# DRSS value -> severity class index (three classes: 0, 1, 2).
LABELS_Severity = {
    35: 0, 43: 0,
    47: 1, 53: 1,
    61: 2, 65: 2, 71: 2, 85: 2,
}

# Normalisation statistics. These are plain floats (the original parentheses
# did not create tuples), so the same value is applied to every channel.
mean = 0.1706
std = 0.2112
normalize = transforms.Normalize(mean=mean, std=std)


def _expand_to_three_channels(x):
    """Return a view of ``x`` whose first (channel) dimension is expanded to 3."""
    trailing_dims = x.shape[1:]
    return x.expand(3, *trailing_dims)


# Preprocessing pipeline: resize -> tensor -> 3 channels -> normalise.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    _expand_to_three_channels,
    normalize,
])

In [None]:
class OCTDataset(Dataset):
    """Image dataset whose targets are severity classes derived from the 'DRSS' column.

    The annotation CSV for the chosen subset is read with pandas; each row's
    'DRSS' value is mapped through ``LABELS_Severity`` into a new
    'Severity_Label' column, and 'File_Path' gives the image location
    relative to ``args.data_root``.

    Args:
        args: object exposing ``annot_train_prime``, ``annot_test_prime`` and
            ``data_root`` attributes.
        subset: ``'train'`` or ``'test'``; selects which annotation CSV is read.
        transform: optional callable applied to the loaded PIL image.

    Raises:
        ValueError: if ``subset`` is neither ``'train'`` nor ``'test'``.
        KeyError: if a 'DRSS' value has no entry in ``LABELS_Severity``.
    """

    def __init__(self, args, subset='train', transform=None,):
        if subset == 'train':
            annot_path = args.annot_train_prime
        elif subset == 'test':
            annot_path = args.annot_test_prime
        else:
            # Previously an unknown subset fell through and failed later with
            # an AttributeError on the missing ``self.annot``.
            raise ValueError(f"subset must be 'train' or 'test', got {subset!r}")
        self.annot = pd.read_csv(annot_path)

        # The values are only read here, so no defensive copy is needed.
        self.annot['Severity_Label'] = [LABELS_Severity[drss] for drss in self.annot['DRSS'].values]
        self.root = os.path.expanduser(args.data_root)
        self.transform = transform
        self.nb_classes = len(set(LABELS_Severity.values()))
        self.path_list = self.annot['File_Path'].values
        self._labels = self.annot['Severity_Label'].values
        assert len(self.path_list) == len(self._labels)

    def __getitem__(self, index):
        """Return ``(image, target)`` for ``index``; the image is loaded as grayscale."""
        # The path is built by plain concatenation, so 'File_Path' entries are
        # presumably stored with a leading separator -- TODO confirm against the CSV.
        image_path = self.root + self.path_list[index]
        # Close the underlying file as soon as the pixel data has been converted,
        # instead of leaving the handle open until garbage collection.
        with Image.open(image_path) as raw_image:
            img = raw_image.convert("L")
        target = self._labels[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        """Number of annotated samples."""
        return len(self._labels)

In [None]:
# Build the train and test datasets with the shared preprocessing pipeline.
trainset = OCTDataset(args, subset='train', transform=transform)
testset = OCTDataset(args, subset='test', transform=transform)

# Hyperparameters
batch_size = 128
num_epochs = 1

# Both loaders use identical settings; shuffle stays off so samples are
# visited in annotation order.
loader_options = dict(batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
train_loader = DataLoader(trainset, **loader_options)
test_loader = DataLoader(testset, **loader_options)



In [None]:
# Build the feature extractor: a pretrained ResNet-50 with its last child
# module removed, switched to eval mode and moved to the first CUDA device.
device = torch.device('cuda:0')
resnet50 = torchvision.models.resnet50(pretrained=True)
backbone_layers = list(resnet50.children())[:-1]
model = torch.nn.Sequential(*backbone_layers)
model = model.eval().to(device)

In [None]:
#performing resnet50 feature extraction
def get_X_y_from_loader(loader):
    """Run every batch of ``loader`` through the global ``model`` and collect the outputs.

    NOTE: this reads the module-level ``model`` and ``device``. ``model`` is
    rebound to the classifier network in a later cell, so this function must
    be called before that cell runs (or after re-running the extractor cell).

    Args:
        loader: iterable yielding ``(batch, labels)`` pairs of tensors.

    Returns:
        tuple: ``(all_features, all_labels)`` where ``all_features`` is a list
        holding one flattened 1-D numpy array per sample and ``all_labels`` is
        a list of the matching Python scalar labels.
    """
    all_features = []
    all_labels = []
    # Pure inference: without no_grad, autograd records the whole forward pass
    # and keeps intermediate activations alive for a backward pass that never
    # happens, which wastes a large amount of GPU memory per batch.
    with torch.no_grad():
        for local_batch, local_labels in tqdm(loader):
            local_batch = local_batch.to(device)
            batched_features = model(local_batch).cpu()
            # One flattened feature vector per sample in the batch.
            flat_features = [image.flatten().numpy() for image in batched_features]
            flat_labels = [label.item() for label in local_labels]
            all_features.extend(flat_features)
            all_labels.extend(flat_labels)
    return all_features, all_labels

In [None]:
# Extract features and labels for both splits (one full pass over each loader).
X_train, y_train = get_X_y_from_loader(train_loader)
X_test, y_test = get_X_y_from_loader(test_loader)

In [None]:
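# Optional shortcut: load previously extracted features from the cached pickle
# instead of re-running the feature extraction above. Uncomment to use.
# Only unpickle files you created yourself; pickle.load can execute arbitrary code.
# import pickle
# with open('/content/drive/MyDrive/FML_Project/resnet50_features_final.pkl', 'rb') as f:
#     X_train, y_train, X_test, y_test = pickle.load(f)
# print (f"Train Size : {len(X_train)} {len(y_train)}")
# print (f"Test Size: {len(X_test)},  {len(y_test)}")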

Train Size : 24252 24252
Test Size: 7987,  7987


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Define the number of input features
input_dim = len(X_train[0])

# Define the number of output classes
num_classes = 3

# Convert the training and test sets to PyTorch tensors.
# The features are a list of per-sample numpy arrays; converting them through
# a single float32 ndarray avoids the very slow list-of-ndarrays path of
# torch.Tensor(...) (and the UserWarning it emits) while giving the same values.
X_train_tensor = torch.from_numpy(np.asarray(X_train, dtype=np.float32))
y_train_tensor = torch.LongTensor(y_train)
X_test_tensor = torch.from_numpy(np.asarray(X_test, dtype=np.float32))
y_test_tensor = torch.LongTensor(y_test)

# Define the model architecture: a fully connected classifier head.
# NOTE: this rebinds the name `model`, replacing the feature extractor
# defined earlier in the notebook.
model = nn.Sequential(
    nn.Linear(input_dim, 1024),
    nn.ReLU(),
    nn.Linear(1024,256),
    nn.ReLU(),
    nn.Linear(256,64),
    nn.ReLU(),
    nn.Linear(64,16),
    nn.ReLU(),
    nn.Linear(16, 8),
    nn.ReLU(),
    nn.Linear(8, num_classes)
)
model = model.cuda()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Define the training data loader (rebinds `train_loader` to the feature-level loader)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.cuda()
        target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    # Evaluate the model on the test data
    model.eval()
    with torch.no_grad():
        output = model(X_test_tensor.cuda())
        # Index of the largest logit per row is the predicted class.
        predicted = output.argmax(dim=1).cpu()
        accuracy = (predicted == y_test_tensor).sum().item() / len(y_test_tensor)
        # `loss` is the loss of the last mini-batch of the epoch, not an epoch average.
        print('Epoch:', epoch+1, 'Loss:', loss.item(), 'Test Accuracy:', accuracy)


  X_train_tensor = torch.Tensor(X_train)


Epoch: 1 Loss: 1.160139560699463 Test Accuracy: 0.49079754601226994
Epoch: 2 Loss: 1.0203698873519897 Test Accuracy: 0.49079754601226994
Epoch: 3 Loss: 0.9909018874168396 Test Accuracy: 0.49079754601226994
Epoch: 4 Loss: 0.9957444667816162 Test Accuracy: 0.5169650682358833
Epoch: 5 Loss: 0.9734517931938171 Test Accuracy: 0.5033178915738075
Epoch: 6 Loss: 0.9358988404273987 Test Accuracy: 0.4794040315512708
Epoch: 7 Loss: 0.8478348851203918 Test Accuracy: 0.4915487667459622
Epoch: 8 Loss: 0.8713628649711609 Test Accuracy: 0.4601226993865031
Epoch: 9 Loss: 0.9815295338630676 Test Accuracy: 0.4735194691373482
Epoch: 10 Loss: 0.8620108962059021 Test Accuracy: 0.4958056842368849


In [None]:
# f1_score is used below; it was previously missing from this import, so the
# cell raised NameError on a fresh kernel.
from sklearn.metrics import balanced_accuracy_score, f1_score, precision_score, recall_score

# Final evaluation of the trained classifier on the held-out test features.
with torch.no_grad():
    output = model(X_test_tensor.cuda())
predicted = output.argmax(dim=1).cpu()

# Convert once and reuse for every sklearn metric.
y_true = y_test_tensor.cpu().numpy()
y_pred = predicted.numpy()

accuracy = (predicted == y_test_tensor).sum().item() / len(y_test_tensor)
print(f'Test accuracy on {len(predicted)} points is {accuracy}')

# NOTE(review): the saved cell output shows an sklearn `_warn_prf` warning,
# presumably because some class is never predicted -- confirm with a confusion matrix.
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
balanced_accuracy = balanced_accuracy_score(y_true, y_pred)
print(f'f1 score on {len(predicted)} points is {f1}')
print(f'Test precision on {len(predicted)} points is {precision}')
print(f'Test recall on {len(predicted)} points is {recall}')
print(f'Test balanced accuracy on {len(predicted)} points is {balanced_accuracy}')

Test accuracy on 7987 points is 0.4958056842368849
f1 score on 7987 points is 0.4343224447707391
Test precision on 7987 points is 0.39956063571513467
Test recall on 7987 points is 0.4958056842368849
Test balanced accuracy on 7987 points is 0.3834837781266353


  _warn_prf(average, modifier, msg_start, len(result))
