In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive/ so the
# project files stored on Drive can be read with ordinary paths.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Project folder on the mounted Drive; printed as a quick visual check.
folder_path = '/content/drive/MyDrive/FML_Project'
print(folder_path)

/content/drive/MyDrive/FML_Project


In [None]:
import os

# Confirm the project folder is reachable before touching its contents.
if os.path.isdir(folder_path):
    print("Folder exists")

    # List all files in the folder
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        print(file_path)
else:
    # Listing a missing directory would raise FileNotFoundError right after
    # this message, so the listing only runs in the branch above.
    print("Folder doesn't exist")

Folder exists
/content/drive/MyDrive/FML_Project/df_prime_test.csv
/content/drive/MyDrive/FML_Project/df_prime_train.csv
/content/drive/MyDrive/FML_Project/Prime_FULL
/content/drive/MyDrive/FML_Project/resnet50_features.pkl
/content/drive/MyDrive/FML_Project/resnet50_features_testfix.pkl
/content/drive/MyDrive/FML_Project/resnet50_features_final.pkl


In [None]:
class NotebookArgs:
    """Plain attribute container for the dataset locations used in this notebook.

    Attributes:
        annot_train_prime: path of the training annotation CSV.
        annot_test_prime: path of the test annotation CSV.
        data_root: root folder of the project data.
    """

    def __init__(
        self,
        annot_test_prime="/content/drive/MyDrive/FML_Project/df_prime_test.csv",
        annot_train_prime="/content/drive/MyDrive/FML_Project/df_prime_train.csv",
        data_root="/content/drive/MyDrive/FML_Project",
    ):
        # Positional order is: test CSV, train CSV, data root.
        self.data_root = data_root
        self.annot_test_prime = annot_test_prime
        self.annot_train_prime = annot_train_prime


# Default configuration shared by the cells below.
args = NotebookArgs()

In [None]:
import torch
import torchvision
from torch.utils.data import Dataset,DataLoader
from tqdm import tqdm
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import argparse
import os
import copy
import torch
import cv2

In [None]:
# Maps each raw DRSS grade from the annotation CSVs to one of three severity
# classes (0, 1, 2).
LABELS_Severity = {
    35: 0, 43: 0,
    47: 1, 53: 1,
    61: 2, 65: 2, 71: 2, 85: 2,
}

# Scalar normalisation statistics. The original `(.1706)` spelling was a
# parenthesised float, not a tuple, so plain floats are the same values.
mean = 0.1706
std = 0.2112
normalize = transforms.Normalize(mean=mean, std=std)


def _repeat_gray_channel(x):
    """Expand the leading (channel) axis of ``x`` to size 3, keeping the other axes."""
    return x.expand(3, *x.shape[1:])


# Preprocessing applied to every image: resize, convert to tensor, replicate
# the channel axis to 3, then normalise.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    _repeat_gray_channel,
    normalize,
])

In [None]:
class OCTDataset(Dataset):
    """Image dataset driven by one of the two annotation CSVs.

    Each item is ``(image, severity_label)``: the image is opened from
    ``data_root + File_Path`` and converted to single-channel grayscale, and
    the label is the row's ``DRSS`` value mapped through ``LABELS_Severity``.

    Args:
        args: object exposing ``annot_train_prime`` and ``annot_test_prime``
            (CSV paths) and ``data_root`` (prefix prepended to ``File_Path``).
        subset: ``'train'`` or ``'test'``; selects which CSV is read.
        transform: optional callable applied to the PIL image before it is
            returned.

    Raises:
        ValueError: if ``subset`` is neither ``'train'`` nor ``'test'``.
        KeyError: if a ``DRSS`` value has no entry in ``LABELS_Severity``.
    """

    def __init__(self, args, subset='train', transform=None,):
        # Fail fast on an unknown subset. Previously this left ``self.annot``
        # unset and surfaced as an AttributeError a few lines later.
        if subset == 'train':
            annot_path = args.annot_train_prime
        elif subset == 'test':
            annot_path = args.annot_test_prime
        else:
            raise ValueError(
                "subset must be 'train' or 'test', got {!r}".format(subset)
            )
        self.annot = pd.read_csv(annot_path)

        # The comprehension only reads the DRSS values, so no copy is needed.
        self.annot['Severity_Label'] = [
            LABELS_Severity[drss] for drss in self.annot['DRSS'].values
        ]
        self.root = os.path.expanduser(args.data_root)
        self.transform = transform
        # Number of distinct severity classes in the mapping.
        self.nb_classes = len(set(LABELS_Severity.values()))
        self.path_list = self.annot['File_Path'].values
        self._labels = self.annot['Severity_Label'].values
        assert len(self.path_list) == len(self._labels)

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``, applying ``transform`` if set."""
        # Plain string concatenation, not os.path.join.
        # NOTE(review): presumably File_Path entries start with a path
        # separator - confirm against the CSV before changing this.
        img_path = self.root + self.path_list[index]
        target = self._labels[index]

        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the conversion is done.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("L")

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        """Number of annotated samples."""
        return len(self._labels)

In [None]:
# Build the train and test datasets with the shared preprocessing pipeline.
trainset = OCTDataset(args, subset='train', transform=transform)
testset = OCTDataset(args, subset='test', transform=transform)

# Hyperparameters
batch_size = 128
num_epochs = 1

# Both loaders share the same settings; sample order is kept fixed (no shuffling).
loader_options = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)
train_loader = DataLoader(trainset, **loader_options)
test_loader = DataLoader(testset, **loader_options)



In [None]:
# ResNet-50 feature extractor: the pretrained network with its last child
# module removed, switched to eval mode.
# Fall back to the CPU when no CUDA device is available so this cell does not
# crash on a non-GPU runtime.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
resnet50 = torchvision.models.resnet50(pretrained=True)
# children()[:-1] keeps every top-level submodule except the final one.
model = torch.nn.Sequential(*list(resnet50.children())[:-1]).eval().to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 230MB/s]


In [None]:
def get_X_y_from_loader(loader):
    """Run every batch of ``loader`` through the global ``model`` and collect the results.

    Batches are moved to the global ``device`` before the forward pass and the
    outputs are brought back to the CPU.

    Args:
        loader: iterable yielding ``(images, labels)`` batches of tensors.

    Returns:
        Tuple ``(all_features, all_labels)``: a list with one flattened 1-D
        NumPy feature vector per sample, and a list with the matching labels
        as Python scalars, both in the order the loader produced them.
    """
    all_features = []
    all_labels = []
    # Inference only: without no_grad() every forward pass would record an
    # autograd graph, costing memory for no benefit.
    with torch.no_grad():
        for local_batch, local_labels in tqdm(loader):
            local_batch = local_batch.to(device)
            batched_features = model(local_batch).cpu()
            # Collapse everything except the batch axis: one row per image.
            flat_features = batched_features.flatten(start_dim=1).numpy()
            # Extending with a 2-D array appends its rows one by one.
            all_features.extend(flat_features)
            all_labels.extend(local_labels.tolist())
    return all_features, all_labels

In [None]:
# Extract features and labels for both splits. Each call pushes the whole
# loader through the network, so this cell is slow (see the progress-bar
# timings in its output).
X_train, y_train = get_X_y_from_loader(train_loader)
X_test, y_test = get_X_y_from_loader(test_loader)

100%|██████████| 190/190 [34:02<00:00, 10.75s/it]
100%|██████████| 63/63 [11:22<00:00, 10.83s/it]


In [None]:
# Logistic-regression classifier trained on the extracted features
from sklearn.linear_model import LogisticRegression

# One-vs-rest, L2-penalised model fitted with liblinear; class weights are
# set to 'balanced'.
lr_model = LogisticRegression(
    penalty='l2',
    C=10,
    solver='liblinear',
    multi_class='ovr',
    class_weight='balanced',
)

# Train on the feature vectors and severity labels of the training split.
lr_model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score, f1_score

# Predictions for the test split; every metric below is computed from them.
y_pred = lr_model.predict(X_test)

balanced_acc = balanced_accuracy_score(y_test, y_pred)
print("Balanced accuracy score:", balanced_acc)

# Precision and recall use support-weighted averaging across classes.
precision = precision_score(y_test, y_pred, average='weighted')
print("Precision score:", precision)

recall = recall_score(y_test, y_pred, average='weighted')
print("Recall score:", recall)

# F1 under each averaging scheme, reported in the same order as before.
# After the loop `f1` holds the macro value, matching the original cell.
for f1_caption, f1_average in (
    ("F1 score weighted:", 'weighted'),
    ("F1 score Micro:", 'micro'),
    ("F1 score Macro:", 'macro'),
):
    f1 = f1_score(y_test, y_pred, average=f1_average)
    print(f1_caption, f1)

Balanced accuracy score: 0.33721134855927487
Precision score: 0.38344861336048935
Recall score: 0.38224614999373985
F1 score weighted: 0.38278373481201944
F1 score Micro: 0.38224614999373985
F1 score Macro: 0.3367679860898461


In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd

# Severity classes, used in this order for both axes of the matrix.
labels = [0, 1, 2]  # label names

# scikit-learn convention: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=labels)

# Wrap the raw counts in a labelled frame so the printout is readable.
cm_df = pd.DataFrame(data=cm, index=labels, columns=labels)

print(cm_df)

      0     1    2
0   908  1317  323
1  1072  2278  570
2   470   890  159


In [None]:
import pickle

# Persist the fitted classifier to disk.
# The original cell dumped `clf`, a name this notebook never defines, and used
# `pickle` without importing it, so it raised NameError on a fresh kernel.
# `lr_model` is the only trained model in the notebook.
# NOTE(review): the file name still says "svm" although a logistic regression
# is what gets saved - rename once downstream consumers are confirmed.
with open('svm_resnet50_77.pkl', 'wb') as f:
    pickle.dump(lr_model, f)