In [1]:
#including all the necessary libraries
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torchvision import transforms
import numpy as np
import pandas as pd
from PIL import Image
import argparse
import os
import copy
import torch
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score
import cv2
from skimage.feature import hog

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#performing the transform
LABELS_Severity = {35: 0,
                   43: 0,
                   47: 1,
                   53: 1,
                   61: 2,
                   65: 2,
                   71: 2,
                   85: 2}


mean = (.1706)
std = (.2112)
normalize = transforms.Normalize(mean=mean, std=std)

transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    transforms.ToTensor(),
    normalize,
])

In [3]:
class OCTDataset(Dataset):
    def __init__(self, args, subset='train', transform=None,):
        if subset == 'train':
            self.annot = pd.read_csv(args.annot_train_prime)
        elif subset == 'test':
            self.annot = pd.read_csv(args.annot_test_prime)
            
        self.annot['Severity_Label'] = [LABELS_Severity[drss] for drss in copy.deepcopy(self.annot['DRSS'].values)] 
        # print(self.annot)
        self.root = os.path.expanduser(args.data_root)
        self.transform = transform
        # self.subset = subset
        self.nb_classes=len(np.unique(list(LABELS_Severity.values())))
        self.path_list = self.annot['File_Path'].values
        self._labels = self.annot['Severity_Label'].values
        assert len(self.path_list) == len(self._labels)
        # idx_each_class = [[] for i in range(self.nb_classes)]

    def __getitem__(self, index):
        img, target = Image.open(self.root+self.path_list[index]).convert("L"), self._labels[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        return len(self._labels)         

In [4]:
class NotebookArgs:
    def __init__(self, annot_train_prime = 'df_prime_train.csv', annot_test_prime = 'df_prime_test.csv', data_root = '/storage/home/hpaceice1/shared-classes/materials/ece8803fml/'):
        self.annot_train_prime = annot_train_prime
        self.annot_test_prime = annot_test_prime
        self.data_root = data_root
args = NotebookArgs()

In [5]:
#loading the data 
trainset = OCTDataset(args, 'train', transform=transform)
testset = OCTDataset(args, 'test', transform=transform)

# Define hyperparameters
batch_size = 32
num_epochs = 1

train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [6]:
def get_X_y_from_loader(loader):
    X, y = [], []
    for sample in tqdm(loader, total=len(loader)):
        images, labels = sample[0], sample[1]
        X.extend([a.numpy().flatten() for a in images])
        y.extend([a.numpy().flatten() for a in labels])
        break
    return X,y

In [7]:
X_train, y_train = get_X_y_from_loader(train_loader)

  0%|          | 0/758 [00:00<?, ?it/s]


In [8]:
print(X_train[0].shape)

(50176,)


In [9]:
def get_X_y_from_loader(loader):
    X, y = [], []
    #i=0
    for sample in tqdm(loader, total=len(loader)):
        #i = i+1
        images, labels = sample[0], sample[1]
        X.extend([a.numpy()[0] for a in images])
        y.extend([a.numpy().flatten() for a in labels])
        #break
        #if(i == 10):
         #   break
    return X,y
X_train, y_train = get_X_y_from_loader(train_loader)
print(len(X_train))

100%|██████████| 758/758 [01:13<00:00, 10.35it/s]

24252





In [10]:
X_train_hog = []
for i in tqdm(range(len(X_train))):
    #X_hog = X_train[i].reshape((224,224))
    hog_features = hog(X_train[i], pixels_per_cell=(8, 8),
                              cells_per_block=(2, 2), orientations=2, block_norm='L2-Hys',
                              feature_vector=True)
    X_train_hog.append(hog_features)
print(X_train_hog[0].shape)
print(len(X_train_hog))

100%|██████████| 24252/24252 [09:12<00:00, 43.93it/s]

(5832,)
24252





In [11]:
#performing grid search
'''from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
hyperparameters = { 'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga'],
    'class_weight': [None, 'balanced']}

# Define the logistic regression model
model = LogisticRegression(max_iter=500)

# Define the grid search object
grid_search = GridSearchCV(model, hyperparameters, cv=5, scoring='accuracy')

X_train_grid = X_train_hog[0:1000]
y_train_grid = y_train[0:1000]
y_train_grid  = np.ravel(y_train_grid)
# Fit the grid search object on the training data
grid_search.fit(X_train_grid, y_train_grid)

# Get the best hyperparameters
best_params = grid_search.best_params_


# Print the best hyperparameters and their corresponding score
print("Best hyperparameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

'''

'from sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import GridSearchCV\n\n# Define the hyperparameter grid\nhyperparameters = { \'penalty\': [\'l1\', \'l2\'],\n    \'C\': [0.001, 0.01, 0.1, 1, 10, 100],\n    \'solver\': [\'liblinear\', \'newton-cg\', \'lbfgs\', \'sag\', \'saga\'],\n    \'class_weight\': [None, \'balanced\']}\n\n# Define the logistic regression model\nmodel = LogisticRegression(max_iter=500)\n\n# Define the grid search object\ngrid_search = GridSearchCV(model, hyperparameters, cv=5, scoring=\'accuracy\')\n\nX_train_grid = X_train_hog[0:1000]\ny_train_grid = y_train[0:1000]\ny_train_grid  = np.ravel(y_train_grid)\n# Fit the grid search object on the training data\ngrid_search.fit(X_train_grid, y_train_grid)\n\n# Get the best hyperparameters\nbest_params = grid_search.best_params_\n\n\n# Print the best hyperparameters and their corresponding score\nprint("Best hyperparameters: ", grid_search.best_params_)\nprint("Best score: ", grid_search.

In [12]:
print(len(y_train))
X_subset = X_train_hog
y_subset = y_train
print(len(X_subset ))

24252
24252


In [13]:
# Import necessary libraries
from sklearn.linear_model import LogisticRegression

# Instantiate the logistic regression model
lr_model = LogisticRegression(C=10, class_weight='balanced', penalty='l2', solver='liblinear', multi_class='ovr')

# Fit the model to the training data
lr_model.fit(X_subset, y_subset)

  y = column_or_1d(y, warn=True)


In [14]:
X_test, y_test = get_X_y_from_loader(test_loader)

100%|██████████| 250/250 [04:17<00:00,  1.03s/it]


In [15]:
X_test_hog = []
for i in tqdm(range(len(X_test))):
    #X_hog = X_train[i].reshape((224,224))
    hog_features = hog(X_test[i], pixels_per_cell=(8, 8),
                              cells_per_block=(2, 2), orientations=2, block_norm='L2-Hys',
                              feature_vector=True)
    X_test_hog.append(hog_features)
print(X_test_hog[0].shape)
print(len(X_test_hog))

100%|██████████| 7987/7987 [03:00<00:00, 44.17it/s]

(5832,)
7987





In [16]:
from sklearn.metrics import accuracy_score
# Make predictions on the testing data
y_pred = lr_model.predict(X_test_hog)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.4735194691373482


In [17]:
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score, f1_score

# Calculate balanced accuracy score
balanced_acc = balanced_accuracy_score(y_test, y_pred)
print("Balanced accuracy score:", balanced_acc)

# Calculate precision score
precision = precision_score(y_test, y_pred,average='weighted')
print("Precision score:", precision)

# Calculate recall score
recall = recall_score(y_test, y_pred,average='weighted')
print("Recall score:", recall)

# Calculate f1 score
f1 = f1_score(y_test, y_pred,average='weighted')
print("F1 score weighted:", f1)

# Calculate f1 score
f1 = f1_score(y_test, y_pred,average='micro')
print("F1 score Micro:", f1)

# Calculate f1 score
f1 = f1_score(y_test, y_pred,average='macro')
print("F1 score Macro:", f1)

Balanced accuracy score: 0.41082970409007274
Precision score: 0.45744061268156644
Recall score: 0.4735194691373482
F1 score weighted: 0.4622126660936432
F1 score Micro: 0.4735194691373482
F1 score Macro: 0.41225010506483506
