In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sea
import os
from tqdm.notebook import tqdm
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, models
import torch.nn.functional as F
from PIL import Image
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from sklearn.metrics import accuracy_score, f1_score
from pathlib import Path
import random
import pickle
from sklearn.utils.class_weight import compute_class_weight


In [11]:
class cfg:
    """Static hyper-parameters and shared objects for the training run.

    Everything here is a class attribute evaluated once, when the class body
    executes. In particular ``base_model`` and ``criterion`` are single shared
    instances: every ``cfg.base_model`` reference is the same module object.
    """

    # --- hardware ---
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # --- task / model ---
    num_class = 3  # number of output classes after relabeling
    base_model = torchvision.models.resnet18(weights='IMAGENET1K_V1')
    criterion = nn.CrossEntropyLoss()

    # --- optimisation schedule ---
    num_epochs = 20
    batch_size = 64
    lr = 1e-4        # initial learning rate handed to the optimizer
    lr_decay = 0.1   # multiplicative factor used when the scheduler reduces the LR
    patience = 5     # scheduler patience, in epochs


In [12]:
# Dataset root directory.
# Set the KNEE_OA_DATASET_DIR environment variable to point at your own copy;
# when it is unset, the original hard-coded location is used unchanged.
path = os.environ.get(
    "KNEE_OA_DATASET_DIR",
    "A:/project_x/Flask-Knee-Osteoarthritis-Classification/dataset/",
)
# Later cells build paths as f"{path}train/{label}", so the root must end with
# a separator.
if not path.endswith(("/", "\\")):
    path += "/"

# Relabeling dictionary: source folder label -> training class index.
# Folders 1 and 2 are merged into class 1, folders 3 and 4 into class 2.
# (Presumably the folder labels are osteoarthritis severity grades -- confirm
# against the dataset description.)
new_label = {
    0: 0,
    1: 1,
    2: 1,
    3: 2,
    4: 2,
}


In [13]:
def load_split(split, grades=(0, 2, 3, 4)):
    """Collect image paths and remapped labels for one dataset split.

    Parameters
    ----------
    split : str
        Sub-directory of ``path`` to read, e.g. ``"train"`` or ``"val"``.
    grades : iterable of int
        Source label folders to read inside the split. Folder ``1`` is not
        part of the default even though ``new_label`` has an entry for it.
        NOTE(review): confirm that skipping folder 1 is intentional.

    Returns
    -------
    (list of str, list of int)
        File paths and the matching ``new_label``-mapped class indices, in the
        same order.
    """
    file_paths = []
    targets = []
    for grade in grades:
        folder = f"{path}{split}/{grade}"
        # os.listdir returns entries in arbitrary order; sort so the row order
        # of the resulting frame is reproducible between runs and machines.
        names = sorted(os.listdir(folder))
        file_paths.extend(f"{folder}/{name}" for name in names)
        targets.extend([new_label[grade]] * len(names))
    return file_paths, targets


# Train Data
image_path, labels = load_split("train")
train_data = pd.DataFrame({"Filepath": image_path, "Labels": labels})

# Validation Data
# `image_path` / `labels` are deliberately rebound here: after this cell they
# hold the validation split, as they did before the refactor.
image_path, labels = load_split("val")
val_data = pd.DataFrame({"Filepath": image_path, "Labels": labels})
val_data.shape


(673, 2)

In [14]:
class custom_dataset(Dataset):
    """Map-style dataset backed by a DataFrame of image paths and labels.

    The frame is indexed positionally: column 0 is the image file path and
    column 1 is the label.
    """

    def __init__(self, dataframe, transform=None):
        """Store the backing frame and an optional per-image transform."""
        self.transform = transform
        self.df = dataframe

    def __len__(self):
        """Number of rows (samples) in the backing frame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is opened with PIL and passed through ``self.transform``
        when one was supplied; otherwise the PIL image is returned as is.
        """
        file_path = self.df.iloc[idx, 0]
        sample = Image.open(file_path)
        target = self.df.iloc[idx, 1]

        return (self.transform(sample) if self.transform else sample), target


In [15]:
# Per-channel mean / std used for normalisation (the values commonly used with
# torchvision's ImageNet-pretrained weights).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Transform pipelines keyed by phase.
#   'train': grayscale -> resize -> random augmentation -> tensor -> normalise
#   'eval' : grayscale -> resize -> deterministic centre crop -> tensor -> normalise
pre_processing = dict(
    train=transforms.Compose([
        # Replicate the single gray channel three times to get 3-channel input.
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((256, 256)),
        transforms.RandomRotation(degrees=15),
        transforms.RandomResizedCrop((224, 224)),
        transforms.RandomHorizontalFlip(),
        # NOTE(review): the input is gray at this point, so the saturation/hue
        # terms probably have little or no effect -- confirm.
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    eval=transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((256, 256)),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
)


In [16]:
# Datasets: augmentation pipeline for training, deterministic pipeline for validation.
custom_train = custom_dataset(dataframe=train_data, transform=pre_processing['train'])
custom_val = custom_dataset(dataframe=val_data, transform=pre_processing['eval'])

# Loaders: only the training loader reshuffles each epoch.
train_loader = DataLoader(
    dataset=custom_train,
    batch_size=cfg.batch_size,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=custom_val,
    batch_size=cfg.batch_size,
    shuffle=False,
)


In [17]:
# Run-level settings pulled out of cfg
device = cfg.device
epochs = cfg.num_epochs

# Where the best-scoring weights are written during training
best_model_params_path = os.path.join('./', 'best_model_params.pt')

# Model: take the shared backbone from cfg and swap its final layer for a
# cfg.num_class-way linear head, then move it to the target device.
model = cfg.base_model
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=cfg.num_class)
model = model.to(device)

# Loss, optimizer and LR schedule. The scheduler runs in 'min' mode: it
# multiplies the LR by cfg.lr_decay once the monitored value has not improved
# for cfg.patience epochs.
criterion = cfg.criterion
optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=cfg.lr_decay,
    patience=cfg.patience,
    verbose=True,
)




In [18]:
# Balanced class weights, computed from the TRAINING labels.
# The module-level `labels` list cannot be used here: by this point it has
# been rebound to the validation split, so weights derived from it would
# reflect the validation class distribution instead.
y_tot = train_data["Labels"].to_numpy()
cl = np.unique(y_tot)
wts = compute_class_weight('balanced', classes=cl, y=y_tot)
# compute_class_weight returns float64; store as float32 so the tensor matches
# the dtype of the model's logits if it is used as a loss weight.
wts = torch.tensor(wts, dtype=torch.float32, device=device)
# NOTE(review): `wts` is not consumed anywhere in the visible notebook -- the
# loss is the unweighted cfg.criterion. To apply the weights, build the loss as
# nn.CrossEntropyLoss(weight=wts).


In [19]:
def train_it(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean per-sample loss.

    Each batch loss is weighted by its batch size, so the returned value is
    the total loss divided by the number of samples seen.
    """
    model.train()
    running_loss, seen = 0.0, 0

    for batch_x, batch_y in tqdm(train_loader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        n_batch = batch_x.shape[0]

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_x), batch_y)
        batch_loss.backward()
        running_loss += batch_loss.item() * n_batch
        optimizer.step()
        seen += n_batch

    return running_loss / seen

In [20]:
def test_it(model, val_loader, optimizer, criterion, device, split_name='Validation'):
    """Evaluate ``model`` on a data loader and print accuracy and per-class F1.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate; it is switched to eval mode.
    val_loader : iterable
        Yields ``(inputs, labels)`` batches.
    optimizer
        Unused; kept so existing positional calls keep working.
    criterion : callable
        Loss function called as ``criterion(outputs, labels)``.
    device
        Device the batches are moved to before the forward pass.
    split_name : str, optional
        Label used in the printed report. Defaults to ``'Validation'`` (the
        previous hard-coded text); pass e.g. ``'Test'`` when evaluating
        another split.

    Returns
    -------
    (float, float, numpy.ndarray)
        Mean per-sample loss, accuracy, and the per-class F1 scores
        (``average=None``).
    """
    model.eval()
    total_loss = 0.0
    num_samples = 0
    preds = []
    true = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.shape[0]
            total_loss += loss.item() * batch_size
            num_samples += batch_size

            # Collect plain Python ints rather than 0-d tensors, so the metric
            # functions receive ordinary integer lists.
            preds.extend(outputs.argmax(dim=1).cpu().tolist())
            true.extend(labels.cpu().tolist())

    val_acc = accuracy_score(true, preds)
    val_f1s = f1_score(true, preds, average=None)

    print('-' * 69)
    print(f'{split_name} accuracy: {val_acc * 100:.2f}%')
    print(f'{split_name} F1 scores: {val_f1s}')
    print('-' * 69)

    avg_val_loss = total_loss / num_samples
    return avg_val_loss, val_acc, val_f1s


In [24]:
def trainloop(epochs, model, train_loader, val_loader, optimizer, criterion, scheduler, device):
    """Train for ``epochs`` epochs, checkpointing on best validation accuracy.

    Every epoch runs one training pass and one validation pass. Whenever the
    validation accuracy strictly exceeds the best seen so far, the model's
    state dict is written to the module-level ``best_model_params_path``.
    The scheduler is stepped on the validation loss, and the learning rate of
    the first parameter group is printed. Returns nothing.
    """
    best_acc = 0
    for epoch in range(epochs):
        print(f'Epoch {epoch+1} started: ')

        train_it(model, train_loader, optimizer, criterion, device)
        avg_val_loss, val_acc, _ = test_it(model, val_loader, optimizer, criterion, device)

        improved = val_acc > best_acc
        if improved:
            best_acc = val_acc
            torch.save(model.state_dict(), best_model_params_path)
            print('Improved and Saved Model')

        # The plateau scheduler monitors the validation loss.
        scheduler.step(avg_val_loss)
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch+1}: Learning Rate = {current_lr}")

    print('=' * 69)

In [25]:
# Run the full training schedule; the best weights are checkpointed inside trainloop.
trainloop(
    epochs=epochs,
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    device=device,
)

Epoch 1 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 71.47%
Validation F1 scores: [0.77945619 0.61098901 0.73362445]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 1: Learning Rate = 0.0001
Epoch 2 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 71.62%
Validation F1 scores: [0.79008746 0.61978022 0.68292683]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 2: Learning Rate = 0.0001
Epoch 3 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 71.77%
Validation F1 scores: [0.79365079 0.48554913 0.81147541]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 3: Learning Rate = 0.0001
Epoch 4 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 72.07%
Validation F1 scores: [0.8055207  0.3902439  0.82442748]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 4: Learning Rate = 0.0001
Epoch 5 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 73.11%
Validation F1 scores: [0.81212121 0.62472885 0.71111111]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 5: Learning Rate = 0.0001
Epoch 6 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 76.67%
Validation F1 scores: [0.82978723 0.66055046 0.78571429]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 6: Learning Rate = 0.0001
Epoch 7 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 73.85%
Validation F1 scores: [0.82071713 0.5045045  0.8       ]
---------------------------------------------------------------------
Epoch 7: Learning Rate = 0.0001
Epoch 8 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 76.52%
Validation F1 scores: [0.82234957 0.60427807 0.83941606]
---------------------------------------------------------------------
Epoch 8: Learning Rate = 0.0001
Epoch 9 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 79.35%
Validation F1 scores: [0.86005831 0.66666667 0.81395349]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 9: Learning Rate = 0.0001
Epoch 10 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 77.56%
Validation F1 scores: [0.83834049 0.65048544 0.80851064]
---------------------------------------------------------------------
Epoch 10: Learning Rate = 0.0001
Epoch 11 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 79.20%
Validation F1 scores: [0.85714286 0.66331658 0.81679389]
---------------------------------------------------------------------
Epoch 11: Learning Rate = 0.0001
Epoch 12 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 78.31%
Validation F1 scores: [0.8427673  0.67764706 0.80701754]
---------------------------------------------------------------------
Epoch 12: Learning Rate = 0.0001
Epoch 13 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 72.51%
Validation F1 scores: [0.8125     0.51111111 0.7706422 ]
---------------------------------------------------------------------
Epoch 13: Learning Rate = 0.0001
Epoch 14 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 77.86%
Validation F1 scores: [0.8487395  0.63185379 0.80321285]
---------------------------------------------------------------------
Epoch 14: Learning Rate = 0.0001
Epoch 15 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 79.49%
Validation F1 scores: [0.87215909 0.68852459 0.75348837]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 15: Learning Rate = 0.0001
Epoch 16 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 79.79%
Validation F1 scores: [0.86562942 0.66141732 0.81395349]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 16: Learning Rate = 0.0001
Epoch 17 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 77.12%
Validation F1 scores: [0.8423913  0.60916442 0.80334728]
---------------------------------------------------------------------
Epoch 17: Learning Rate = 1e-05
Epoch 18 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 80.24%
Validation F1 scores: [0.86337209 0.69544365 0.81327801]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 18: Learning Rate = 1e-05
Epoch 19 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 80.09%
Validation F1 scores: [0.86011905 0.7037037  0.80991736]
---------------------------------------------------------------------
Epoch 19: Learning Rate = 1e-05
Epoch 20 started: 


  0%|          | 0/74 [00:00<?, ?it/s]

---------------------------------------------------------------------
Validation accuracy: 80.98%
Validation F1 scores: [0.86982249 0.71194379 0.81481481]
---------------------------------------------------------------------
Improved and Saved Model
Epoch 20: Learning Rate = 1e-05


In [21]:
# Test Data
# Same folder layout and relabeling as the train/val splits.
image_path = []
labels = []

for label in [0, 2, 3, 4]:
    class_dir = f"{path}test/{label}"
    # Sort the listing so the row order of test_data is deterministic.
    image_list = sorted(os.listdir(class_dir))
    image_path += [f"{class_dir}/{p}" for p in image_list]
    labels += [new_label[label]] * len(image_list)

test_data = pd.DataFrame({"Filepath": image_path, "Labels": labels})

# Model setup for testing
# cfg.base_model is a single module object stored on the cfg class, so `model`
# is that shared instance with a fresh classification head; all of its weights
# are then overwritten by the checkpoint loaded below.
model = cfg.base_model
model.fc = nn.Linear(model.fc.in_features, cfg.num_class)
model.to(device)

custom_test = custom_dataset(test_data, transform=pre_processing['eval'])
test_loader = DataLoader(custom_test, batch_size=cfg.batch_size)

# map_location makes the checkpoint loadable on a machine without the device
# it was saved from (e.g. saved on GPU, evaluated on CPU).
model.load_state_dict(torch.load(best_model_params_path, map_location=device))

# Export the whole model object, then read it back to check the file loads.
# SECURITY: unpickling executes arbitrary code -- only ever load
# 'model_pickle.pkl' files that you produced yourself.
with open('model_pickle.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('model_pickle.pkl', 'rb') as f:
    model = pickle.load(f)

# `optimizer` is only passed to satisfy test_it's signature; it is not used there.
test_it(model, test_loader, optimizer, criterion, device)

---------------------------------------------------------------------
Validation accuracy: 82.65%
Validation F1 scores: [0.87768718 0.73063584 0.85375494]
---------------------------------------------------------------------


(0.40109225178466124,
 0.8264705882352941,
 array([0.87768718, 0.73063584, 0.85375494]))