In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from tqdm.notebook import tqdm

In [2]:
# Notebook-wide configuration: input resolution, mini-batch size, compute device
# and the image preprocessing pipeline shared by every dataset below.
image_size = 256
batch_size = 64

# Prefer Apple's Metal backend, then CUDA, and finally fall back to the CPU so
# the notebook also runs on machines without an MPS device.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

preprocess = transforms.Compose([
    # Force every image to a fixed square size (aspect ratio is not preserved).
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    # Per-channel mean / std commonly used with ImageNet-pretrained torchvision models.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
def _build_frozen_resnet50():
    """Return a pretrained ResNet-50 on ``device`` with a frozen backbone and a new 1-logit head."""
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    for param in model.parameters():
        param.requires_grad = False
    model.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(2048, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    # Only the replacement head is trainable.
    for param in model.fc.parameters():
        param.requires_grad = True
    return model.to(device)


def _train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean per-sample loss."""
    running_loss = 0.0
    for images, labels in loader:
        images = images.to(device)
        # BCEWithLogitsLoss needs float targets shaped like the (batch, 1) logits.
        labels = labels.to(device).unsqueeze(1).float()
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        # loss is a batch mean; weight it by batch size so the epoch average is per sample.
        running_loss += loss.item() * images.size(0)
    return running_loss / len(loader.dataset)


def _evaluate(model, loader, criterion):
    """Return ``(mean loss, accuracy)`` of ``model`` on ``loader`` using a 0.5 probability cut-off."""
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device).unsqueeze(1).float()
            outputs = model(images)
            total_loss += criterion(outputs, labels).item() * images.size(0)
            preds = (torch.sigmoid(outputs) > 0.50).int()
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return total_loss / len(loader.dataset), correct / total


def model_validation(k_fold, epochs):
    """Estimate classifier accuracy with stratified k-fold cross-validation.

    For every fold a fresh pretrained ResNet-50 is created, its backbone is
    frozen and a new binary head is trained for ``epochs`` epochs on the fold's
    training part. The held-out part is then scored with BCE-with-logits loss
    and accuracy. Per-epoch training loss, per-fold validation metrics and the
    mean / standard deviation of the fold accuracies are printed.

    Uses the notebook-level ``preprocess``, ``batch_size`` and ``device``.

    Args:
        k_fold: Number of stratified folds.
        epochs: Number of training epochs per fold.
    """
    k_folds = k_fold

    # Load Dataset
    dataset = datasets.ImageFolder(root='./dataset/train', transform=preprocess)
    # ImageFolder already stores every sample's class index; reading it avoids
    # decoding and transforming each image just to obtain its label.
    targets = dataset.targets
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=0)
    results = []

    # The split only needs the labels, so a dummy feature array is sufficient.
    for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(dataset)), targets)):
        print(f"Fold {fold+1}/{k_folds}")

        # Create dataloaders for the current fold
        train_loader = DataLoader(Subset(dataset, train_idx), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(Subset(dataset, val_idx), batch_size=batch_size, shuffle=False)

        model = _build_frozen_resnet50()
        criterion = nn.BCEWithLogitsLoss()
        # Only the head is optimised; frozen backbone weights never receive gradients.
        optimizer = torch.optim.RMSprop(model.fc.parameters(), lr=0.001)

        # Training
        # NOTE(review): train() also switches the frozen backbone's BatchNorm layers
        # to training mode, so their running statistics keep updating - confirm
        # this is intended for a fixed feature extractor.
        model.train()
        for epoch in range(epochs):
            train_loss = _train_one_epoch(model, train_loader, criterion, optimizer)
            print(f"Fold {fold+1}, Epoch {epoch+1}, Train Loss: {train_loss:.4f}")

        # Validation
        val_loss, accuracy = _evaluate(model, val_loader, criterion)
        results.append({'fold': fold+1, 'val_loss': val_loss, 'accuracy': accuracy})
        print(f"Fold {fold+1}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Final Results
    print("\nFinal Cross-Validation Results")
    print("-------------------------------")
    fold_accuracies = [r['accuracy'] for r in results]
    avg_acc = np.mean(fold_accuracies)
    avg_std = np.std(fold_accuracies)
    print(f"Average Accuracy: {avg_acc:.4f}")
    print(f"Accuracy Std: {avg_std:.4f}")

In [7]:
# Run 5-fold stratified cross-validation, training each fold's model for 3 epochs.
model_validation(k_fold=5, epochs=3)

Fold 1/5
Fold 1, Epoch 1, Train Loss: 0.3169
Fold 1, Epoch 2, Train Loss: 0.1730
Fold 1, Epoch 3, Train Loss: 0.1452
Fold 1, Val Loss: 1.1501, Accuracy: 0.4990
Fold 2/5
Fold 2, Epoch 1, Train Loss: 0.3146
Fold 2, Epoch 2, Train Loss: 0.1703
Fold 2, Epoch 3, Train Loss: 0.1390
Fold 2, Val Loss: 0.1229, Accuracy: 0.9521
Fold 3/5
Fold 3, Epoch 1, Train Loss: 0.3069
Fold 3, Epoch 2, Train Loss: 0.1674
Fold 3, Epoch 3, Train Loss: 0.1293
Fold 3, Val Loss: 0.1874, Accuracy: 0.9252
Fold 4/5
Fold 4, Epoch 1, Train Loss: 0.3233
Fold 4, Epoch 2, Train Loss: 0.1635
Fold 4, Epoch 3, Train Loss: 0.1509
Fold 4, Val Loss: 0.1195, Accuracy: 0.9511
Fold 5/5
Fold 5, Epoch 1, Train Loss: 0.2902
Fold 5, Epoch 2, Train Loss: 0.1642
Fold 5, Epoch 3, Train Loss: 0.1404
Fold 5, Val Loss: 0.1122, Accuracy: 0.9588

Final Cross-Validation Results
-------------------------------
Average Accuracy: 0.8572
Accuracy Std: 0.1795


In [3]:
# One ImageFolder per split; all of them share the same preprocessing pipeline.
dataset_train = datasets.ImageFolder('./dataset/train', transform=preprocess)
dataset_val = datasets.ImageFolder('./dataset/val', transform=preprocess)
dataset_test = datasets.ImageFolder('./dataset/test', transform=preprocess)

# Settings common to every loader; only the training split is shuffled.
loader_kwargs = dict(batch_size=batch_size, num_workers=4)
train_set = DataLoader(dataset_train, shuffle=True, **loader_kwargs)
val_set = DataLoader(dataset_val, shuffle=False, **loader_kwargs)
test_set = DataLoader(dataset_test, shuffle=False, **loader_kwargs)

In [4]:
# Start from the pretrained ResNet-50 and freeze all of its parameters.
final_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
final_model.requires_grad_(False)

# Replace the original classification head with a small trainable head that
# emits a single logit per image.
classifier_head = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(2048, 256),
    nn.ReLU(),
    nn.Linear(256, 1),
)
classifier_head.requires_grad_(True)
final_model.fc = classifier_head


In [5]:
# Move the network to the compute device before creating the optimizer.
final_model = final_model.to(device)

# RMSprop over the model's parameters; the loss is binary cross-entropy on raw logits.
optimizer = torch.optim.RMSprop(params=final_model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

In [6]:
epochs = 5

# Put the model in training mode explicitly: the evaluation cell calls
# final_model.eval(), so re-running this cell afterwards would otherwise
# train with dropout disabled.
final_model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in tqdm(train_set):
        images = images.to(device)
        # BCEWithLogitsLoss needs float targets shaped like the (batch, 1) logits.
        labels = labels.to(device).unsqueeze(1).float()
        optimizer.zero_grad()
        outputs = final_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # loss is a batch mean; weight it by batch size so the epoch average is per sample.
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_set.dataset)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

  0%|          | 0/82 [00:00<?, ?it/s]

Epoch 1/5, Loss: 0.3363


  0%|          | 0/82 [00:00<?, ?it/s]

Epoch 2/5, Loss: 0.2536


  0%|          | 0/82 [00:00<?, ?it/s]

Epoch 3/5, Loss: 0.2347


  0%|          | 0/82 [00:00<?, ?it/s]

Epoch 4/5, Loss: 0.2252


  0%|          | 0/82 [00:00<?, ?it/s]

Epoch 5/5, Loss: 0.2116


In [7]:
# Score the whole test split and collect the predicted probabilities.
final_model.eval()
# The test loader is not shuffled, so predictions line up with the dataset's stored labels.
y_true = np.array(test_set.dataset.targets)
batch_probs = []
with torch.no_grad():
    for images, _ in tqdm(test_set):
        logits = final_model(images.to(device))
        batch_probs.append(torch.sigmoid(logits).view(-1).cpu().numpy())
# Concatenate once instead of re-copying the growing array for every batch;
# float64 keeps the scores in double precision for the metric functions.
y_pred = np.concatenate(batch_probs).astype(np.float64) if batch_probs else np.array([])

  0%|          | 0/10 [00:00<?, ?it/s]

In [8]:
# ROC analysis of the predicted probabilities against the test labels.
auc_score = roc_auc_score(y_true, y_pred)
fpr, tpr, thresholds = roc_curve(y_true, y_pred)

roc_columns = {'FPR': fpr, 'TPR': tpr, 'Thresholds': thresholds}
roc_df = pd.DataFrame(roc_columns)

# Area chart of the curve; hovering a point reveals the threshold that produced it.
fig = px.area(roc_df, x='FPR', y='TPR', hover_data=['Thresholds'])
fig.add_annotation(text=f'AUC={auc_score:.5f}', x=0.5, y=0.5, showarrow=False)
fig.show()

In [9]:
# Convert probabilities to hard labels: only scores above the cut-off count as class 1.
# NOTE(review): 0.99 is a very strict cut-off; if it was picked by inspecting these
# test predictions the reported accuracy is optimistic - consider choosing it on
# the validation split instead.
decision_threshold = 0.99
y_pred_thr = (y_pred > decision_threshold).astype(int)

conf_matrix = confusion_matrix(y_true, y_pred_thr)
accuracy = accuracy_score(y_true, y_pred_thr)
print(f"Accuracy: {accuracy:.2f}")
print(f"Confusion matrix:\n{conf_matrix}")

Accuracy: 0.85
Confusion matrix:
[[223  11]
 [ 84 306]]


In [10]:
model_file_path = '../models/pytorch_model.pth'

# torch.save does not create missing directories, so make sure the target
# folder exists before writing; otherwise the trained weights would be lost
# to an error at the very last step.
os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
# Only the weights (state dict) are stored, not the model object itself.
torch.save(final_model.state_dict(), model_file_path)