In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from read_data import get_dirs
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image

## Data Loading

In [2]:
class PhotoDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row["path"]).convert("RGB")
        
        if self.transform:
            image = self.transform(image)

        label = torch.tensor(row["label"], dtype=torch.long)
        return image, label


In [3]:
transform = transforms.Compose([
    # change image size
    transforms.Resize((224, 224)),
    # converts PIL image to PyTorch tensor
    transforms.ToTensor()
])


In [4]:
df = get_dirs()

# initialising every image transformed
dataset = PhotoDataset(df, transform=transform)

# data wrapper for convenient usage to model
train_loader = DataLoader(dataset, batch_size=20, shuffle=True)

# 20 images/labels, 3 colors, 224 height, 224 width
# 20 at a time
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)



torch.Size([20, 3, 224, 224]) torch.Size([20])


## Modeling

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using:", device)

# resnet18 model with default weights (pre-trained IMAGENET1K_V1)
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# X*W + c, using final layer nodes to output ???
model.fc = nn.Linear(model.fc.in_features, 2)   # Alex vs Kelly
model = model.to(device)

Using: cpu


In [6]:
criterion = nn.CrossEntropyLoss()

# optimizer on weights using learning rate 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

## Initial Model

In [13]:
batch_losses = []
batch_accs = []

model.train()

for batch_idx, (imgs, lbls) in enumerate(train_loader):
    imgs, lbls = imgs.to(device), lbls.to(device)

    # clear old gradients
    optimizer.zero_grad()

    # output of model
    outputs = model(imgs)

    # calculate loss based on current output/weights
    loss = criterion(outputs, lbls)

    # backward propagation to train model
    loss.backward()

    # forward propagation to update parameters
    optimizer.step()

    preds = outputs.argmax(dim=1)
    acc = (preds == lbls).float().mean().item()

    batch_losses.append(loss.item())
    batch_accs.append(acc)

    print(f"Batch {batch_idx}: Loss = {loss.item():.4f}, Acc = {acc:.3f}")


Batch 0: Loss = 0.0132, Acc = 1.000
Batch 1: Loss = 0.0064, Acc = 1.000
Batch 2: Loss = 0.0088, Acc = 1.000
Batch 3: Loss = 0.0630, Acc = 0.950
Batch 4: Loss = 0.1334, Acc = 0.950
Batch 5: Loss = 0.2053, Acc = 0.950
Batch 6: Loss = 0.0074, Acc = 1.000
Batch 7: Loss = 0.0227, Acc = 1.000
Batch 8: Loss = 0.0069, Acc = 1.000
Batch 9: Loss = 0.0038, Acc = 1.000
Batch 10: Loss = 0.0280, Acc = 1.000
Batch 11: Loss = 0.0133, Acc = 1.000
Batch 12: Loss = 0.0081, Acc = 1.000
Batch 13: Loss = 0.0052, Acc = 1.000
Batch 14: Loss = 0.0409, Acc = 1.000
Batch 15: Loss = 0.0032, Acc = 1.000
Batch 16: Loss = 0.0059, Acc = 1.000
Batch 17: Loss = 0.0034, Acc = 1.000
Batch 18: Loss = 0.0100, Acc = 1.000
Batch 19: Loss = 0.1532, Acc = 1.000


## Grid Search

In [None]:
from sklearn.model_selection import StratifiedKFold
import itertools
import copy
import torch
import numpy as np

torch.manual_seed(42)

batch_sizes = [20, 25, 26]
lrs = [1e-3, 1e-4]
num_folds = 5
num_epochs_cv = 3
num_epochs_final = 5
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

def cv_mean_acc(batch_size, lr):
    fold_accs = []
    for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['label'])):
        train_df = df.iloc[train_idx].reset_index(drop=True)
        val_df   = df.iloc[val_idx].reset_index(drop=True)

        train_ds = PhotoDataset(train_df, transform=transform)
        val_ds   = PhotoDataset(val_df,   transform=transform)

        train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
        val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False)

        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        model.fc = nn.Linear(model.fc.in_features, 2)
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        best_val_loss = float('inf')
        best_wts = copy.deepcopy(model.state_dict())

        for epoch in range(num_epochs_cv):
            model.train()
            for imgs, lbls in train_loader:
                imgs, lbls = imgs.to(device), lbls.to(device)
                optimizer.zero_grad()
                outputs = model(imgs)
                loss = criterion(outputs, lbls)
                loss.backward()
                optimizer.step()

            # validate
            model.eval()
            val_correct = 0
            val_loss = 0.0
            with torch.no_grad():
                for imgs, lbls in val_loader:
                    imgs, lbls = imgs.to(device), lbls.to(device)
                    out = model(imgs)
                    loss = criterion(out, lbls)
                    val_loss += loss.item() * imgs.size(0)
                    val_correct += (out.argmax(1) == lbls).sum().item()
            epoch_val_loss = val_loss / len(val_ds)
            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_wts = copy.deepcopy(model.state_dict())

        # final eval for fold using best_wts
        model.load_state_dict(best_wts)
        model.eval()
        val_correct = 0
        with torch.no_grad():
            for imgs, lbls in val_loader:
                imgs, lbls = imgs.to(device), lbls.to(device)
                out = model(imgs)
                val_correct += (out.argmax(1) == lbls).sum().item()
        fold_accs.append(val_correct / len(val_ds))
    return np.mean(fold_accs), np.std(fold_accs)

# run grid search
results = []
for bs, lr in itertools.product(batch_sizes, lrs):
    mean_acc, std_acc = cv_mean_acc(bs, lr)
    results.append(((bs, lr), mean_acc, std_acc))
    print(f"grid: bs={bs} lr={lr} -> mean_acc={mean_acc:.4f} (±{std_acc:.4f})")

results = sorted(results, key=lambda x: x[1], reverse=True)
best_cfg, best_mean, best_std = results[0]
best_bs, best_lr = best_cfg
print(f"\nBest config: batch_size={best_bs}, lr={best_lr} -> mean_acc={best_mean:.4f} (±{best_std:.4f})")

# retrain final model on full data with best hyperparams
full_ds = PhotoDataset(df, transform=transform)
full_loader = DataLoader(full_ds, batch_size=best_bs, shuffle=True)

model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

for epoch in range(num_epochs_final):
    model.train()
    running_loss = 0.0
    running_correct = 0
    for imgs, lbls in full_loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, lbls)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * imgs.size(0)
        running_correct += (out.argmax(1) == lbls).sum().item()
    epoch_loss = running_loss / len(full_ds)
    epoch_acc = running_correct / len(full_ds)
    print(f"Final train Epoch {epoch+1}/{num_epochs_final}: loss={epoch_loss:.4f}, acc={epoch_acc:.3f}")

torch.save(model.state_dict(), "final_model_full.pth")
print("Saved final_model_full.pth")

grid: bs=20 lr=0.001 -> mean_acc=0.7464 (±0.0464)
grid: bs=20 lr=0.0001 -> mean_acc=0.8577 (±0.0239)
grid: bs=25 lr=0.001 -> mean_acc=0.7753 (±0.0640)
grid: bs=25 lr=0.0001 -> mean_acc=0.8536 (±0.0272)
grid: bs=26 lr=0.001 -> mean_acc=0.7608 (±0.0805)
grid: bs=26 lr=0.0001 -> mean_acc=0.8536 (±0.0329)

Best config: batch_size=20, lr=0.0001 -> mean_acc=0.8577 (±0.0239)
Final train Epoch 1/5: loss=0.4837, acc=0.738
Final train Epoch 2/5: loss=0.0791, acc=0.988
Final train Epoch 3/5: loss=0.0341, acc=1.000
Final train Epoch 4/5: loss=0.0167, acc=1.000
Final train Epoch 5/5: loss=0.0144, acc=0.996
Saved final_model_full.pth


# Initial Predictions