# initial training

In [None]:
# ==== CORE IMPORTS & SETUP ====
import os
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

import torchvision
from torchvision import transforms
from torchvision.datasets import CelebA
from torchvision.models import resnet18, ResNet18_Weights

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

from google.colab import drive
drive.mount("/content/drive")

# ---- PATHS ----
# DATA_DIR sits directly under /content; MODEL_DIR sits under the Drive
# mount point created by the call above.
ROOT_DIR = "/content"
DATA_DIR = os.path.join(ROOT_DIR, "data")
MODEL_DIR = "/content/drive/MyDrive/thesis2/models"

# Create both folders up front so later cells can write into them.
for _required_dir in (DATA_DIR, MODEL_DIR):
    os.makedirs(_required_dir, exist_ok=True)

In [None]:
# Output artifacts; both are written inside MODEL_DIR.
LOG_PATH        = os.path.join(MODEL_DIR, "smile_classifier_training_log.csv")
BEST_MODEL_PATH = os.path.join(MODEL_DIR, "smile_classifier_best.pt")

print("Best model will be saved to:", BEST_MODEL_PATH)
print("Training log will be saved to:", LOG_PATH)

# ---- DEVICE ----
# Use CUDA when PyTorch can see a GPU, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Using device:", device)

In [None]:
# ---- TRAINING CONFIG (compute-friendly) ----
# Data budget
MAX_TRAIN_SAMPLES = 25000    # number of CelebA images kept, for speed
VAL_RATIO = 0.1              # fraction of the kept images held out for validation

# Optimisation
BATCH_SIZE    = 128
LEARNING_RATE = 0.0003
MAX_EPOCHS    = 15           # hard upper bound; early stopping may end sooner
PATIENCE      = 3            # epochs without val improvement before stopping
USE_AMP       = True         # mixed precision (intended for an L4 GPU)


In [None]:
# ==== CELEBA WRAPPER FOR SMILE LABEL ====
class CelebASmileDataset(Dataset):
    """
    Binary "smiling / not smiling" view over torchvision.datasets.CelebA.

    Each item is an ``(image, label)`` pair:
      - image: the sample returned by the wrapped CelebA dataset, passed
        through ``transform`` when one was given
      - label: 1 when the "Smiling" attribute equals 1, otherwise 0
    """

    def __init__(self, root, split="train", transform=None, download=False):
        self.celeba = CelebA(root=root, split=split, target_type="attr", download=download)
        self.transform = transform

        # Position of the "Smiling" flag inside each attribute vector.
        self.smile_idx = self.celeba.attr_names.index("Smiling")
        print(f"Split={split}: total samples = {len(self.celeba)}")
        print("Smiling attribute index:", self.smile_idx)

    def __len__(self):
        return len(self.celeba)

    def __getitem__(self, idx):
        img, attrs = self.celeba[idx]

        # Only an attribute value of exactly 1 counts as smiling; every other
        # value (whatever encoding the wrapped dataset uses) maps to 0.
        is_smiling = attrs[self.smile_idx].item() == 1
        label = int(is_smiling)

        if self.transform is None:
            return img, label
        return self.transform(img), label


In [None]:
# ==== TRANSFORMS ====
# Both pipelines end with the same tensor conversion + channel normalisation.
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std =[0.229, 0.224, 0.225],
)

# Training: random crop / flip / colour jitter / small rotation to reduce
# overfitting.
_train_steps = [
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomRotation(10),
    _to_tensor,
    _normalize,
]
train_transform = transforms.Compose(_train_steps)

# Validation: deterministic resize + centre crop, no augmentation.
_val_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    _to_tensor,
    _normalize,
]
val_transform = transforms.Compose(_val_steps)

# ==== LOAD CELEBA TRAIN SPLIT ====
full_train_ds = CelebASmileDataset(
    root=DATA_DIR,
    split="train",
    transform=train_transform,
    download=True,
)

# Second view over the *same* samples that applies the deterministic
# validation pipeline. Assigning `.transform` through `val_ds.dataset` is not
# reliable: after subsampling, `.dataset` is a Subset (the assignment is a
# no-op), and without subsampling it is the object shared with `train_ds`
# (the assignment would disable training augmentation). A shallow copy shares
# the wrapped CelebA object, so nothing is downloaded or parsed twice.
full_val_view_ds = copy.copy(full_train_ds)
full_val_view_ds.transform = val_transform

# Seeded shuffle so the subsample and the train/val split are reproducible.
split_generator = torch.Generator().manual_seed(42)
selected_indices = torch.randperm(len(full_train_ds), generator=split_generator).tolist()

# Subsample for compute reasons
if MAX_TRAIN_SAMPLES is not None and len(selected_indices) > MAX_TRAIN_SAMPLES:
    selected_indices = selected_indices[:MAX_TRAIN_SAMPLES]
    print(f"Subsampled train dataset to {MAX_TRAIN_SAMPLES} samples.")

# Train / val split: disjoint index ranges, each bound to its own view
num_train = int((1.0 - VAL_RATIO) * len(selected_indices))
num_val   = len(selected_indices) - num_train

train_ds = torch.utils.data.Subset(full_train_ds, selected_indices[:num_train])
val_ds   = torch.utils.data.Subset(full_val_view_ds, selected_indices[num_train:])
print(f"Train samples: {len(train_ds)}, Val samples: {len(val_ds)}")

# ==== DATALOADERS ====
# Settings common to both loaders.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)

# Only the training stream is shuffled; validation keeps a fixed order.
train_loader = DataLoader(train_ds, shuffle=True, **_loader_options)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_options)


In [None]:
from torch.cuda.amp import autocast, GradScaler

# ==== MODEL CREATION ====
def create_smile_model():
    """Return a ResNet-18 (ImageNet weights) with a 2-logit classification head.

    The stock ``fc`` layer is swapped for dropout (p=0.4, to combat
    overfitting) followed by a linear layer with two outputs.
    """
    net = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    head = nn.Sequential(
        nn.Dropout(p=0.4),
        nn.Linear(net.fc.in_features, 2),
    )
    net.fc = head
    return net

model = create_smile_model()
model = model.to(device)

# Cross-entropy over the two output logits.
criterion = nn.CrossEntropyLoss()

# AdamW with a small weight decay; StepLR halves the LR every 3 scheduler steps.
optimizer = optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.5,
)

# Gradient scaler for mixed precision, switched on/off by USE_AMP.
scaler = GradScaler(enabled=USE_AMP)

print("Model ready on:", device)


In [None]:
# ==== TRAIN & EVAL HELPERS ====
def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler=None, use_amp=True):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: network to train; it is switched to training mode.
        dataloader: iterable yielding ``(imgs, labels)`` batches.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss, called as ``criterion(outputs, labels)``.
        device: device the batches are moved to.
        scaler: optional gradient scaler; mixed precision is used only when
            one is supplied.
        use_amp: request mixed precision. It only takes effect on a CUDA
            device; elsewhere the plain full-precision path is used.

    Returns:
        ``(avg_loss, acc)``: sample-weighted mean loss and accuracy over the
        whole pass.
    """
    model.train()

    # `torch.autocast` replaces the deprecated `torch.cuda.amp.autocast`.
    # Gating on the device type keeps CPU runs on the plain path instead of
    # entering a CUDA autocast context that cannot do anything there.
    amp_active = bool(use_amp) and scaler is not None and torch.device(device).type == "cuda"

    running_loss = 0.0
    correct = 0
    total = 0

    for imgs, labels in tqdm(dataloader, desc="Train", leave=False):
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()

        if amp_active:
            with torch.autocast(device_type="cuda"):
                outputs = model(imgs)
                loss = criterion(outputs, labels)
            # Scale the loss before backward, then step/update via the scaler.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # The loss is multiplied by the batch size so the final division by
        # `total` gives a per-sample average even with a short last batch.
        running_loss += loss.item() * imgs.size(0)
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    avg_loss = running_loss / total
    acc = correct / total
    return avg_loss, acc


def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without computing gradients.

    The model is put in eval mode. Returns ``(avg_loss, acc)``, where the loss
    is averaged per sample and accuracy is the fraction of argmax predictions
    that equal the labels.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for imgs, labels in tqdm(dataloader, desc="Val", leave=False):
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            logits = model(imgs)
            batch_loss = criterion(logits, labels)

            # Weight the batch loss by its size before accumulating.
            loss_sum += batch_loss.item() * imgs.size(0)
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen


In [None]:
# ==== TRAINING WITH EARLY STOPPING & LOGGING ====
# Tracks the best validation accuracy seen so far; every improvement is
# checkpointed to BEST_MODEL_PATH and also kept in memory.
best_val_acc = 0.0
best_epoch = 0
best_model_wts = copy.deepcopy(model.state_dict())
epochs_no_improve = 0

history = {
    "epoch": [],
    "train_loss": [],
    "train_acc": [],
    "val_loss": [],
    "val_acc": [],
}

print(f"Starting training with early stopping (patience={PATIENCE})")

for epoch in range(1, MAX_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{MAX_EPOCHS}")
    print("-" * 40)

    train_loss, train_acc = train_one_epoch(
        model,
        train_loader,
        optimizer,
        criterion,
        device,
        scaler=scaler,
        use_amp=USE_AMP,
    )
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)

    # One scheduler step per epoch
    scheduler.step()

    print(f"Epoch {epoch}/{MAX_EPOCHS} "
          f"- train loss: {train_loss:.4f} - train acc: {train_acc:.4f} "
          f"- val loss: {val_loss:.4f} - val acc: {val_acc:.4f}")

    # log history
    history["epoch"].append(epoch)
    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)

    # check improvement
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch
        # deepcopy: state_dict() tensors alias the live parameters, so a
        # plain reference would keep changing as training continues.
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(best_model_wts, BEST_MODEL_PATH)
        print(f"  ✅ New best model at epoch {epoch}, val acc = {best_val_acc:.4f}")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        print(f"  No val improvement for {epochs_no_improve} epoch(s).")

    # early stopping
    if epochs_no_improve >= PATIENCE:
        print(f"\n⏹ Early stopping triggered at epoch {epoch}.")
        break

# Leave `model` holding the best-validation weights instead of the weights of
# whichever epoch happened to run last. Skipped when no epoch ever improved,
# because then `best_model_wts` is just the pre-training snapshot.
if best_epoch > 0:
    model.load_state_dict(best_model_wts)

print("\nTraining finished.")
print(f"Best epoch: {best_epoch} with val acc = {best_val_acc:.4f}")
print("Best model saved to:", BEST_MODEL_PATH)

# save history to CSV
df_history = pd.DataFrame(history)
df_history.to_csv(LOG_PATH, index=False)
print("Training log saved to:", LOG_PATH)


In [None]:
def plot_training_curves(history):
    """Show train-vs-validation loss and accuracy curves side by side.

    ``history`` must provide the keys "epoch", "train_loss", "val_loss",
    "train_acc" and "val_acc", each holding one value per epoch.
    """
    epochs = history["epoch"]

    # (train key, val key, train legend, val legend, y-axis label, title)
    panels = (
        ("train_loss", "val_loss", "Train Loss", "Val Loss",
         "Loss", "Training vs Validation Loss"),
        ("train_acc", "val_acc", "Train Acc", "Val Acc",
         "Accuracy", "Training vs Validation Accuracy"),
    )

    plt.figure(figsize=(12,5))

    for position, (train_key, val_key, train_lbl, val_lbl, y_name, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, history[train_key], label=train_lbl, marker="o")
        plt.plot(epochs, history[val_key], label=val_lbl, marker="o")
        plt.xlabel("Epoch")
        plt.ylabel(y_name)
        plt.title(heading)
        plt.grid(True, alpha=0.3)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Call this after training:
plot_training_curves(history)


In [None]:
import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

# --- same create_smile_model as in training ---
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn as nn

def create_smile_model():
    """Rebuild the classifier architecture: ResNet-18 with a dropout + 2-way linear head."""
    backbone = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    in_dim = backbone.fc.in_features
    # Replace the stock classifier with Dropout(0.4) -> Linear(in_dim, 2).
    backbone.fc = nn.Sequential(nn.Dropout(p=0.4), nn.Linear(in_dim, 2))
    return backbone

# Pick the inference device, then restore the best checkpoint onto it.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Using device:", device)

smile_model = create_smile_model()
smile_model.load_state_dict(
    torch.load(BEST_MODEL_PATH, map_location=device)
)
# .to() and .eval() both return the module itself, so they can be chained.
smile_model = smile_model.to(device).eval()

print("Loaded best smile classifier from:", BEST_MODEL_PATH)


In [None]:
# Deterministic preprocessing for test images: resize, centre crop, tensor
# conversion and channel normalisation.
_inference_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
inference_transform = transforms.Compose(_inference_steps)

def load_and_preprocess_image(path):
    """Load an image file and prepare it for the classifier.

    Returns:
        ``(tensor, pil_image)``: the image after ``inference_transform`` and
        the RGB PIL image itself (kept for display).
    """
    # The context manager closes the underlying file deterministically;
    # convert() returns a separate image object, so `img` remains usable
    # after the file is closed.
    with Image.open(path) as raw:
        img = raw.convert("RGB")
    return inference_transform(img), img  # return tensor + original PIL for display

def predict_smile_probability(model, image_tensor, device):
    """
    Return the model's probability for class 1 ('smiling') as a float in [0,1].

    image_tensor: a single unbatched image; a batch dimension is added here
    before the forward pass. The model is put in eval mode and no gradients
    are recorded.
    """
    model.eval()
    batch = image_tensor.unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        class_probs = F.softmax(model(batch), dim=1)
    # Row 0 is the only sample; column 1 is the 'smiling' class.
    return class_probs[0, 1].item()


In [None]:
from google.colab import files

# Ask for image files through the Colab upload widget, then classify each one.
uploaded = files.upload()

for fname in uploaded:
    print("Testing:", fname)
    img_tensor, img_pil = load_and_preprocess_image(fname)
    p_smile = predict_smile_probability(smile_model, img_tensor, device)

    # Show the image with its smile probability in the title.
    plt.imshow(img_pil)
    plt.axis("off")
    plt.title(f"{fname}\nP(smiling) = {p_smile:.3f}")
    plt.show()

    print("Smile probability:", p_smile)
    # Decision threshold of 0.5 on P(smiling); the emoji were previously
    # stored as mis-decoded bytes and are restored here.
    print("Prediction:", "SMILING 😄" if p_smile >= 0.5 else "NOT SMILING 😐")


# fine tuning

In [None]:
# ==== Cell 1: imports + paths + device =======================================

import os
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18, ResNet18_Weights

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
)

import pandas as pd
import json
from PIL import Image
import torch.nn.functional as F

# ---- Paths ----
# Image folder used for fine-tuning, the checkpoint to start from, the
# checkpoint to write, and the folder for logs/plots.
DATA_ROOT      = "/content/drive/MyDrive/thesis2/classifier_dataset"
OLD_MODEL_PATH = "/content/drive/MyDrive/thesis2/models/smile_classifier_best.pt"
NEW_MODEL_PATH = "/content/drive/MyDrive/thesis2/models/smile_classifier_sd35_finetuned.pt"
PLOTS_DIR      = "/content/drive/MyDrive/thesis2/plots"

os.makedirs(PLOTS_DIR, exist_ok=True)

# ---- Device ----
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Using device:", device)


In [None]:
# ==== Cell 2: recreate model + load previous weights =========================

def create_smile_model():
    """
    Build the smile classifier:
    - ResNet18 backbone initialised with ImageNet weights
    - original ``fc`` replaced by Dropout(0.4) + Linear(num_features, 2)
    """
    net = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    classifier_head = nn.Sequential(
        nn.Dropout(p=0.4),
        nn.Linear(net.fc.in_features, 2),
    )
    net.fc = classifier_head
    return net

model = create_smile_model().to(device)

state_dict = torch.load(OLD_MODEL_PATH, map_location=device)
# strict=False: a key mismatch is reported below instead of raising, so the
# printed lists must be checked before continuing with fine-tuning.
load_result = model.load_state_dict(state_dict, strict=False)
print("Loaded state dict from:", OLD_MODEL_PATH)
print("Missing keys   :", load_result.missing_keys)
print("Unexpected keys:", load_result.unexpected_keys)

if not load_result.missing_keys and not load_result.unexpected_keys:
    print(" State dict loaded cleanly.")
else:
    # The dash in this message was previously stored as mis-decoded bytes.
    print(" There are missing/unexpected keys – check if architecture changed.")


In [None]:
# ==== Cell 3: dataset, transforms, dataloaders ===============================

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]
IMG_SIZE = 512

# Steps common to both pipelines: fixed-size resize first, tensor conversion
# + normalisation last.
_resize_step = transforms.Resize((IMG_SIZE, IMG_SIZE))
_tensor_tail = [
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]

# Training adds a random horizontal flip and mild colour jitter in between.
train_transform = transforms.Compose([
    _resize_step,
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    *_tensor_tail,
])

# Validation / test: no random steps.
val_test_transform = transforms.Compose([_resize_step, *_tensor_tail])

# Transform-free dataset, used only for its size and class list
base_dataset = ImageFolder(root=DATA_ROOT)
num_samples = len(base_dataset)
print("Total samples:", num_samples)
print("Classes:", base_dataset.classes)

# 70 / 15 / 15 split (the test part absorbs the rounding remainder)
n_train = int(0.70 * num_samples)
n_val   = int(0.15 * num_samples)
n_test  = num_samples - n_train - n_val

# Fixed seed -> the same permutation, hence the same split, on every run
g = torch.Generator().manual_seed(42)
indices = torch.randperm(num_samples, generator=g).tolist()

_val_end = n_train + n_val
train_idx, val_idx, test_idx = indices[:n_train], indices[n_train:_val_end], indices[_val_end:]

# One ImageFolder per role so each subset carries its own transform
train_dataset_full = ImageFolder(root=DATA_ROOT, transform=train_transform)
val_dataset_full   = ImageFolder(root=DATA_ROOT, transform=val_test_transform)
test_dataset_full  = ImageFolder(root=DATA_ROOT, transform=val_test_transform)

train_dataset = Subset(train_dataset_full, train_idx)
val_dataset   = Subset(val_dataset_full,   val_idx)
test_dataset  = Subset(test_dataset_full,  test_idx)

print(f"Train / Val / Test sizes: {len(train_dataset)}, {len(val_dataset)}, {len(test_dataset)}")

# Batch size: larger on GPU, smaller on CPU
if device.type == "cuda":
    BATCH_SIZE = 64
else:
    BATCH_SIZE = 32
print("Using batch size:", BATCH_SIZE)

# Options common to all three loaders; memory is pinned only when running on CUDA.
_loader_options = dict(
    batch_size=BATCH_SIZE,
    num_workers=2,
    pin_memory=(device.type == "cuda"),
)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)


In [None]:
# ==== Cell 4: freeze backbone, set optimizer, scheduler, loss ===============

# Single pass over the parameters: layer3, layer4 and the fc head stay
# trainable (and are collected into their own groups); everything else is frozen.
backbone_l3_params = []
backbone_l4_params = []
head_params        = []

for name, p in model.named_parameters():
    if name.startswith("layer3"):
        p.requires_grad = True
        backbone_l3_params.append(p)
    elif name.startswith("layer4"):
        p.requires_grad = True
        backbone_l4_params.append(p)
    elif name.startswith("fc."):
        p.requires_grad = True
        head_params.append(p)
    else:
        p.requires_grad = False

# Counts are numbers of parameter tensors, not of scalar weights.
# (The dash in this message was previously stored as mis-decoded bytes.)
print(
    f"Trainable params – layer3: {len(backbone_l3_params)}, "
    f"layer4: {len(backbone_l4_params)}, head: {len(head_params)}"
)

# One parameter group per unfrozen stage, with the learning rate growing
# towards the head: layer3 < layer4 < fc.
optimizer = optim.AdamW(
    [
        {"params": backbone_l3_params, "lr": 5e-6},
        {"params": backbone_l4_params, "lr": 1e-5},
        {"params": head_params,        "lr": 5e-5},
    ],
    weight_decay=1e-4,
)

criterion = nn.CrossEntropyLoss()




In [None]:
# LR scheduler: multiplies the LR by 0.3 once the monitored value (fed in via
# scheduler.step(...)) has stopped decreasing for more than 2 epochs.
_plateau_options = dict(mode="min", factor=0.3, patience=2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **_plateau_options)


In [None]:
# ==== Cell 5: helper functions ==============================================

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``dataloader``.

    The model is switched to training mode, and every batch gets one
    zero_grad / forward / backward / optimizer step cycle.

    Returns:
        ``(epoch_loss, epoch_acc)``: per-sample mean loss and accuracy.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for images, labels in dataloader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so the final average is per sample.
        loss_sum += batch_loss.item() * images.size(0)
        predicted = torch.max(logits, 1).indices
        hits += (predicted == labels).sum().item()
        seen += labels.size(0)

    return loss_sum / seen, hits / seen


def evaluate(model, dataloader, criterion, device):
    """Compute loss and accuracy of ``model`` on ``dataloader``.

    Runs in eval mode with gradient tracking disabled.

    Returns:
        ``(epoch_loss, epoch_acc)``: per-sample mean loss and accuracy.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            logits = model(images)
            batch_loss = criterion(logits, labels)

            # Weight by batch size so the final average is per sample.
            loss_sum += batch_loss.item() * images.size(0)
            predicted = torch.max(logits, 1).indices
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)

    return loss_sum / seen, hits / seen


In [None]:
# ==== Cell 6: training loop with early stopping + scheduler ==================

MAX_EPOCHS = 50
PATIENCE   = 6  # epochs without val-loss improvement

# Per-epoch metrics, one entry appended per completed epoch.
train_losses, val_losses = [], []
train_accs,   val_accs   = [], []

best_val_loss = float("inf")
best_epoch    = -1
epochs_no_improve = 0

for epoch in range(1, MAX_EPOCHS + 1):
    print(f"\nEpoch {epoch}/{MAX_EPOCHS}")
    print("-" * 40)

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss,   val_acc   = evaluate(model, val_loader, criterion, device)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(
        f"Train loss: {train_loss:.4f} | Train acc: {train_acc:.4f} | "
        f"Val loss: {val_loss:.4f} | Val acc: {val_acc:.4f}"
    )

    # Scheduler step on validation loss
    scheduler.step(val_loss)

    # Early stopping logic: an epoch only counts as an improvement when it
    # beats the best validation loss by more than 1e-4.
    if val_loss < best_val_loss - 1e-4:
        best_val_loss = val_loss
        best_epoch = epoch
        epochs_no_improve = 0
        torch.save(model.state_dict(), NEW_MODEL_PATH)
        print(f"  New best model saved (epoch {epoch}) with val loss {val_loss:.4f}")
    else:
        epochs_no_improve += 1
        print(f"  No improvement in val loss for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= PATIENCE:
        # The stop symbol was previously stored as mis-decoded bytes.
        print("\n⏹ Early stopping triggered.")
        break

print("\nTraining finished.")
print(f"Best epoch: {best_epoch}, best val loss: {best_val_loss:.4f}")
print("Best model path:", NEW_MODEL_PATH)


In [None]:
# ==== Cell 7: save history + plot curves ====================================

# Collect the per-epoch metrics under the column names used in the logs.
history = {
    "epoch": list(range(1, len(train_losses) + 1)),
    "train_loss": train_losses,
    "val_loss":   val_losses,
    "train_acc":  train_accs,
    "val_acc":    val_accs,
}

log_csv_path  = os.path.join(PLOTS_DIR, "sd35_finetune_training_log.csv")
log_json_path = os.path.join(PLOTS_DIR, "sd35_finetune_training_log.json")
curves_path   = os.path.join(PLOTS_DIR, "sd35_finetune_training_curves.png")

# CSV log
df_hist = pd.DataFrame(history)
df_hist.to_csv(log_csv_path, index=False)
print("Saved training log CSV to:", log_csv_path)

# JSON log (same content)
with open(log_json_path, "w") as f:
    f.write(json.dumps(history, indent=4))
print("Saved training log JSON to:", log_json_path)

# Curves: loss on the left panel, accuracy on the right
epochs = history["epoch"]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (train series, val series, train legend, val legend, y-axis label, title)
_panels = (
    (train_losses, val_losses, "Train Loss", "Val Loss",
     "Loss", "Training vs Validation Loss"),
    (train_accs, val_accs, "Train Acc", "Val Acc",
     "Accuracy", "Training vs Validation Accuracy"),
)
for _ax, (_train_y, _val_y, _train_lbl, _val_lbl, _y_name, _heading) in zip(axes, _panels):
    _ax.plot(epochs, _train_y, label=_train_lbl)
    _ax.plot(epochs, _val_y,   label=_val_lbl)
    _ax.set_xlabel("Epoch")
    _ax.set_ylabel(_y_name)
    _ax.set_title(_heading)
    _ax.legend()

plt.tight_layout()
# Save before show() so the written file contains the rendered figure.
plt.savefig(curves_path, dpi=200)
plt.show()

print("Saved training curves to:", curves_path)


In [None]:
# ==== Cell 8: test metrics + confusion matrix ===============================

# Rebuild the architecture and restore the best fine-tuned checkpoint.
best_model = create_smile_model().to(device)
best_model.load_state_dict(torch.load(NEW_MODEL_PATH, map_location=device))
best_model.eval()

all_labels = []
all_preds  = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device, non_blocking=True)

        outputs = best_model(images)
        _, preds = torch.max(outputs, 1)

        # Labels are only needed on the host, so they are not moved to the device.
        all_labels.append(labels.numpy())
        all_preds.append(preds.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_preds  = np.concatenate(all_preds)

# Take the positive class from the dataset's own mapping instead of assuming
# that "smiling" is index 1.
smiling_idx = base_dataset.class_to_idx["smiling"]
# Fix the label set so the matrix always has one row/column per class, even
# if a class is absent from both the test labels and the predictions.
class_indices = list(range(len(base_dataset.classes)))

test_acc = accuracy_score(all_labels, all_preds)
precision, recall, f1, _ = precision_recall_fscore_support(
    all_labels, all_preds, average="binary", pos_label=smiling_idx
)

print(f"Test accuracy        : {test_acc:.4f}")
print(f"Precision (smiling)  : {precision:.4f}")
print(f"Recall (smiling)     : {recall:.4f}")
print(f"F1-score (smiling)   : {f1:.4f}")

cm = confusion_matrix(all_labels, all_preds, labels=class_indices)
print("\nConfusion matrix (rows=true, cols=pred):\n", cm)

# Plot confusion matrix
cm_path = os.path.join(PLOTS_DIR, "smile_classifier_sd35_confusion_matrix.png")

fig, ax = plt.subplots(figsize=(4, 4))
sns.heatmap(
    cm,
    annot=True, fmt="d", cmap="Blues",
    xticklabels=base_dataset.classes,
    yticklabels=base_dataset.classes,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
# The dash in the title was previously stored as mis-decoded bytes.
ax.set_title("Smile vs Neutral – Confusion Matrix")
plt.tight_layout()
plt.savefig(cm_path, dpi=200)
plt.show()

print("Saved confusion matrix plot to:", cm_path)


In [None]:
# ==== Cell 9: predict on a single image =====================================

# Invert the dataset's class-name -> index mapping so a predicted index can be
# turned back into its class name.
idx_to_class = dict(
    (class_index, class_name)
    for class_name, class_index in base_dataset.class_to_idx.items()
)
print("Class mapping:", idx_to_class)

def predict_image(model, image_path, device):
    """
    Run the fine-tuned model on a single image path.

    Prints the image path, the predicted label and P(smiling).

    Returns:
        ``(pred_label, prob_smile)``: the class name with the highest
        probability and the probability of the "smiling" class as a float.
    """
    model.eval()

    # The context manager closes the underlying file deterministically;
    # convert() returns a separate image object, so `img` remains usable
    # after the file is closed.
    with Image.open(image_path) as raw:
        img = raw.convert("RGB")

    x = val_test_transform(img).unsqueeze(0).to(device)  # add the batch dimension
    with torch.no_grad():
        logits = model(x)
        probs = F.softmax(logits, dim=1).cpu().numpy()[0]

    pred_idx = int(np.argmax(probs))
    pred_label = idx_to_class[pred_idx]
    # Look the class index up by name rather than assuming its position.
    prob_smile = float(probs[base_dataset.class_to_idx["smiling"]])

    print(f"Image: {image_path}")
    print(f"Predicted label : {pred_label}")
    print(f"P(smiling)      : {prob_smile:.4f}")
    return pred_label, prob_smile

# --- SPOT-CHECK: run the fine-tuned model on one image from each class folder ---

sample_paths = [
    "/content/drive/MyDrive/thesis2/classifier_dataset/neutral/neutral_0000_p1.png",
    "/content/drive/MyDrive/thesis2/classifier_dataset/smiling/smiling_0001_p2.png",
]

for sample_path in sample_paths:
    print("\nTesting:", sample_path)
    # predict_image prints its own report; the returned pair is not needed here.
    predict_image(best_model, sample_path, device)

