In [3]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import kagglehub


In [4]:
# Download latest version
path = kagglehub.dataset_download("alexattia/the-simpsons-characters-dataset")

for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/weights.best.hdf5
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/.DS_Store
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/characters_illustration.png
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/number_pic_char.csv
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/annotation.txt
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/simpsons_dataset/.DS_Store
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/simpsons_dataset/maggie_simpson/pic_0028.jpg
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-simpsons-characters-dataset/versions/4/simpsons_dataset/maggie_simpson/pic_0000.jpg
/Users/dmu06/.cache/kagglehub/datasets/alexattia/the-sim

In [8]:
import shutil
        
dataFolder = path + "/simpsons_dataset"
testFolder = path + "/kaggle_simpson_testset/kaggle_simpson_testset"

# copy the testset to the ./data
shutil.copytree(testFolder, "./data/kaggle_simpson_testset")

# map all folders to the real names
# maggie_simpson -> Maggie Simpson
# homer_simpson -> Homer Simpson
# marge_simpson -> Marge Simpson
# bart_simpson -> Bart Simpson
# lisa_simpson -> Lisa Simpson
# marge_simpson -> Marge Simpson
# bart_simpson -> Bart Simpson

character_mapping = []

for folder in os.listdir(dataFolder):
    if os.path.isdir(os.path.join(dataFolder, folder)):
        characterFolder = folder
        characterName = folder.replace("_", " ").title()
        character_mapping.append({
          "folder": characterFolder,
          "name": characterName
        })

print(character_mapping)

[{'folder': 'maggie_simpson', 'name': 'Maggie Simpson'}, {'folder': 'simpsons_dataset', 'name': 'Simpsons Dataset'}, {'folder': 'charles_montgomery_burns', 'name': 'Charles Montgomery Burns'}, {'folder': 'patty_bouvier', 'name': 'Patty Bouvier'}, {'folder': 'ralph_wiggum', 'name': 'Ralph Wiggum'}, {'folder': 'chief_wiggum', 'name': 'Chief Wiggum'}, {'folder': 'milhouse_van_houten', 'name': 'Milhouse Van Houten'}, {'folder': 'rainier_wolfcastle', 'name': 'Rainier Wolfcastle'}, {'folder': 'cletus_spuckler', 'name': 'Cletus Spuckler'}, {'folder': 'martin_prince', 'name': 'Martin Prince'}, {'folder': 'lenny_leonard', 'name': 'Lenny Leonard'}, {'folder': 'sideshow_bob', 'name': 'Sideshow Bob'}, {'folder': 'fat_tony', 'name': 'Fat Tony'}, {'folder': 'selma_bouvier', 'name': 'Selma Bouvier'}, {'folder': 'barney_gumble', 'name': 'Barney Gumble'}, {'folder': 'lionel_hutz', 'name': 'Lionel Hutz'}, {'folder': 'gil', 'name': 'Gil'}, {'folder': 'moe_szyslak', 'name': 'Moe Szyslak'}, {'folder': 'car

In [4]:
# stratify split

import json, shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

SOURCE = Path(dataFolder)  # point to your existing folder
DEST_TRAIN = Path("./data/processed/train")
DEST_VAL = Path("./data/processed/val")
DEST_TRAIN.mkdir(parents=True, exist_ok=True)
DEST_VAL.mkdir(parents=True, exist_ok=True)

all_samples = []
classes = sorted([d.name for d in SOURCE.iterdir() if d.is_dir()])
for cls in classes:
    for img in (SOURCE / cls).glob("*.jpg"):
        all_samples.append((img, cls))

# group by class then stratified split
by_class = {}
for p, cls in all_samples: by_class.setdefault(cls, []).append(p)

train_pairs, val_pairs = [], []
for cls, paths in by_class.items():
    paths = sorted(paths)
    train, val = train_test_split(paths, test_size=0.1, random_state=42)
    train_pairs += [(p, cls) for p in train]
    val_pairs += [(p, cls) for p in val]

def copy_pairs(pairs, dest_root):
    for src, cls in pairs:
        dest_dir = dest_root / cls
        dest_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest_dir / src.name)

copy_pairs(train_pairs, DEST_TRAIN)
copy_pairs(val_pairs, DEST_VAL)

with open("data/processed/classes.json", "w") as f:
    json.dump(classes, f, indent=2)
print(f"Train: {len(train_pairs)} | Val: {len(val_pairs)} | Classes: {len(classes)}")

Train: 18820 | Val: 2113 | Classes: 43


In [None]:
# train_simpsons.py
import json
from pathlib import Path
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.optim import AdamW
from torch.amp import autocast, GradScaler
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm.auto import tqdm



DATA_DIR = Path("data/processed")
TRAIN_DIR = DATA_DIR / "train"
VAL_DIR = DATA_DIR / "val"
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(parents=True, exist_ok=True)

device = torch.device("mps" if torch.backends.mps.is_available() else ("cuda" if torch.cuda.is_available() else "cpu"))

is_cuda = torch.cuda.is_available()
is_mps  = torch.backends.mps.is_available()
device  = torch.device("mps" if is_mps else ("cuda" if is_cuda else "cpu"))
scaler = GradScaler(enabled=is_cuda)  # no-op on MPS/CPU

amp_device = "cuda" if is_cuda else ("mps" if is_mps else "cpu")
use_amp = is_cuda or is_mps
amp_dtype = torch.float16  # good default for MPS & CUDA

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

train_tfms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

val_tfms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tfms)
val_ds   = datasets.ImageFolder(VAL_DIR, transform=val_tfms)
num_classes = len(train_ds.classes)

with open(MODELS_DIR / "class_to_idx.json", "w") as f:
    json.dump(train_ds.class_to_idx, f, indent=2)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1
backbone = models.efficientnet_b0(weights=weights)
in_feats = backbone.classifier[1].in_features
backbone.classifier[1] = nn.Linear(in_feats, num_classes)
model = backbone.to(device)
if is_cuda or is_mps:
    model = model.to(memory_format=torch.channels_last)

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=2)


# Rule of thumb on M1/M2: 2–4 workers; tune this.
num_workers = 4
pin = False  # pin_memory is a CUDA thing; leave False on MPS

train_loader = DataLoader(
    train_ds, batch_size=32, shuffle=True,
    num_workers=num_workers, persistent_workers=(num_workers > 0),
    prefetch_factor=2 if num_workers > 0 else None,
    pin_memory=pin
)
val_loader = DataLoader(
    val_ds, batch_size=32, shuffle=False,
    num_workers=num_workers, persistent_workers=(num_workers > 0),
    prefetch_factor=2 if num_workers > 0 else None,
    pin_memory=pin
)

def run_epoch(loader, train: bool):
    model.train() if train else model.eval()
    total_loss, total_correct, total = 0.0, 0, 0
    all_preds, all_targets = [], []

    iterator = tqdm(loader, total=len(loader), desc="train" if train else "val", leave=False)
    # validation: inference_mode() is a tiny speedup vs no_grad()
    ctx = torch.enable_grad() if train else torch.inference_mode()
    
    with ctx:
        for images, targets in iterator:
            images, targets = images.to(device, non_blocking=True), targets.to(device, non_blocking=True)
            # channels_last can help on MPS/CUDA
            if is_cuda or is_mps:
                images = images.contiguous(memory_format=torch.channels_last)

            optimizer.zero_grad(set_to_none=True)
            with autocast(device_type=amp_device, dtype=amp_dtype, enabled=use_amp):
                logits = model(images)
                loss = criterion(logits, targets)

            if train:
                if is_cuda:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    loss.backward()
                    optimizer.step()

            total_loss += loss.item() * images.size(0)
            preds = logits.argmax(dim=1)
            total_correct += (preds == targets).sum().item()
            total += images.size(0)
            all_preds.append(preds.detach().cpu().numpy())
            all_targets.append(targets.detach().cpu().numpy())

            if total > 0:
                iterator.set_postfix(loss=total_loss / total, acc=total_correct / total)

    avg_loss = total_loss / max(total, 1)
    accuracy = total_correct / max(total, 1)
    return avg_loss, accuracy, np.concatenate(all_targets), np.concatenate(all_preds)


best_val_acc, patience, max_patience = 0.0, 0, 5
EPOCHS = 4
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc, _, _ = run_epoch(train_loader, train=True)
    val_loss, val_acc, y_true, y_pred = run_epoch(val_loader, train=False)
    scheduler.step(val_loss)
    print(f"Epoch {epoch:02d} | train: {train_loss:.4f}/{train_acc:.3f} | val: {val_loss:.4f}/{val_acc:.3f}")

    if val_acc > best_val_acc:
        best_val_acc, patience = val_acc, 0
        torch.save({"model": model.state_dict(), "classes": train_ds.classes}, MODELS_DIR / "simpsons_effb0_best_v1.pt")
    else:
        patience += 1
        if patience >= max_patience:
            print("Early stopping.")
            break

report = classification_report(y_true, y_pred, target_names=train_ds.classes, digits=3)
print(report)
np.save(MODELS_DIR / "confusion_matrix.npy", confusion_matrix(y_true, y_pred))

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /Users/daniel/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:01<00:00, 14.3MB/s]


train:   0%|          | 0/589 [00:04<?, ?it/s]

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [None]:
# train_simpsons_optimized_m1.py
import json
import os
import random
from pathlib import Path
from typing import Tuple

import numpy as np
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.amp import autocast, GradScaler
from sklearn.metrics import classification_report, confusion_matrix
from tqdm.auto import tqdm


# -----------------------------
# Repro / Basic Config
# -----------------------------
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)

DATA_DIR   = Path("data/processed")
TRAIN_DIR  = DATA_DIR / "train"
VAL_DIR    = DATA_DIR / "val"
MODELS_DIR = Path("models"); MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Training config
EPOCHS               = 10
BATCH_SIZE           = 32            # increase/decrease to fit memory
NUM_WORKERS          = 4             # 2–4 is usually good on M1
EARLY_STOP_PATIENCE  = 5

# Finetuning mode:
#   "full"          = update all layers (best accuracy, slower)
#   "linear_probe"  = train only the final linear layer (fastest)
#   "last2_blocks"  = unfreeze last two feature blocks + head (good middle ground)
FINETUNE_MODE        = "linear_probe"

# Optimizer & schedule
LR_BACKBONE          = 3e-4          # used if backbone has trainable params
LR_HEAD              = 1e-3          # usually a bit higher for the new head
WEIGHT_DECAY         = 1e-4
REDUCE_ON_PLATEAU    = dict(factor=0.5, patience=2)


# -----------------------------
# Device & AMP (MPS-friendly)
# -----------------------------
is_cuda = torch.cuda.is_available()
is_mps  = torch.backends.mps.is_available()
device  = torch.device("mps" if is_mps else ("cuda" if is_cuda else "cpu"))

# AMP: enable on CUDA or MPS; scaler is only meaningful on CUDA
use_amp   = is_cuda or is_mps
amp_dev   = "cuda" if is_cuda else ("mps" if is_mps else "cpu")
amp_dtype = torch.float16
scaler    = GradScaler(enabled=is_cuda)  # no-op on MPS/CPU

# Channels-last is beneficial on modern accelerators
USE_CHANNELS_LAST = is_cuda or is_mps

# Pin memory helps only on CUDA
PIN_MEMORY = is_cuda


# -----------------------------
# Data
# -----------------------------
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

train_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),  # remove or shrink for more speed
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

val_tfms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

train_ds = datasets.ImageFolder(TRAIN_DIR, transform=train_tfms)
val_ds   = datasets.ImageFolder(VAL_DIR,   transform=val_tfms)
num_classes = len(train_ds.classes)

with open(MODELS_DIR / "class_to_idx.json", "w") as f:
    json.dump(train_ds.class_to_idx, f, indent=2)

# DataLoaders: persistent workers + prefetch for CPU-side augment speed
prefetch = 2 if NUM_WORKERS > 0 else None
train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS,
    persistent_workers=(NUM_WORKERS > 0),
    prefetch_factor=prefetch,
    pin_memory=PIN_MEMORY,
)
val_loader = DataLoader(
    val_ds, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=NUM_WORKERS,
    persistent_workers=(NUM_WORKERS > 0),
    prefetch_factor=prefetch,
    pin_memory=PIN_MEMORY,
)


# -----------------------------
# Model
# -----------------------------
weights  = models.EfficientNet_B0_Weights.IMAGENET1K_V1
backbone = models.efficientnet_b0(weights=weights)

# Replace final classifier (Dropout, Linear)
in_feats = backbone.classifier[1].in_features
backbone.classifier[1] = nn.Linear(in_feats, num_classes)

model = backbone.to(device)
if USE_CHANNELS_LAST:
    model = model.to(memory_format=torch.channels_last)

criterion = nn.CrossEntropyLoss()


def set_trainable_layers(model: nn.Module, mode: str = "full") -> None:
    """
    Freeze/unfreeze parameters based on finetuning mode.
    """
    # First freeze everything
    for p in model.parameters():
        p.requires_grad = False

    if mode == "linear_probe":
        # Only train the new classifier layer
        for p in model.classifier[1].parameters():
            p.requires_grad = True

    elif mode == "last2_blocks":
        # Unfreeze last two feature blocks + classifier
        for p in model.features[-2:].parameters():
            p.requires_grad = True
        for p in model.classifier.parameters():
            p.requires_grad = True

    elif mode == "full":
        # Train everything
        for p in model.parameters():
            p.requires_grad = True

    else:
        raise ValueError(f"Unknown FINETUNE_MODE: {mode}")


set_trainable_layers(model, FINETUNE_MODE)

# Build optimizer with param groups (larger LR for the head)
head_params      = list(model.classifier[1].parameters())
backbone_params  = [p for n, p in model.named_parameters()
                    if p.requires_grad and not n.startswith("classifier.1")]

opt_groups = []
if backbone_params:
    opt_groups.append({"params": backbone_params, "lr": LR_BACKBONE, "weight_decay": WEIGHT_DECAY})
if head_params:
    opt_groups.append({"params": head_params, "lr": LR_HEAD, "weight_decay": WEIGHT_DECAY})

optimizer = AdamW(opt_groups)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **REDUCE_ON_PLATEAU)


# -----------------------------
# Train / Eval
# -----------------------------
def run_epoch(loader: DataLoader, train: bool) -> Tuple[float, float, np.ndarray, np.ndarray]:
    model.train() if train else model.eval()
    total_loss, total_correct, total = 0.0, 0, 0
    all_preds, all_targets = [], []

    iterator = tqdm(loader, total=len(loader), desc="train" if train else "val", leave=False)
    # inference_mode is a tad faster than no_grad
    outer_ctx = torch.enable_grad() if train else torch.inference_mode()

    with outer_ctx:
        for images, targets in iterator:
            # Transfers
            images  = images.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            if USE_CHANNELS_LAST:
                images = images.contiguous(memory_format=torch.channels_last)

            # Step
            optimizer.zero_grad(set_to_none=True)
            with autocast(device_type=amp_dev, dtype=amp_dtype, enabled=use_amp):
                logits = model(images)
                loss   = criterion(logits, targets)

            if train:
                if is_cuda:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    loss.backward()
                    optimizer.step()

            # Metrics
            total_loss += loss.item() * images.size(0)
            preds = logits.argmax(dim=1)
            total_correct += (preds == targets).sum().item()
            total += images.size(0)
            all_preds.append(preds.detach().cpu().numpy())
            all_targets.append(targets.detach().cpu().numpy())

            iterator.set_postfix(loss=total_loss / max(total, 1), acc=total_correct / max(total, 1))

    avg_loss = total_loss / max(total, 1)
    accuracy = total_correct / max(total, 1)
    return avg_loss, accuracy, np.concatenate(all_targets), np.concatenate(all_preds)


def train_model():
    best_val_acc = 0.0
    patience = 0

    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc, _, _      = run_epoch(train_loader, train=True)
        val_loss, val_acc, y_true, y_pred = run_epoch(val_loader,   train=False)
        scheduler.step(val_loss)

        print(f"Epoch {epoch:02d} | "
              f"train: {train_loss:.4f}/{train_acc:.3f} | "
              f"val: {val_loss:.4f}/{val_acc:.3f}")

        improved = val_acc > best_val_acc
        if improved:
            best_val_acc = val_acc
            patience = 0
            torch.save(
                {"model": model.state_dict(), "classes": train_ds.classes},
                MODELS_DIR / "simpsons_effb0_best_v2.pt"
            )
        else:
            patience += 1
            if patience >= EARLY_STOP_PATIENCE:
                print("Early stopping.")
                break

    # Final report on the best last eval set (y_true / y_pred from last loop)
    report = classification_report(y_true, y_pred, target_names=train_ds.classes, digits=3)
    print(report)
    np.save(MODELS_DIR / "confusion_matrix.npy", confusion_matrix(y_true, y_pred))

train_model()

train:   0%|          | 0/589 [00:18<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 01 | train: 2.0859/0.488 | val: 1.4436/0.650


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 02 | train: 1.4998/0.605 | val: 1.1846/0.696


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 03 | train: 1.3450/0.639 | val: 1.0605/0.725


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 04 | train: 1.2568/0.658 | val: 1.0167/0.727


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 05 | train: 1.2217/0.662 | val: 0.9656/0.743


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 06 | train: 1.1768/0.674 | val: 0.9417/0.747


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 07 | train: 1.1463/0.682 | val: 0.9473/0.744


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 08 | train: 1.1404/0.684 | val: 0.9325/0.750


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 09 | train: 1.1168/0.688 | val: 0.8771/0.758


train:   0%|          | 0/589 [00:00<?, ?it/s]

val:   0%|          | 0/67 [00:00<?, ?it/s]

Epoch 10 | train: 1.1132/0.687 | val: 0.8682/0.758
                          precision    recall  f1-score   support

  abraham_grampa_simpson      0.835     0.772     0.802        92
           agnes_skinner      1.000     0.600     0.750         5
  apu_nahasapeemapetilon      0.770     0.746     0.758        63
           barney_gumble      0.500     0.273     0.353        11
            bart_simpson      0.730     0.800     0.763       135
            carl_carlson      0.500     0.600     0.545        10
charles_montgomery_burns      0.781     0.833     0.806       120
            chief_wiggum      0.823     0.798     0.810        99
         cletus_spuckler      0.800     0.800     0.800         5
          comic_book_guy      0.875     0.745     0.805        47
               disco_stu      0.000     0.000     0.000         1
          edna_krabappel      0.857     0.783     0.818        46
                fat_tony      1.000     0.333     0.500         3
                     gil

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
# infer_simpsons.py
import json
from pathlib import Path
import torch
from torchvision import transforms, models
from PIL import Image

MODELS_DIR = Path("models")
weights_path = MODELS_DIR / "simpsons_effb0_best.pt"
with open(MODELS_DIR / "class_to_idx.json") as f:
    class_to_idx = json.load(f)
idx_to_class = {v: k for k, v in class_to_idx.items()}

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]
tfms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

device = torch.device("mps" if torch.backends.mps.is_available() else ("cuda" if torch.cuda.is_available() else "cpu"))
weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1
model = models.efficientnet_b0(weights=weights)
model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, len(idx_to_class))
ckpt = torch.load(weights_path, map_location="cpu")
model.load_state_dict(ckpt["model"])
model.eval().to(device)

def predict(image_path: str, topk: int = 5):
    img = Image.open(image_path).convert("RGB")
    tensor = tfms(img).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(tensor)
        probs = torch.softmax(logits, dim=1).squeeze(0)
    topk_probs, topk_idxs = probs.topk(topk)
    return [(idx_to_class[i.item()], float(topk_probs[j])) for j, i in enumerate(topk_idxs)]

if __name__ == "__main__":
    import sys
    img_path = sys.argv[1]
    for cls, p in predict(img_path):
        print(f"{cls}: {p:.3f}")

FileNotFoundError: [Errno 2] No such file or directory: '--f=/Users/dmu06/Library/Jupyter/runtime/kernel-v341f456870aa3191b8fba386f7fc2e06ad7ae9527.json'