# DASC 41103 – Project 3: CNN Razorback Logo Classifier (Colab Version)

This notebook is modeled after the in-class CNN examples...

In [None]:
# box 1: Mount Google Drive (Colab)
# Only the import is guarded: an ImportError means google.colab is not
# installed, i.e. we are not on Colab. Mounting happens in the `else` branch
# so that an error raised while mounting is not mistaken for "not in Colab".
try:
    from google.colab import drive
except ImportError:
    print("Not running in Google Colab - skipping drive.mount().")
else:
    drive.mount('/content/drive')
    print("Drive mounted successfully.")

In [None]:
# box 2: Imports and basic configuration
import os
from pathlib import Path
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
# --- Reproducibility -------------------------------------------------------
# Seed both Python's `random` module and PyTorch's global generator.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# --- Hardware --------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# --- Data location ---------------------------------------------------------
# NOTE(review): "DEIN_ORDNER" looks like a placeholder folder name - confirm
# and replace it with the real Drive folder before running.
DATASET_ROOT = Path("/content/drive/MyDrive/DEIN_ORDNER/dataset")

# --- Hyper-parameters ------------------------------------------------------
TRAIN_RATIO = 0.8      # share of the data used for training
BATCH_SIZE = 8
NUM_EPOCHS = 15
LEARNING_RATE = 1e-3

# Used to build the file name of the saved model.
GROUP_NUMBER = 3

In [None]:
# box 3: Check folders
# Build the two class-folder paths and actually verify that they exist, so a
# wrong DATASET_ROOT is noticed here instead of later at dataset construction.
hogs_dir = DATASET_ROOT / 'hogs'
no_hogs_dir = DATASET_ROOT / 'no_hogs'
print(hogs_dir, no_hogs_dir)

for class_dir in (hogs_dir, no_hogs_dir):
    # Warn rather than raise: this cell is purely diagnostic.
    if not class_dir.is_dir():
        print(f"WARNING: expected class folder not found: {class_dir}")

In [None]:
# box 4: Transforms and dataset
# Preprocessing applied to every image, in order: resize to a fixed 500x500,
# then convert the image to a tensor.
preprocessing_steps = [
    transforms.Resize((500, 500)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Load the images found under DATASET_ROOT and report the discovered classes,
# their index mapping, and the number of samples.
base_dataset = datasets.ImageFolder(root=DATASET_ROOT, transform=transform)
print(base_dataset.classes, base_dataset.class_to_idx, len(base_dataset))

In [None]:
# box 5: Wrapper dataset
class HogLogoDataset(Dataset):
    """Wrap a labelled dataset and remap its class indices to fixed binary labels.

    Whatever index the wrapped dataset assigned to the ``'hogs'`` class becomes
    label ``1``, and the index assigned to ``'no_hogs'`` becomes label ``0``.

    Args:
        ds: Indexable dataset that yields ``(sample, class_index)`` pairs and
            exposes a ``class_to_idx`` mapping containing the keys ``'hogs'``
            and ``'no_hogs'``.
    """

    def __init__(self, ds):
        self.ds = ds
        class_index = ds.class_to_idx
        # Keys: indices used by the wrapped dataset; values: binary targets.
        self.map = {
            class_index['hogs']: 1,
            class_index['no_hogs']: 0,
        }

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.ds)

    def __getitem__(self, i):
        """Return sample ``i`` together with its remapped binary label."""
        sample, original_label = self.ds[i]
        remapped_label = self.map[original_label]
        return sample, remapped_label

# Wrap the raw image dataset so that its labels follow the hogs=1 / no_hogs=0 coding.
full_dataset = HogLogoDataset(ds=base_dataset)


In [None]:
# box 6: Split
# Split sizes: the training share is rounded down, validation gets the rest,
# so the two sizes always add up to the full dataset length.
total = len(full_dataset)
train_size = int(TRAIN_RATIO * total)
valid_size = total - train_size

# A dedicated, explicitly seeded generator makes the split identical every
# time this cell runs, no matter how much of the global RNG was used before.
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, valid_dataset = random_split(
    full_dataset,
    [train_size, valid_size],
    generator=split_generator,
)

# Only the training loader shuffles; validation order does not matter.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dl = DataLoader(valid_dataset, batch_size=BATCH_SIZE)

In [None]:
# box 7: Visualize a few sample images

import matplotlib.pyplot as plt

# Show a small grid of images from base_dataset to verify labels & transforms.
n_rows, n_cols = 2, 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 6))

# Preview randomly chosen samples instead of the first n_rows * n_cols items.
# NOTE(review): ImageFolder is documented to list samples class by class, so
# the leading items would typically all share one label - confirm.
num_preview = min(n_rows * n_cols, len(base_dataset))
preview_indices = random.sample(range(len(base_dataset)), num_preview)

for ax, idx in zip(axes.flatten(), preview_indices):
    img, label = base_dataset[idx]
    # img is a tensor in [0,1] with shape [C,H,W]; imshow expects [H,W,C]
    img_np = img.permute(1, 2, 0).numpy()
    ax.imshow(img_np)
    ax.set_title(f"label: {base_dataset.classes[label]}")

# Turn off every axis, including cells left empty when the dataset holds
# fewer images than the grid has slots.
for ax in axes.flatten():
    ax.axis("off")

plt.tight_layout()
plt.show()
print("Shown a sample of images from the dataset.")

In [None]:
# box 8: Model
# Feature extractor: three stages of (3x3 conv, padding 1) -> ReLU -> 2x2
# max-pool, widening the channels 3 -> 32 -> 64 -> 128.
layers = []
for in_channels, out_channels in [(3, 32), (32, 64), (64, 128)]:
    layers.extend([
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    ])

# Classifier head: global average pooling to 1x1, flatten to 128 features,
# one hidden layer with dropout, then two output logits.
layers.extend([
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(64, 2),
])

model = nn.Sequential(*layers).to(device)


In [None]:
# box 9: Loss & optim
# Cross-entropy over the model's two output logits.
loss_fn = nn.CrossEntropyLoss()

# Adam over all model parameters, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# box 10: Train & evaluation utilities

def accuracy_from_logits(logits, y):
    """Compute accuracy given model logits and true labels.

    Args:
        logits: Tensor whose dimension 1 indexes the classes; the predicted
            class of each row is its arg-max along that dimension.
        y: Tensor of true class indices, compared element-wise with the
            predictions.

    Returns:
        The fraction of correct predictions as a Python float. An empty
        batch yields 0.0 (the mean of an empty tensor would be NaN).
    """
    if y.numel() == 0:
        return 0.0
    preds = logits.argmax(dim=1)
    return (preds == y).float().mean().item()

def run_epoch(model, dataloader, optimizer=None, loss_fn=loss_fn, device=device):
    """Run one epoch over a dataloader.

    If optimizer is provided, the model is set to train mode and parameters
    are updated after every batch. If optimizer is None, the model is set to
    eval mode and no gradients are computed.

    Args:
        model: Module called on each input batch to produce logits.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer used for the parameter updates, or None to
            evaluate only.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar loss.
            The default is bound when this function is defined.
        device: Device every batch is moved to before the forward pass.
            The default is bound when this function is defined.

    Returns:
        Tuple ``(avg_loss, avg_acc)``: the per-example average loss and the
        fraction of correctly classified examples. Both are 0 for an empty
        dataloader.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is the same as eval()

    total_loss = 0.0
    total_correct = 0
    total_examples = 0

    # The context manager restores the caller's previous gradient mode on
    # exit, even if a batch raises, instead of forcing it back to "enabled".
    with torch.set_grad_enabled(is_training):
        for xb, yb in dataloader:
            xb = xb.to(device)
            yb = yb.to(device)

            if is_training:
                optimizer.zero_grad()

            logits = model(xb)
            loss = loss_fn(logits, yb)

            if is_training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so that a smaller
            # final batch does not skew the epoch average.
            batch_size = xb.size(0)
            total_loss += loss.item() * batch_size

            preds = logits.argmax(dim=1)
            total_correct += (preds == yb).sum().item()
            total_examples += batch_size

    # max(..., 1) avoids a division by zero for an empty dataloader.
    avg_loss = total_loss / max(total_examples, 1)
    avg_acc = total_correct / max(total_examples, 1)
    return avg_loss, avg_acc

def train(model, epochs, train_dl, valid_dl, optimizer=optimizer, loss_fn=loss_fn, device=device):
    """Train the model for a number of epochs and collect per-epoch metrics.

    Each epoch consists of one training pass over ``train_dl`` followed by one
    evaluation pass over ``valid_dl``; a one-line summary is printed per epoch.

    Args:
        model: Model to train.
        epochs: Number of epochs to run.
        train_dl: Dataloader used for the training pass.
        valid_dl: Dataloader used for the validation pass.
        optimizer: Optimizer handed to the training pass.
        loss_fn: Loss function handed to both passes.
        device: Device handed to both passes.

    Returns:
        Dict with the keys ``"train_loss"``, ``"train_acc"``, ``"valid_loss"``
        and ``"valid_acc"``, each mapping to a list with one value per epoch.
    """
    metric_names = ("train_loss", "train_acc", "valid_loss", "valid_acc")
    history = {name: [] for name in metric_names}

    for epoch_index in range(epochs):
        epoch = epoch_index + 1  # epochs are reported 1-based

        train_metrics = run_epoch(model, train_dl, optimizer=optimizer, loss_fn=loss_fn, device=device)
        valid_metrics = run_epoch(model, valid_dl, optimizer=None, loss_fn=loss_fn, device=device)

        # Both passes return (loss, acc), matching the order of metric_names.
        for name, value in zip(metric_names, (*train_metrics, *valid_metrics)):
            history[name].append(value)

        train_loss, train_acc = train_metrics
        valid_loss, valid_acc = valid_metrics
        summary = (
            f"Epoch {epoch:02d}/{epochs:02d} | "
            f"train loss: {train_loss:.4f} acc: {train_acc:.4f} | "
            f"valid loss: {valid_loss:.4f} acc: {valid_acc:.4f}"
        )
        print(summary)

    return history

In [None]:
# box 11–14: Train model, plot metrics, and save full model

# 1) Train the model with the module-level optimizer, loss and device defaults
history = train(model, epochs=NUM_EPOCHS, train_dl=train_dl, valid_dl=valid_dl)

# 2) Plot training & validation loss / accuracy, side by side
epochs_range = range(1, NUM_EPOCHS + 1)


def _draw_metric_panel(position, curves, y_label, title, y_limits=None):
    """Draw one panel of the 1x2 figure with the given history curves."""
    plt.subplot(1, 2, position)
    for history_key, legend_label in curves:
        plt.plot(epochs_range, history[history_key], label=legend_label)
    plt.xlabel("epoch")
    plt.ylabel(y_label)
    plt.legend()
    if y_limits is not None:
        plt.ylim(*y_limits)
    plt.title(title)


plt.figure(figsize=(12, 4))
_draw_metric_panel(
    1,
    [("train_loss", "train loss"), ("valid_loss", "valid loss")],
    y_label="loss",
    title="Loss over epochs",
)
_draw_metric_panel(
    2,
    [("train_acc", "train acc"), ("valid_acc", "valid acc")],
    y_label="accuracy",
    title="Accuracy over epochs",
    y_limits=(0.0, 1.0),  # accuracy is a fraction, so pin the full range
)

plt.tight_layout()
plt.show()

# 3) Final metrics: one more evaluation-only pass (no optimizer) over the validation set
final_valid_loss, final_valid_acc = run_epoch(model, valid_dl, None, loss_fn, device)
print(f"Final validation loss: {final_valid_loss:.4f}, accuracy: {final_valid_acc:.4f}")

# 4) Save the full model object (not just its state_dict) for later use
#    (meets project deliverable)
model_filename = f"Group_{GROUP_NUMBER}_CNN_FullModel.ph"

# Create the target folder if it does not exist yet.
save_dir = Path("/content/drive/MyDrive")
save_dir.mkdir(parents=True, exist_ok=True)
save_path = save_dir.joinpath(model_filename)

torch.save(model, save_path)
print(f"Saved full model to: {save_path}")