In [2]:
# traffic_classifier_resnet.py
import copy
import json
import os
import random
from collections import Counter

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# --------------------
# Reproducibility
# --------------------
# Seed every RNG this script draws from: Python's `random`, NumPy, torch (CPU)
# and torch on all CUDA devices. SEED is also reused for the train/val split.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# --------------------
# Paths (as requested)
# --------------------
# Relative paths: they resolve against the current working directory.
IMAGES_PATH = "../output/traffic_lights_images.npy"
LABELS_PATH = "../output/traffic_lights_label.npy"
SAVE_DIR = "../Models"
# Create the artifact directory up front so the save steps at the end of the
# run do not fail on a missing folder.
os.makedirs(SAVE_DIR, exist_ok=True)

# --------------------
# cuDNN speedups (good for fixed input size)
# --------------------
# NOTE(review): benchmark=True together with deterministic=False lets cuDNN
# choose kernels by timing them, so GPU runs may not be bit-for-bit repeatable
# even with the seeds fixed -- confirm speed is preferred over exact
# reproducibility here.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# --------------------
# Load data (NumPy)
# --------------------
# Images and labels are read fully into memory; everything below works on
# these in-memory arrays.
X = np.load(IMAGES_PATH)  # (N,H,W) or (N,H,W,C)
y_raw = np.load(LABELS_PATH)  # (N,) with values in {-1,0,1}

# Reject anything that is not a stack of 2-D or channel-last images.
if X.ndim not in (3, 4):
    raise ValueError(f"Expected images with 3 or 4 dims (N,H,W[,C]); got {X.shape}")

# Ensure channel dimension and channel-first format
if X.ndim == 3:
    # (N,H,W) -> (N,1,H,W)
    X = np.expand_dims(X, axis=1)
else:
    # (N,H,W,C) -> (N,C,H,W)
    # NOTE(review): a 4-D input is assumed to be channel-last; an array that is
    # already (N,C,H,W) would be permuted incorrectly -- confirm the file layout.
    X = np.transpose(X, (0, 3, 1, 2))

# Normalize to [0,1]
# astype() makes a float32 copy, so the in-place division below never touches
# the array returned by np.load. A maximum above 1.5 is used as the heuristic
# for "values are on a 0..255 scale".
X = X.astype("float32")
if X.max() > 1.5:
    X /= 255.0

# Remap labels {-1,0,1} -> {0,1,2}
# CrossEntropyLoss needs class indices starting at 0; index_to_label is the
# inverse mapping used to translate predictions back.
label_to_index = {-1: 0, 0: 1, 1: 2}
index_to_label = {v: k for k, v in label_to_index.items()}
try:
    # Element-wise dictionary lookup; a label outside the mapping raises KeyError.
    y = np.vectorize(label_to_index.__getitem__)(y_raw)
except KeyError as e:
    raise ValueError(f"Unknown label {e}; expected only -1, 0, 1")

# num_classes matches the three entries of label_to_index; in_channels is the
# channel axis of the channel-first image array built above.
num_classes = 3
in_channels = X.shape[1]

# --------------------
# Convert to tensors once (fast path)
# --------------------
X_torch = torch.from_numpy(X).contiguous()                    # (N,C,H,W) float32 in [0,1]
y_torch = torch.from_numpy(y.astype(np.int64)).contiguous()   # (N,)

# --------------------
# Split 80/20 (stratified)
# --------------------
# scikit-learn operates on NumPy arrays, so hand it array views of the tensors.
# The label view doubles as the stratification key, keeping class proportions
# the same in both splits.
_labels_view = y_torch.numpy()
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X_torch.numpy(),
    _labels_view,
    test_size=0.2,
    random_state=SEED,
    stratify=_labels_view,
)

# Wrap each split back into a contiguous tensor for TensorDataset.
X_train, X_val, y_train, y_val = (
    torch.from_numpy(part).contiguous()
    for part in (X_train_np, X_val_np, y_train_np, y_val_np)
)

# --------------------
# DataLoaders
# NOTE: ResNet uses more memory than the small CNN. Start with smaller batches.
# --------------------
def _workers():
    c = os.cpu_count() or 2
    return max(min(4, c - 1), 0)

num_workers = _workers()
# persistent_workers is only valid when worker processes actually exist.
persistent = num_workers > 0

train_ds = TensorDataset(X_train, y_train)
val_ds = TensorDataset(X_val, y_val)

# Options shared by both loaders; only batch size and shuffling differ.
_loader_opts = dict(
    num_workers=num_workers,
    pin_memory=True,
    persistent_workers=persistent,
)

# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_ds, batch_size=128, shuffle=False, **_loader_opts)

# --------------------
# Model: ResNet-18 backbone adapted to 1 or 3 channels
# --------------------
def build_resnet_classifier(in_channels: int, num_classes: int):
    """Build an untrained ResNet-18 classifier wrapped to accept small images.

    Args:
        in_channels: Number of input channels. When it is not 3, the first
            convolution is replaced by a fresh one with this many input
            channels (same kernel size, stride and padding).
        num_classes: Size of the output layer.

    Returns:
        An ``nn.Module`` whose ``forward`` upsamples 4-D inputs to 224x224
        whenever their height or width is below 224 and then runs the
        ResNet-18 backbone; the result has one logit per class.

    Raises:
        ImportError: if torchvision is not installed.
    """
    # Newer torchvision exposes ResNet18_Weights and the ``weights=`` keyword;
    # older releases only understand ``pretrained=``. Only a failed import
    # selects the legacy path -- any other error is a real problem and must
    # surface instead of being masked by the fallback.
    try:
        from torchvision.models import resnet18, ResNet18_Weights  # noqa: F401  (API probe)
    except ImportError:
        from torchvision.models import resnet18  # fallback for older torchvision
        backbone = resnet18(pretrained=False)
    else:
        backbone = resnet18(weights=None)

    # Adapt first conv to input channels if needed
    if in_channels != 3:
        backbone.conv1 = nn.Conv2d(
            in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
        )

    # Replace the classification head: light dropout, then a linear layer
    # sized for this task.
    in_feat = backbone.fc.in_features
    backbone.fc = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(in_feat, num_classes)
    )

    # Wrap in a module that ensures input is big enough (ResNet expects ~224x224)
    class ResNetClassifier(nn.Module):
        """Backbone wrapper that upsamples undersized inputs before the forward pass."""

        def __init__(self, net):
            super().__init__()
            self.net = net

        def forward(self, x):
            # If either H or W is below 224, resize BOTH to 224x224. Inputs
            # that are at least 224 on both sides pass through unchanged.
            if x.dim() == 4:
                _, _, H, W = x.shape
                if H < 224 or W < 224:
                    x = F.interpolate(x, size=(224, 224), mode="bilinear", align_corners=False)
            return self.net(x)

    return ResNetClassifier(backbone)

# --------------------
# Device & AMP
# --------------------
# Query CUDA availability once; the same flag selects the device and is what
# the AMP banner reports.
_has_cuda = torch.cuda.is_available()
device = torch.device("cuda" if _has_cuda else "cpu")

print("CUDA available:", _has_cuda)
if _has_cuda:
    print("GPU:", torch.cuda.get_device_name(0))
print("AMP enabled:", _has_cuda)

# Build the network, then move its parameters to the selected device.
model = build_resnet_classifier(in_channels=in_channels, num_classes=num_classes)
model = model.to(device)

# --------------------
# Loss / Optim / Scheduler
# --------------------
# Inverse-frequency class weights, rescaled so that they sum to num_classes.
# If any class is missing from the training split the weights stay None and
# the loss is unweighted.
ctr = Counter(y_train.tolist())
counts = np.array([ctr[i] for i in range(num_classes)], dtype=np.float32)
class_weights = None
if np.all(counts != 0):
    inv = 1.0 / counts
    class_weights = torch.tensor(
        inv / inv.sum() * num_classes,
        dtype=torch.float32,
        device=device,
    )

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Halve the learning rate after 3 epochs without a lower monitored value,
# never going below 1e-6.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)
# Gradient scaling for mixed precision; disabled when CUDA is unavailable.
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

# --------------------
# Training / Eval loops
# --------------------
@torch.no_grad()
def evaluate(loader):
    """Score the global ``model`` on every batch of ``loader``.

    Puts the model in eval mode and runs without gradient tracking
    (autocast is enabled only when CUDA is available).

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen; both are
        0.0 when the loader yields nothing.
    """
    model.eval()
    amp_on = torch.cuda.is_available()
    n_seen = 0
    n_right = 0
    loss_sum = 0.0

    for inputs, targets in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=amp_on):
            scores = model(inputs)
            batch_loss = criterion(scores, targets)

        batch_n = targets.size(0)
        # The criterion returns a per-batch mean, so weight it by batch size
        # to obtain a correct per-sample average at the end.
        loss_sum += batch_loss.item() * batch_n
        n_right += (scores.argmax(dim=1) == targets).sum().item()
        n_seen += batch_n

    denom = max(n_seen, 1)  # guards against division by zero on an empty loader
    return loss_sum / denom, n_right / denom

# Training schedule: at most EPOCHS epochs, stopping early once validation
# accuracy has failed to improve for `patience` consecutive epochs.
EPOCHS = 40
best_val_acc = 0.0
best_state = None
patience = 7
epochs_no_improve = 0

for epoch in range(1, EPOCHS + 1):
    model.train()
    run_loss = 0.0
    seen = 0

    for xb, yb in train_loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            logits = model(xb)
            loss = criterion(logits, yb)

        # Mixed-precision step: scale the loss before backward, let the scaler
        # drive the optimizer step, then update the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # The loss is a batch mean; weight by batch size for a per-sample average.
        run_loss += loss.item() * yb.size(0)
        seen += yb.size(0)

    train_loss = run_loss / max(seen, 1)
    val_loss, val_acc = evaluate(val_loader)

    # The LR scheduler follows validation loss; report whenever it lowers the rate.
    old_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    new_lr = optimizer.param_groups[0]['lr']
    if new_lr != old_lr:
        print(f"[LR] reduced: {old_lr:.6g} -> {new_lr:.6g}")

    print(f"Epoch {epoch:02d}/{EPOCHS} | train_loss: {train_loss:.4f} | val_loss: {val_loss:.4f} | val_acc: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() hands back references to the live parameter/buffer
        # tensors, which later optimizer steps overwrite in place. Deep-copy
        # them so the snapshot really holds this epoch's weights rather than
        # silently tracking whatever the model looks like at the end of training.
        best_state = {
            "epoch": epoch,
            "model_state_dict": copy.deepcopy(model.state_dict()),
            "optimizer_state_dict": copy.deepcopy(optimizer.state_dict()),
            "val_acc": float(val_acc),
            "val_loss": float(val_loss),
            "in_channels": int(in_channels),
            "num_classes": int(num_classes),
        }
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch} (no val_acc improvement for {patience} epochs).")
            break

# Restore the best checkpoint (when one was recorded) and re-score it.
if best_state is not None:
    model.load_state_dict(best_state["model_state_dict"])
final_val_loss, final_val_acc = evaluate(val_loader)
print(f"Best validation accuracy:  {best_val_acc:.4f}")
print(f"Final validation accuracy: {final_val_acc:.4f}")

# --------------------
# Save artifacts
# --------------------
# 1) state_dict bundle (for finetuning/resume); falls back to the bare model
#    weights when no best checkpoint exists.
pth_path = os.path.join(SAVE_DIR, "traffic_classifier_state.pth")
checkpoint = model.state_dict() if best_state is None else best_state
torch.save(checkpoint, pth_path)
print(f"Saved state to: {pth_path}")

# 2) TorchScript (for deployment/inference)
# Tracing records the operations executed for this one example input.
model.eval()
example = X_val[:1].to(device)
with torch.no_grad():
    scripted = torch.jit.trace(model, example)
ts_path = os.path.join(SAVE_DIR, "traffic_classifier_scripted.pt")
scripted.save(ts_path)
print(f"Saved TorchScript model to: {ts_path}")

# 3) class mapping & metrics
mapping_payload = {
    "index_to_label": {str(k): int(v) for k, v in index_to_label.items()},
    "label_to_index": {str(k): int(v) for k, v in label_to_index.items()},
    "semantics": {"-1": "no light", "0": "red", "1": "green"}
}
with open(os.path.join(SAVE_DIR, "class_mapping.json"), "w") as fh:
    json.dump(mapping_payload, fh, indent=2)

metric_lines = [
    f"Final validation accuracy: {final_val_acc:.4f}\n",
    f"Final validation loss: {final_val_loss:.4f}\n",
    f"Best validation accuracy: {best_val_acc:.4f}\n",
]
with open(os.path.join(SAVE_DIR, "metrics.txt"), "w") as fh:
    fh.writelines(metric_lines)

# --------------------
# Quick prediction helper (numpy in, original labels out)
# --------------------
def predict_labels_numpy(np_batch: np.ndarray, batch_size: int = 128) -> np.ndarray:
    """Classify images given as a NumPy array and return original-space labels.

    The caller's array is never modified.

    Args:
        np_batch: Images as (N,H,W,C) or (N,C,H,W), with values either in
            [0,1] or on a 0..255 scale (an array whose maximum exceeds 1.5 is
            divided by 255). A 3-D array is treated as ONE image and gets a
            leading batch axis. NOTE(review): a channel-less (N,H,W) stack is
            therefore not read as N grayscale images -- pass (N,H,W,1).
        batch_size: Number of images per forward pass.

    Returns:
        1-D array with one label per image, mapped through ``index_to_label``
        to {-1,0,1}. Empty (int64) when the batch holds no images.

    Raises:
        ValueError: if the input is not 3-D/4-D, or its channel count cannot
            be converted to the model's ``in_channels``.
    """
    if np_batch.ndim == 3:
        np_batch = np.expand_dims(np_batch, axis=0)
    if np_batch.ndim != 4:
        raise ValueError(f"Expected a 3-D image or 4-D batch; got shape {np_batch.shape}")

    # Nothing to classify: return early, since max() and concatenate() below
    # both fail on empty input.
    if np_batch.shape[0] == 0:
        return np.empty((0,), dtype=np.int64)

    # (N,H,W,C) -> (N,C,H,W) if needed
    if np_batch.shape[-1] in (1, 3) and np_batch.shape[1] not in (1, 3):
        np_batch = np.transpose(np_batch, (0, 3, 1, 2))
    np_batch = np_batch.astype(np.float32, copy=False)
    if np_batch.max() > 1.5:
        # Out-of-place division: at this point np_batch may still be the
        # caller's own float32 array (or a transposed view of it), so `/=`
        # would silently rescale the caller's data.
        np_batch = np_batch / 255.0

    # Channel alignment
    if np_batch.shape[1] != in_channels:
        if in_channels == 1 and np_batch.shape[1] == 3:
            np_batch = np_batch.mean(axis=1, keepdims=True)  # RGB->gray
        elif in_channels == 3 and np_batch.shape[1] == 1:
            np_batch = np.repeat(np_batch, 3, axis=1)        # gray->RGB
        else:
            raise ValueError(f"Channel mismatch: expected {in_channels}, got {np_batch.shape[1]}")

    model.eval()
    preds = []
    with torch.no_grad():
        for i in range(0, np_batch.shape[0], batch_size):
            chunk = torch.from_numpy(np_batch[i:i+batch_size]).to(device, non_blocking=True)
            # Upscale small inputs to 224x224, the same rule the classifier's
            # forward pass applies.
            H, W = chunk.shape[-2:]
            if H < 224 or W < 224:
                chunk = F.interpolate(chunk, size=(224, 224), mode="bilinear", align_corners=False)
            logits = model(chunk)
            preds.append(logits.argmax(dim=1).cpu().numpy())
    preds = np.concatenate(preds, axis=0)
    # Translate class indices back to the original label values.
    return np.vectorize(index_to_label.__getitem__)(preds)

# Example:
# sample_preds = predict_labels_numpy(X_val[:8].cpu().numpy())
# print("Sample preds:", sample_preds)


CUDA available: True
GPU: NVIDIA GeForce GTX 1060 6GB
AMP enabled: True


  scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Epoch 01/40 | train_loss: 1.2787 | val_loss: 1.1996 | val_acc: 0.3294
Epoch 02/40 | train_loss: 1.1168 | val_loss: 1.2489 | val_acc: 0.4167
Epoch 03/40 | train_loss: 0.9822 | val_loss: 1.1432 | val_acc: 0.4405
Epoch 04/40 | train_loss: 0.8496 | val_loss: 0.9183 | val_acc: 0.5833
Epoch 05/40 | train_loss: 0.7211 | val_loss: 1.0120 | val_acc: 0.5556
Epoch 06/40 | train_loss: 0.5900 | val_loss: 1.6368 | val_acc: 0.4960
Epoch 07/40 | train_loss: 0.4890 | val_loss: 1.8736 | val_acc: 0.4167
[LR] reduced: 0.001 -> 0.0005
Epoch 08/40 | train_loss: 0.3955 | val_loss: 1.8268 | val_acc: 0.4921
Epoch 09/40 | train_loss: 0.2229 | val_loss: 0.6027 | val_acc: 0.7897
Epoch 10/40 | train_loss: 0.0960 | val_loss: 0.2646 | val_acc: 0.9206
Epoch 11/40 | train_loss: 0.0735 | val_loss: 0.5133 | val_acc: 0.8056
Epoch 12/40 | train_loss: 0.0700 | val_loss: 0.4608 | val_acc: 0.8413
Epoch 13/40 | train_loss: 0.0559 | val_loss: 0.2718 | val_acc: 0.9127
[LR] reduced: 0.0005 -> 0.00025
Epoch 14/40 | train_loss: 0.

  if H < 224 or W < 224:


Saved TorchScript model to: ../Models\traffic_classifier_scripted.pt
