In [1]:
import torch
# Enable cuDNN's benchmark mode (auto-selection of convolution algorithms, per the
# PyTorch backend docs). This is the first cell, so the flag is set before any model
# is built or run further down the notebook.
torch.backends.cudnn.benchmark = True


In [None]:
# Cell 1: imports & paths

import os
from pathlib import Path
import time
from tqdm.auto import tqdm
import multiprocessing

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets

# Try to use transforms.v2 (faster C++ backend); fallback to classic transforms
try:
    from torchvision.transforms import v2 as T
    HAVE_V2 = True
    print("Using torchvision.transforms.v2")
except ImportError:
    from torchvision import transforms as T
    HAVE_V2 = False
    print("Using torchvision.transforms (legacy)")

# ---- Paths ----
# If the kernel was started inside a "notebooks" folder, the repo root is one level up;
# otherwise the current working directory is taken as the repo root.
REPO_ROOT = Path.cwd()
if REPO_ROOT.name == "notebooks":
    REPO_ROOT = REPO_ROOT.parent
RAW_ROOT = REPO_ROOT.joinpath("data", "raw")                # original yolo-style dataset
SW_PATCH_ROOT = REPO_ROOT.joinpath("data", "patches_sw")    # sliding-window patches

print("Repo root:", REPO_ROOT)
print("Sliding-window patch root:", SW_PATCH_ROOT)

# ---- GPU & multiprocessing setup ----
# Prefer the GPU whenever CUDA is usable, else fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

if device.type == "cuda":
    print("CUDA:", torch.cuda.is_available())
    print("GPU name:", torch.cuda.get_device_name(0))

# On Windows + Jupyter: needed for num_workers > 0.
# force=True replaces any start method that was already set in this process.
multiprocessing.set_start_method("spawn", force=True)
print("Multiprocessing start method set to 'spawn'")


Repo root:     C:\Users\amrik\Data Science\Personal-PCB-Project
SW_PATCH_ROOT: C:\Users\amrik\Data Science\Personal-PCB-Project\data\patches_sw
Using device: cuda
GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [3]:
def read_yolo_labels(label_path, img_w, img_h):
    """
    Parse a YOLO-format label file into corner boxes measured in pixels.

    Every usable line has exactly five whitespace-separated fields,
    ``cls cx cy w h``, where the box centre and size are normalised to 0-1.
    Lines with any other number of fields (blank lines included) are skipped.

    Args:
        label_path: location of the ``.txt`` label file. A ``pathlib.Path``, a plain
            ``str`` or any other ``os.PathLike`` is accepted.
        img_w: image width in pixels, used to de-normalise x values.
        img_h: image height in pixels, used to de-normalise y values.

    Returns:
        A list of ``(cls_idx, x1, y1, x2, y2)`` tuples. Coordinates are clipped to
        ``[0, img_w - 1]`` horizontally and ``[0, img_h - 1]`` vertically. The list
        is empty when the file does not exist.

    Raises:
        ValueError: if a five-field line contains a non-numeric value.
    """
    # Coerce first so callers may pass a str; the original required a Path because it
    # called .exists() directly on the argument.
    label_path = Path(label_path)
    if not label_path.exists():
        return []

    def _clip(value, upper):
        # Clamp to the valid pixel range [0, upper].
        return max(0, min(upper, value))

    boxes = []
    with label_path.open("r") as f:
        for line in f:
            parts = line.split()  # split() with no argument also strips the newline
            if len(parts) != 5:
                continue
            cls = int(parts[0])
            cx, cy, bw, bh = map(float, parts[1:])

            # convert normalised values to pixel units
            cx *= img_w
            cy *= img_h
            bw *= img_w
            bh *= img_h

            # centre/size -> corner representation, then clip to the image
            x1 = _clip(cx - bw / 2, img_w - 1)
            y1 = _clip(cy - bh / 2, img_h - 1)
            x2 = _clip(cx + bw / 2, img_w - 1)
            y2 = _clip(cy + bh / 2, img_h - 1)

            boxes.append((cls, x1, y1, x2, y2))
    return boxes


In [4]:
def box_iou_xyxy(box_a, box_b):
    """Intersection-over-union of two axis-aligned boxes, each given as (x1, y1, x2, y2)."""
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b

    # Corners of the candidate overlap rectangle.
    left, top = max(ax1, bx1), max(ay1, by1)
    right, bottom = min(ax2, bx2), min(ay2, by2)

    # Boxes that are disjoint or merely touch have no overlap area.
    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - overlap

    # The small epsilon keeps the division defined even for a zero-sized union.
    return overlap / (union + 1e-9)


def label_window_overlap(x1, y1, x2, y2, gt_boxes, min_rel_overlap=0.30):
    """
    Pick a class name for the window (x1, y1, x2, y2) from how much of a
    ground-truth box it covers.

    - gt_boxes: iterable of (cls_idx, gx1, gy1, gx2, gy2) in pixels.
    - min_rel_overlap: smallest covered fraction (intersection_area / box_area)
      for which the window is labelled with the box's class.

    The box with the largest covered fraction decides the label (the earliest box
    wins ties); its name is looked up in DEFECT_CLASSES by cls_idx.

    Returns: a class name from DEFECT_CLASSES, or "background" when no box reaches
    the threshold.
    """
    winner = None
    top_fraction = 0.0

    for cls_idx, gx1, gy1, gx2, gy2 in gt_boxes:
        # overlap rectangle between the window and this box
        left, right = max(x1, gx1), min(x2, gx2)
        top, bottom = max(y1, gy1), min(y2, gy2)
        if right <= left or bottom <= top:
            continue  # window and box do not intersect

        covered = (right - left) * (bottom - top)
        gt_area = (gx2 - gx1) * (gy2 - gy1)
        if gt_area <= 0:
            continue  # degenerate box

        # fraction of the *defect box* (not of the window) that lies inside the window
        fraction = covered / gt_area
        if fraction > top_fraction:
            top_fraction, winner = fraction, DEFECT_CLASSES[cls_idx]

    is_positive = winner is not None and top_fraction >= min_rel_overlap
    return winner if is_positive else "background"



def label_window_center(x1, y1, x2, y2, gt_boxes):
    """
    Pick a class name for the window (x1, y1, x2, y2) by testing whether its
    centre point falls inside a ground-truth box (box edges count as inside).

    gt_boxes: list of (cls_idx, gx1, gy1, gx2, gy2) in pixels.

    Returns: the DEFECT_CLASSES entry of the first box containing the centre,
    otherwise "background".
    """
    mid_x = 0.5 * (x1 + x2)
    mid_y = 0.5 * (y1 + y2)

    # An image without any annotated defect can only yield background windows.
    if not gt_boxes:
        return "background"

    # First box (in list order) that contains the centre decides the label.
    containing = (
        DEFECT_CLASSES[cls_idx]
        for cls_idx, gx1, gy1, gx2, gy2 in gt_boxes
        if (gx1 <= mid_x <= gx2) and (gy1 <= mid_y <= gy2)
    )
    return next(containing, "background")



In [13]:
def generate_patches_for_image(img_path, split):
    """
    Cut one image into PATCH_SIZE x PATCH_SIZE windows on a STRIDE grid, label each
    window with the CENTER-IN-BOX rule (label_window_center) and save the kept crops
    as JPEGs under SW_PATCH_ROOT / split / <class name>.

    Background windows are thinned randomly: one is dropped whenever a fresh
    np.random.rand() draw exceeds BG_KEEP_PROB.

    NOTE(review): a later cell of this notebook defines another function with this
    same name (overlap rule); whichever cell ran last is the one in effect.

    Returns: (num_pos, num_bg) -- counts of saved defect and background patches.
    """
    image = Image.open(img_path).convert("RGB")
    width, height = image.size

    # The label file shares the image's stem and lives in the sibling "labels" folder.
    stem = img_path.stem
    boxes = read_yolo_labels(
        img_path.parent.parent / "labels" / f"{stem}.txt", width, height
    )

    saved_pos, saved_bg = 0, 0

    for top in range(0, height - PATCH_SIZE + 1, STRIDE):
        for left in range(0, width - PATCH_SIZE + 1, STRIDE):
            right = left + PATCH_SIZE
            bottom = top + PATCH_SIZE

            label = label_window_center(left, top, right, bottom, boxes)
            is_background = label == "background"

            # optional: subsample background so it doesn't dominate too hard
            # (the random draw happens only for background windows)
            if is_background and np.random.rand() > BG_KEEP_PROB:
                continue

            target = SW_PATCH_ROOT / split / label / f"{stem}_x{left}_y{top}.jpg"
            image.crop((left, top, right, bottom)).save(target, quality=95)

            if is_background:
                saved_bg += 1
            else:
                saved_pos += 1

    return saved_pos, saved_bg


In [9]:
import shutil

# Start from an empty output tree: delete whatever an earlier run left under
# SW_PATCH_ROOT. This is destructive -- every previously generated patch is removed.
if SW_PATCH_ROOT.exists():
    print("Removing old sliding-window patches at:", SW_PATCH_ROOT)
    shutil.rmtree(SW_PATCH_ROOT)

# Pre-create SW_PATCH_ROOT/<split>/<class>/ for every split and class name; the patch
# generator saves straight into these folders.
# NOTE(review): ALL_CLASSES is not defined in the cells shown here -- presumably the
# defect class names plus "background"; confirm it exists before this cell runs.
for split in ["train", "valid", "test"]:
    for cls in ALL_CLASSES:
        out_dir = SW_PATCH_ROOT / split / cls
        out_dir.mkdir(parents=True, exist_ok=True)

print("Fresh SW_PATCH_ROOT created:", SW_PATCH_ROOT)


Removing old sliding-window patches at: C:\Users\amrik\Data Science\Personal-PCB-Project\data\patches_sw
Fresh SW_PATCH_ROOT created: C:\Users\amrik\Data Science\Personal-PCB-Project\data\patches_sw


In [5]:
def generate_patches_for_image(img_path, split, min_rel_overlap=0.30):
    """
    Slide a window over a single image, label each patch via the overlap-with-box
    rule and save the kept crops.

    Windows are PATCH_SIZE x PATCH_SIZE, placed on a STRIDE grid. Each window is
    labelled by label_window_overlap; background windows are randomly thinned (one is
    dropped whenever np.random.rand() exceeds BG_KEEP_PROB). Kept crops are written as
    JPEGs to SW_PATCH_ROOT / split / <class name> / <stem>_x<x>_y<y>.jpg.

    Args:
        img_path: path of the source image (Path or str). Its YOLO label file is
            looked up at <image dir>/../labels/<image stem>.txt; a warning is printed
            and the image is treated as defect-free when that file is missing.
        split: name of the output sub-folder under SW_PATCH_ROOT (e.g. "train").
        min_rel_overlap: minimum fraction of a ground-truth box a window must cover to
            receive that box's class; forwarded to label_window_overlap.

    Returns: (num_pos, num_bg) -- counts of saved defect and background patches.
    """
    img_path = Path(img_path)  # allow plain string paths as well

    # Load inside a context manager so the file handle is released as soon as the
    # pixel data has been converted, instead of whenever the object is collected.
    with Image.open(img_path) as src:
        img = src.convert("RGB")
    W, H = img.size

    # label file path
    label_path = img_path.parent.parent / "labels" / (img_path.stem + ".txt")

    if not label_path.exists():
        print(f"[WARN] No label file for: {img_path.name}")
        gt_boxes = []
    else:
        gt_boxes = read_yolo_labels(label_path, W, H)

    num_pos = 0
    num_bg = 0

    base_name = img_path.stem
    split_root = SW_PATCH_ROOT / split
    ready_dirs = set()  # class names whose output folder was already ensured

    for y in range(0, H - PATCH_SIZE + 1, STRIDE):
        for x in range(0, W - PATCH_SIZE + 1, STRIDE):
            x1, y1 = x, y
            x2, y2 = x + PATCH_SIZE, y + PATCH_SIZE

            cls_name = label_window_overlap(x1, y1, x2, y2, gt_boxes,
                                            min_rel_overlap=min_rel_overlap)

            # background subsampling (the random draw happens only for background)
            if cls_name == "background" and np.random.rand() > BG_KEEP_PROB:
                continue

            patch = img.crop((x1, y1, x2, y2))

            out_dir = split_root / cls_name
            if cls_name not in ready_dirs:
                # Do not rely on another cell having created the folder tree: saving
                # into a missing directory raises FileNotFoundError.
                out_dir.mkdir(parents=True, exist_ok=True)
                ready_dirs.add(cls_name)

            out_name = f"{base_name}_x{x}_y{y}.jpg"
            patch.save(out_dir / out_name, quality=95)

            if cls_name == "background":
                num_bg += 1
            else:
                num_pos += 1

    return num_pos, num_bg



In [6]:
# Smoke test: run the patch generator on the first training image (alphabetical order).
# generate_patches_for_image saves its crops under SW_PATCH_ROOT / "train", so this
# check writes real files into the training patch folders, and background patches are
# randomly subsampled, so the kept set can differ between runs.
train_img_dir = RAW_ROOT / "train" / "images"
one_img = sorted(train_img_dir.glob("*.jpg"))[0]  # IndexError if the folder has no .jpg
print("Test image:", one_img.name)

p, b = generate_patches_for_image(one_img, split="train")
print("From this single image -> positives:", p, "background:", b)


Test image: 01_missing_hole_01_jpg.rf.048b220b403c2678f1a9f40f5bb3a8eb.jpg
From this single image -> positives: 3 background: 7


In [7]:
# Report how many .jpg patches currently sit on disk for every split and class folder.
PRINT_ROOT = SW_PATCH_ROOT

for split in ["train", "valid", "test"]:
    print("\n", split)
    split_dir = PRINT_ROOT / split
    # Class names are taken from the folder names, listed in sorted order.
    for cls in sorted(os.listdir(split_dir)):
        cls_dir = split_dir / cls
        if cls_dir.is_dir():  # ignore stray files next to the class folders
            n = len(list(cls_dir.glob("*.jpg")))
            print(f"  {cls:15s} -> {n}")



 train
  background      -> 24354
  missing_hole    -> 3988
  mouse_bite      -> 4120
  open_circuit    -> 3771
  short           -> 4032
  spur            -> 3971
  spurious_copper -> 3382

 valid
  background      -> 11306
  missing_hole    -> 1871
  mouse_bite      -> 2140
  open_circuit    -> 1846
  short           -> 2169
  spur            -> 1863
  spurious_copper -> 1753

 test
  background      -> 3866
  missing_hole    -> 715
  mouse_bite      -> 460
  open_circuit    -> 674
  short           -> 840
  spur            -> 610
  spurious_copper -> 622


In [24]:
from tqdm import tqdm

def generate_all_patches_for_split(split):
    """
    Generate sliding-window patches for every ``*.jpg`` found in
    RAW_ROOT / split / "images", processed in sorted filename order.

    Each image is handed to generate_patches_for_image; the per-image counts are
    summed and printed as a one-line summary. Nothing is returned.
    """
    images = sorted((RAW_ROOT / split / "images").glob("*.jpg"))

    print(f"\n=== Generating sliding-window patches for {split} "
          f"({len(images)} images) ===")

    kept_pos = kept_bg = 0
    for image_path in tqdm(images):
        n_pos, n_bg = generate_patches_for_image(image_path, split)
        kept_pos, kept_bg = kept_pos + n_pos, kept_bg + n_bg

    print(f"Split {split}: positives={kept_pos}, background_kept={kept_bg}")


# Build the patch dataset for all three splits, one after another. Each call prints its
# own header and summary; the written files land under SW_PATCH_ROOT / <split>.
for split in ["train", "valid", "test"]:
    generate_all_patches_for_split(split)



=== Generating sliding-window patches for train (3224 images) ===


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3224/3224 [02:41<00:00, 20.00it/s]


Split train: positives=23219, background_kept=23125

=== Generating sliding-window patches for valid (1592 images) ===


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1592/1592 [01:21<00:00, 19.42it/s]


Split valid: positives=11642, background_kept=11306

=== Generating sliding-window patches for test (537 images) ===


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 537/537 [00:19<00:00, 27.24it/s]

Split test: positives=3921, background_kept=3866





In [9]:
# Number of patches in each split.
# NOTE(review): train_dataset / val_dataset / test_dataset are created in the
# "Cell 3: datasets and dataloaders" cell further down, which prints these same three
# lines -- on a fresh kernel this cell fails unless that cell has been run first.
print("Train size:", len(train_dataset))
print("Valid size:", len(val_dataset))
print("Test  size:", len(test_dataset))


Train size: 47618
Valid size: 22948
Test  size: 7787


In [22]:
# Cell 2: hyperparameters & transforms

# ---- Data loading ----
IMG_SIZE = 160           # patches are resized to IMG_SIZE x IMG_SIZE before the model
BATCH_SIZE = 128
NUM_WORKERS = 4          # tune if CPU has more/less cores
PREFETCH_FACTOR = 2      # passed to DataLoader(prefetch_factor=...)
PIN_MEMORY = (device.type == "cuda")   # pinned host memory only when training on GPU

# ---- Optimisation ----
EPOCHS = 8               # you can bump later
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-4

# mean/std for PCB-like green boards (rough approximation; not critical)
# The same value is used for all three channels.
# NOTE(review): the model cell loads ImageNet-pretrained weights; confirm these
# statistics (rather than the ImageNet ones) are what is intended.
MEAN = (0.5, 0.5, 0.5)
STD  = (0.25, 0.25, 0.25)

# Two equivalent-purpose pipelines, chosen by which torchvision transforms API was
# imported (HAVE_V2). Training adds random flips and a small rotation; evaluation only
# resizes, converts and normalises.
# Note the ordering differs between the branches: the v2 training pipeline augments
# first and resizes afterwards, the legacy one resizes first and augments afterwards.
if HAVE_V2:
    train_transform = T.Compose([
        T.ToImage(),                           # faster tensor conversion
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        T.RandomRotation(degrees=5),
        T.Resize((IMG_SIZE, IMG_SIZE)),
        T.ToDtype(torch.float32, scale=True),  # convert to float32 with value scaling
        T.Normalize(MEAN, STD),
    ])

    eval_transform = T.Compose([
        T.ToImage(),
        T.Resize((IMG_SIZE, IMG_SIZE)),
        T.ToDtype(torch.float32, scale=True),
        T.Normalize(MEAN, STD),
    ])
else:
    train_transform = T.Compose([
        T.Resize((IMG_SIZE, IMG_SIZE)),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomVerticalFlip(p=0.5),
        T.RandomRotation(degrees=5),
        T.ToTensor(),
        T.Normalize(MEAN, STD),
    ])

    eval_transform = T.Compose([
        T.Resize((IMG_SIZE, IMG_SIZE)),
        T.ToTensor(),
        T.Normalize(MEAN, STD),
    ])


In [23]:
# Cell 3: datasets and dataloaders

train_dir = SW_PATCH_ROOT / "train"
val_dir   = SW_PATCH_ROOT / "valid"
test_dir  = SW_PATCH_ROOT / "test"

# ImageFolder derives the class list from the sub-folder names of each split.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
val_dataset   = datasets.ImageFolder(val_dir,   transform=eval_transform)
test_dataset  = datasets.ImageFolder(test_dir,  transform=eval_transform)

classes = train_dataset.classes
num_classes = len(classes)

# Each split builds its own class -> index mapping from its folders; if one split lacked
# a class folder the integer labels would silently disagree between splits.
assert val_dataset.class_to_idx == train_dataset.class_to_idx, "valid/train class folders differ"
assert test_dataset.class_to_idx == train_dataset.class_to_idx, "test/train class folders differ"

print("Classes:", classes)
print("Train size:", len(train_dataset))
print("Valid size:", len(val_dataset))
print("Test  size:", len(test_dataset))

# Settings shared by all three loaders.
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY,
)
if NUM_WORKERS > 0:
    # persistent_workers / prefetch_factor are only meaningful with worker processes;
    # DataLoader raises ValueError when they are passed together with num_workers=0,
    # so add them only when workers are actually used.
    _loader_kwargs.update(
        persistent_workers=True,
        prefetch_factor=PREFETCH_FACTOR,
    )

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

print("Batches -> train:", len(train_loader), 
      "valid:", len(val_loader), "test:", len(test_loader))


Classes: ['background', 'missing_hole', 'mouse_bite', 'open_circuit', 'short', 'spur', 'spurious_copper']
Train size: 47618
Valid size: 22948
Test  size: 7787
Batches -> train: 373 valid: 180 test: 61


In [24]:
# Cell 4: model, loss, optimizer, scheduler, scaler

from torchvision import models
# torch.cuda.amp.GradScaler is deprecated in favour of the device-agnostic
# torch.amp.GradScaler, which takes the device type as its first argument.
from torch.amp import GradScaler

# Pretrained MobileNetV2 backbone
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Replace classifier head so the output size matches the number of patch classes
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features, num_classes)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# mode="max": the scheduler is stepped with validation accuracy (higher is better);
# the learning rate is halved after `patience` epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="max",
    factor=0.5,
    patience=2,
)

# Loss scaling is only active on CUDA; elsewhere the scaler is a pass-through.
scaler = GradScaler(device.type, enabled=(device.type == "cuda"))

print("Model ready on", device)


Model ready on cuda


  scaler = GradScaler(enabled=(device.type == "cuda"))


In [28]:
# Cell 5: train/test epoch functions (mixed precision + timing)

from torch.amp import autocast  # new API (Torch 2.x)

def train_one_epoch(model, dataloader, optimizer, criterion, scaler, device, epoch=None, total_epochs=None):
    model.train()
    running_loss = 0.0
    running_correct = 0
    total = 0

    # ---> Progress bar for batches
    pbar = tqdm(dataloader, desc=f"Epoch {epoch}/{total_epochs} [Train]", leave=False)

    for inputs, targets in pbar:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with torch.amp.autocast(device_type=device.type, dtype=torch.float16, enabled=(device.type=="cuda")):
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # stats update
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        _, preds = outputs.max(1)
        running_correct += preds.eq(targets).sum().item()
        total += batch_size

        # update tqdm bar display
        pbar.set_postfix({
            "loss": f"{running_loss/total:.4f}",
            "acc": f"{running_correct/total:.4f}"
        })

    return running_loss / total, running_correct / total


def eval_one_epoch(model, dataloader, criterion, device, epoch=None, total_epochs=None):
    model.eval()
    running_loss = 0.0
    running_correct = 0
    total = 0

    pbar = tqdm(dataloader, desc=f"Epoch {epoch}/{total_epochs} [Eval]", leave=False)

    with torch.no_grad():
        for inputs, targets in pbar:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            with torch.amp.autocast(device_type=device.type, dtype=torch.float16, enabled=(device.type=="cuda")):
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            _, preds = outputs.max(1)
            running_correct += preds.eq(targets).sum().item()
            total += batch_size

            pbar.set_postfix({
                "loss": f"{running_loss/total:.4f}",
                "acc": f"{running_correct/total:.4f}"
            })

    return running_loss / total, running_correct / total



In [29]:
# Cell 6: main training loop

# Checkpoints go to <repo>/results; only the best-scoring weights are kept.
RESULTS_DIR = REPO_ROOT / "results"
RESULTS_DIR.mkdir(exist_ok=True, parents=True)
best_path = RESULTS_DIR / "mobilenetv2_sw_best.pth"

best_val_acc = 0.0

print(f"Starting training for {EPOCHS} epochs on", device)
for epoch in range(1, EPOCHS + 1):
    print(f"\nEpoch {epoch}/{EPOCHS}")

    # ---- Train ----
    # epoch / total_epochs are forwarded so the progress bars are labelled with the
    # actual epoch; without them the label is built from the None defaults.
    train_loss, train_acc = train_one_epoch(
        model, train_loader, optimizer, criterion, scaler, device,
        epoch=epoch, total_epochs=EPOCHS,
    )

    # ---- Validate ----
    val_loss, val_acc = eval_one_epoch(
        model, val_loader, criterion, device,
        epoch=epoch, total_epochs=EPOCHS,
    )

    # LR scheduler on validation accuracy (the scheduler was created with mode="max")
    scheduler.step(val_acc)

    print(f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f}")
    print(f"Val   loss: {val_loss:.4f}, Val   acc: {val_acc:.4f}")

    # Save best model: overwrite the checkpoint whenever validation accuracy improves
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_path)
        print("  ðŸ”¥ New best model saved!")

print("\nTraining done.")
print("Best validation accuracy:", best_val_acc)
print("Best model path:", best_path)


Starting training for 8 epochs on cuda

Epoch 1/8


                                                                                                   

Train loss: 1.1933, Train acc: 0.6058
Val   loss: 0.9483, Val   acc: 0.6957
  ðŸ”¥ New best model saved!

Epoch 2/8


                                                                                                   

Train loss: 0.7082, Train acc: 0.7821
Val   loss: 0.5541, Val   acc: 0.8433
  ðŸ”¥ New best model saved!

Epoch 3/8


                                                                                                   

Train loss: 0.4924, Train acc: 0.8600
Val   loss: 0.4682, Val   acc: 0.8715
  ðŸ”¥ New best model saved!

Epoch 4/8


                                                                                                   

Train loss: 0.4378, Train acc: 0.8794
Val   loss: 0.3992, Val   acc: 0.8916
  ðŸ”¥ New best model saved!

Epoch 5/8


                                                                                                   

Train loss: 0.3895, Train acc: 0.8937
Val   loss: 0.3541, Val   acc: 0.9075
  ðŸ”¥ New best model saved!

Epoch 6/8


                                                                                                   

Train loss: 0.3537, Train acc: 0.9046
Val   loss: 0.3442, Val   acc: 0.9082
  ðŸ”¥ New best model saved!

Epoch 7/8


                                                                                                   

Train loss: 0.3638, Train acc: 0.9041
Val   loss: 0.3128, Val   acc: 0.9191
  ðŸ”¥ New best model saved!

Epoch 8/8


                                                                                                   

Train loss: 0.3308, Train acc: 0.9126
Val   loss: 0.3138, Val   acc: 0.9195
  ðŸ”¥ New best model saved!

Training done.
Best validation accuracy: 0.9195136831096392
Best model path: C:\Users\amrik\Data Science\Personal-PCB-Project\results\mobilenetv2_sw_best.pth
