# HW5 – Final Stacking-Only Ensemble (S2: XGBoost Meta-Model)

This notebook implements a **full end-to-end pipeline** for the wafer map classification homework,
using a **Stacking ensemble** as the final and only model (no voting):

1. Load structured `.npy` files:
   - `wafermap_train.npy` (with fields including `waferMap`, `failureType`, `dieSize`, ...)
   - `wafermap_test.npy` (with fields including `waferMap`, `dieSize`, ...)
2. Extract:
   - Wafer maps as images (variable shapes)
   - Failure types as labels (train only)
   - `dieSize` as a scalar feature
3. Resize each wafer map to **64×64** and compute geometric features + `dieSize` (tabular features).
4. Train three base models:
   - **CNN-only classifier** (ResNet18, grayscale input, up to 30 epochs with early stopping)
   - **ExtraTreesClassifier** (tabular-only)
   - **GradientBoostingClassifier** (tabular-only)
5. On a validation split:
   - Get `predict_proba` from each base model.
   - Concatenate probabilities → stacking features.
   - Train a **Stacking meta-model** using **XGBoost** (or LogisticRegression fallback).
6. On the full training set:
   - Retrain ExtraTrees & GradientBoost (the CNN is not retrained; its best weights from the validation-split training are reused).
   - Get full-train & test probabilities for all base models.
   - Train the final stacking meta-model on full training stacking features.
7. Predict test failure types and save `scores.csv` with a single column `failureType`.

> Place this notebook in the same folder as `wafermap_train.npy` and `wafermap_test.npy`.

In [ ]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

from skimage.transform import resize
from skimage.measure import label, regionprops, perimeter

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models

from tqdm import tqdm

# XGBoost is the preferred (stronger) meta-model but an optional dependency;
# when it cannot be imported the stacking cells use LogisticRegression instead.
try:
    from xgboost import XGBClassifier
except ImportError:
    XGBClassifier = None
HAS_XGB = XGBClassifier is not None

# Run on the GPU whenever PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using device: {device}')
print(f'Has XGBoost: {HAS_XGB}')

## 1. Load structured `.npy` data

In [ ]:
# NOTE(review): allow_pickle=True makes np.load unpickle stored objects, which
# can execute arbitrary code -- only load these files from a trusted source.
train_raw = np.load('wafermap_train.npy', allow_pickle=True)
test_raw = np.load('wafermap_test.npy', allow_pickle=True)

# Quick sanity report: record counts first, then the array dtypes.
print(f'Train size: {len(train_raw)}')
print(f'Test  size: {len(test_raw)}')
print(f'Train dtype: {train_raw.dtype}')
print(f'Test  dtype: {test_raw.dtype}')

## 2. Extract waferMap (list), failureType, dieSize

In [ ]:
# Training records: wafer map, failure-type label and die size per record.
X_maps_full = [rec['waferMap'] for rec in train_raw]
y_labels_full = [rec['failureType'] for rec in train_raw]
die_full = np.array([rec['dieSize'] for rec in train_raw], dtype=float)

# Test records carry no label, only the map and the die size.
X_maps_test = [rec['waferMap'] for rec in test_raw]
die_test = np.array([rec['dieSize'] for rec in test_raw], dtype=float)

print(f'Sample train map shape: {X_maps_full[0].shape}')
print(f'Sample test  map shape: {X_maps_test[0].shape}')

## 3. Geometric features + dieSize (tabular features)

In [ ]:
def resize_w(mp, target=(64, 64)):
    """Return ``mp`` resampled to ``target`` and cast back to its own dtype.

    Interpolation order 0 (nearest neighbour) is used with anti-aliasing off
    and ``preserve_range=True`` so the discrete cell codes are kept as labels.
    """
    resampled = resize(
        mp,
        target,
        order=0,
        preserve_range=True,
        anti_aliasing=False,
    )
    return resampled.astype(mp.dtype)

def geo_feats(mp):
    """Summarise the failure pattern of one 2-D wafer map as scalar features.

    Cells equal to ``-1`` are treated as off-wafer, all other cells as dies,
    and dies with a value ``> 0`` as failing. With ``rad = min(h, w) / 2``:

    * ``area`` / ``yl`` -- failing dies divided by all dies (same ratio).
    * ``per``  -- perimeter of the failure mask divided by ``rad``.
    * ``maxd`` / ``mind`` -- largest / smallest distance of a failing die from
      the array centre, divided by ``rad``.
    * ``maj`` / ``minr`` -- major / minor axis length of the largest connected
      failing region, divided by ``2 * rad``.
    * ``sol`` / ``ecc`` -- solidity / eccentricity of that same region.
    * ``eyl`` -- failing fraction of the dies lying in the outer two rows and
      columns of the array.

    Every feature is 0 when the map has no dies or no failing dies.
    """
    grid = np.array(mp)
    on_wafer = ~(grid == -1)
    failing = (grid > 0) & on_wafer
    n_rows, n_cols = grid.shape
    centre_r = (n_rows - 1) / 2
    centre_c = (n_cols - 1) / 2
    radius = min(n_rows, n_cols) / 2

    # Start from the all-zero answer and fill in only what can be computed.
    feats = dict(area=0, per=0, maxd=0, mind=0, maj=0, minr=0, sol=0, ecc=0, yl=0, eyl=0)

    n_dies = on_wafer.sum()
    if n_dies == 0:
        return feats

    n_fail = failing.sum()
    fail_ratio = n_fail / n_dies
    if n_fail == 0:
        feats['yl'] = float(fail_ratio)
        return feats

    feats['area'] = fail_ratio
    feats['yl'] = fail_ratio

    # Radial spread of the failing dies around the array centre.
    rows, cols = np.nonzero(failing)
    dist = np.sqrt((rows - centre_r) ** 2 + (cols - centre_c) ** 2)
    feats['maxd'] = dist.max() / radius
    feats['mind'] = dist.min() / radius

    feats['per'] = perimeter(failing.astype(float)) / radius

    # Shape descriptors come from the largest connected failing region only.
    regions = regionprops(label(failing))
    if regions:
        biggest = max(regions, key=lambda reg: reg.area)
        feats['maj'] = biggest.major_axis_length / (2 * radius)
        feats['minr'] = biggest.minor_axis_length / (2 * radius)
        feats['sol'] = biggest.solidity or 0
        feats['ecc'] = biggest.eccentricity or 0

    # Edge band: everything except the interior that is at least two cells
    # away from every side of the array, restricted to real dies.
    band = np.ones_like(on_wafer, bool)
    band[2:-2, 2:-2] = False
    band &= on_wafer
    n_band = band.sum()
    if n_band > 0:
        feats['eyl'] = (failing & band).sum() / n_band

    return feats

def extract_tab(maps, dies):
    """Build the tabular feature frame, one row per wafer.

    Each map is resized with ``resize_w`` and summarised by ``geo_feats``; the
    matching entry of ``dies`` is appended as the float column ``die``.
    """
    rows = [
        {**geo_feats(resize_w(wafer)), 'die': float(size)}
        for wafer, size in zip(maps, dies)
    ]
    return pd.DataFrame(rows)

# Tabular features for both sets, then a quick look at shapes and first rows.
X_tab_full = extract_tab(X_maps_full, die_full)
X_tab_test = extract_tab(X_maps_test, die_test)
print(f'Tabular train shape: {X_tab_full.shape}')
print(f'Tabular test shape: {X_tab_test.shape}')
X_tab_full.head()

## 4. Label encoding + unified train/valid split (index-based)

In [ ]:
# Map the failure-type labels to integer class ids.
le = LabelEncoder()
y_enc_full = le.fit_transform(y_labels_full)
num_classes = len(le.classes_)
print('Classes:', list(le.classes_))

# Split row *indices* (stratified by class) so the image list and the tabular
# frame are cut with exactly the same train/validation membership.
indices = np.arange(len(X_maps_full))
idx_tr, idx_va, y_tr, y_va = train_test_split(
    indices,
    y_enc_full,
    test_size=0.2,
    random_state=42,
    stratify=y_enc_full,
)

maps_tr = [X_maps_full[row] for row in idx_tr]
maps_va = [X_maps_full[row] for row in idx_va]
tab_tr = X_tab_full.iloc[idx_tr].to_numpy()
tab_va = X_tab_full.iloc[idx_va].to_numpy()

print(f'Train size: {len(maps_tr)} Valid size: {len(maps_va)}')

## 5. WaferDataset + DataLoaders for CNN

In [ ]:
class WaferDataset(Dataset):
    """Dataset of wafer maps, with optional integer labels.

    Each item is the map resized by ``resize_w``, standardised per image and
    given a leading channel axis. With labels, items are ``(image, label)``
    pairs; without, just the image tensor.
    """

    def __init__(self, maps, labels=None):
        self.maps = maps
        self.labels = labels

    def __len__(self):
        return len(self.maps)

    def __getitem__(self, idx):
        img = resize_w(self.maps[idx]).astype('float32')
        # Per-image standardisation; the epsilon guards constant maps (std 0).
        img = (img - img.mean()) / (img.std() + 1e-6)
        image = torch.tensor(img[np.newaxis, ...])  # add the channel axis
        if self.labels is None:
            return image
        return image, torch.tensor(self.labels[idx], dtype=torch.long)

# Only the training loader shuffles; validation order must stay fixed so its
# predictions line up with y_va.
train_ds, valid_ds = WaferDataset(maps_tr, y_tr), WaferDataset(maps_va, y_va)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=32, shuffle=False)
print('DataLoaders ready.')

## 6. ResNet18 CNN-only classifier (backbone + head)

In [ ]:
class ResNetBackbone(nn.Module):
    """ResNet18 feature extractor for 1-channel input, projected to ``emb_dim``."""

    def __init__(self, emb_dim=128):
        super().__init__()
        resnet = models.resnet18(weights=None)  # no pretrained weights
        # Replace the first convolution so the network accepts 1-channel input.
        resnet.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        # Keep every child module except the last one
        # (presumably ResNet18's own classification layer).
        layers = list(resnet.children())
        self.features = nn.Sequential(*layers[:-1])
        self.fc = nn.Linear(512, emb_dim)

    def forward(self, x):
        pooled = self.features(x)
        return self.fc(pooled.flatten(start_dim=1))

class CNNClassifier(nn.Module):
    """``ResNetBackbone`` embedding followed by a linear classification head."""

    def __init__(self, num_classes, emb_dim=128):
        super().__init__()
        self.backbone = ResNetBackbone(emb_dim)
        self.head = nn.Linear(emb_dim, num_classes)

    def forward(self, x, return_emb=False):
        """Return class logits, or the backbone embedding if ``return_emb``."""
        embedding = self.backbone(x)
        return embedding if return_emb else self.head(embedding)

# Seed PyTorch's global RNG before the weights are drawn. The tabular and
# meta models all pin random_state=42, but the CNN initialisation and the
# shuffled training batches were unseeded, so results changed on every run.
# NOTE(review): this does not force deterministic CUDA kernels.
torch.manual_seed(42)

cnn_model = CNNClassifier(num_classes=num_classes, emb_dim=128).to(device)
crit_cnn = nn.CrossEntropyLoss()
opt_cnn = optim.Adam(cnn_model.parameters(), lr=1e-3)
print('CNN model ready.')

## 7. Train CNN (up to 30 epochs) with early stopping (patience=5)

In [ ]:
def eval_cnn(model, loader):
    """Return the top-1 accuracy of ``model`` over every batch in ``loader``.

    Args:
        model: module mapping an input batch to per-class logits.
        loader: iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when ``loader``
        yields no samples (previously a ``ZeroDivisionError``).

    The model is switched to eval mode and left there; callers that keep
    training must call ``model.train()`` again.
    """
    model.eval()
    # Move batches to wherever the model's weights live, so the function does
    # not depend on a global; fall back to the notebook-level `device` only
    # for a parameter-free model.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device(device)

    correct = total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(target), yb.to(target)
            pred = model(xb).argmax(1)
            correct += (pred == yb).sum().item()
            total += len(yb)
    return correct / total if total else 0.0

EPOCHS_CNN = 30
PATIENCE = 5
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint. With 0.0 and the strict '>' below, a run that never beats 0.0
# saved nothing, and the load after the loop then failed or silently picked
# up a stale 'best_cnn.pth' left by an earlier run.
best_val_acc = float('-inf')
wait = 0  # consecutive epochs without a validation improvement

for epoch in range(EPOCHS_CNN):
    cnn_model.train()  # eval_cnn leaves the model in eval mode
    running_loss = 0.0
    for xb, yb in tqdm(train_loader, desc=f'Epoch {epoch+1}/{EPOCHS_CNN}'):
        xb, yb = xb.to(device), yb.to(device)
        opt_cnn.zero_grad()
        logits = cnn_model(xb)
        loss = crit_cnn(logits, yb)
        loss.backward()
        opt_cnn.step()
        # Weight by batch size so the epoch loss is a per-sample mean even
        # when the last batch is smaller.
        running_loss += loss.item() * len(yb)
    train_loss = running_loss / len(train_ds)
    val_acc = eval_cnn(cnn_model, valid_loader)
    print(f'Epoch {epoch+1}/{EPOCHS_CNN} - train loss: {train_loss:.4f}, val acc: {val_acc:.4f}')

    if val_acc > best_val_acc:
        # New best: checkpoint the weights and reset the patience counter.
        best_val_acc = val_acc
        wait = 0
        torch.save(cnn_model.state_dict(), 'best_cnn.pth')
        print('  Saved new best CNN model.')
    else:
        wait += 1
        if wait >= PATIENCE:
            print('Early stopping triggered.')
            break

# Load best weights
cnn_model.load_state_dict(torch.load('best_cnn.pth', map_location=device))
print('Loaded best CNN weights with val acc =', best_val_acc)

## 8. Train ExtraTrees & GradientBoosting (tabular-only base models)

In [ ]:
# Tabular base model: fully grown extra-trees with per-tree class balancing.
et_model = ExtraTreesClassifier(
    n_estimators=800, max_depth=None,
    min_samples_split=2, min_samples_leaf=1,
    max_features='sqrt', class_weight='balanced_subsample',
    n_jobs=-1, random_state=42,
)

# Tabular base model: shallow gradient-boosted trees.
gb_model = GradientBoostingClassifier(
    n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42,
)

# Fit on the training part of the split only.
et_model.fit(tab_tr, y_tr)
gb_model.fit(tab_tr, y_tr)

# Validation probabilities are kept: they become stacking features later.
et_va_probs = et_model.predict_proba(tab_va)
gb_va_probs = gb_model.predict_proba(tab_va)

et_va_pred = np.argmax(et_va_probs, axis=1)
gb_va_pred = np.argmax(gb_va_probs, axis=1)
et_va_acc = accuracy_score(y_va, et_va_pred)
gb_va_acc = accuracy_score(y_va, gb_va_pred)
print('ExtraTrees val acc:', et_va_acc)
print('GradBoost  val acc:', gb_va_acc)

## 9. CNN validation probabilities (base model 1)

In [ ]:
def get_cnn_probs(model, loader):
    """Run ``model`` over a labelled loader and collect softmax probabilities.

    Returns ``(probs, labels)`` as NumPy arrays stacked in loader order, so
    the labels can be checked against the expected target order.
    """
    model.eval()
    prob_chunks, label_chunks = [], []
    with torch.no_grad():
        for inputs, targets in loader:
            scores = model(inputs.to(device))
            prob_chunks.append(scores.softmax(dim=1).cpu().numpy())
            label_chunks.append(targets.numpy())
    return np.vstack(prob_chunks), np.concatenate(label_chunks)

# CNN probabilities on the validation split (stacking features of base model 1).
cnn_va_probs, cnn_va_labels = get_cnn_probs(cnn_model, valid_loader)
cnn_va_pred = np.argmax(cnn_va_probs, axis=1)
cnn_va_acc = accuracy_score(y_va, cnn_va_pred)
print('CNN-only val acc:', cnn_va_acc)
# The loader must have kept the y_va order, otherwise the probabilities would
# be misaligned with the tabular models' validation rows.
assert np.array_equal(cnn_va_labels, y_va), 'Label mismatch in CNN valid loader'

## 10. Stacking meta-model on validation (S2: XGBoost if available, otherwise LogisticRegression)

In [ ]:
# Local import: this cell is the first to need cross-validation helpers.
from sklearn.model_selection import StratifiedKFold, cross_val_predict

# Stacking features on validation: per-class probabilities of the three base
# models, concatenated column-wise.
stack_va_feats = np.concatenate([cnn_va_probs, et_va_probs, gb_va_probs], axis=1)

if HAS_XGB:
    stack_meta_val = XGBClassifier(
        n_estimators=400,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.9,
        colsample_bytree=0.8,
        objective='multi:softprob',
        num_class=num_classes,
        tree_method='hist',
        random_state=42,
    )
else:
    # `multi_class` is deprecated in recent scikit-learn; 'auto' was already
    # the default, so omitting it keeps the same model.
    stack_meta_val = LogisticRegression(max_iter=1000)

# Score the meta-model with out-of-fold predictions. Fitting and predicting on
# the same validation rows (as before) reports training accuracy, which is
# optimistic and says nothing about how the stack generalises.
# The fold count is capped by the rarest class so every training fold still
# contains every class.
smallest_class = int(np.bincount(y_va, minlength=num_classes).min())
n_splits = min(5, smallest_class)
if n_splits >= 2:
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    stack_va_pred = cross_val_predict(stack_meta_val, stack_va_feats, y_va, cv=cv)

# Keep a meta-model fitted on all validation rows, as before.
stack_meta_val.fit(stack_va_feats, y_va)
if n_splits < 2:
    # Too few samples of some class to cross-validate: fall back to the
    # in-sample score and say so.
    print('Warning: a class has fewer than 2 validation samples; '
          'the accuracy below is in-sample (optimistic).')
    stack_va_pred = stack_meta_val.predict(stack_va_feats)

stack_va_acc = accuracy_score(y_va, stack_va_pred)
print('Stacking (meta-model) val acc =', stack_va_acc)

## 11. Retrain tabular base models on full training data & get full/test probabilities

In [ ]:
# Local import: needed for the out-of-fold stacking features below.
from sklearn.model_selection import StratifiedKFold, cross_val_predict

et_full = ExtraTreesClassifier(
    n_estimators=800,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    class_weight='balanced_subsample',
    n_jobs=-1,
    random_state=42,
)

gb_full = GradientBoostingClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=3,
    random_state=42,
)

# Training-set stacking features must be OUT-OF-FOLD. Calling predict_proba on
# the rows a model was fitted on (as before) yields near-perfect probabilities
# -- especially for fully grown trees -- so the meta-model learns to trust the
# tabular models far more than their test-time quality justifies.
# cross_val_predict fits clones per fold and leaves et_full / gb_full untouched.
# Cost: five extra fits per model.
oof_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
et_full_probs = cross_val_predict(
    et_full, X_tab_full.values, y_enc_full, cv=oof_cv, method='predict_proba'
)
gb_full_probs = cross_val_predict(
    gb_full, X_tab_full.values, y_enc_full, cv=oof_cv, method='predict_proba'
)

# Test-set features come from models refitted on all training rows.
et_full.fit(X_tab_full.values, y_enc_full)
gb_full.fit(X_tab_full.values, y_enc_full)
et_test_probs = et_full.predict_proba(X_tab_test.values)
gb_test_probs = gb_full.predict_proba(X_tab_test.values)
# NOTE(review): the CNN's full-train probabilities are still in-sample for the
# rows the CNN was trained on; making those out-of-fold would need K-fold CNN
# training.
print('Full tabular models retrained.')

## 12. CNN full-training and test probabilities

In [ ]:
# Unshuffled loaders, so probability rows keep the order of X_maps_full and
# X_maps_test.
full_train_ds = WaferDataset(X_maps_full, labels=y_enc_full)
test_ds = WaferDataset(X_maps_test)  # the test set has no labels
full_train_loader = DataLoader(full_train_ds, batch_size=32, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

def get_cnn_probs_infer(model, loader):
    """Return softmax probabilities of ``model`` for every sample in ``loader``.

    Accepts both labelled loaders (batches are ``(inputs, labels)`` lists or
    tuples, labels ignored) and unlabelled loaders (batches are bare input
    tensors). Rows are stacked in loader order into one NumPy array.
    """
    model.eval()
    chunks = []
    with torch.no_grad():
        for batch in loader:
            # Labelled batches arrive as a sequence; the inputs come first.
            inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
            probs = model(inputs.to(device)).softmax(dim=1)
            chunks.append(probs.cpu().numpy())
    return np.vstack(chunks)

# Same trained CNN scored on the full training set, then on the test set.
cnn_full_probs, cnn_test_probs = (
    get_cnn_probs_infer(cnn_model, ldr)
    for ldr in (full_train_loader, test_loader)
)
print('CNN full & test probabilities ready.')

## 13. Train final stacking meta-model on full training & predict test (Stacking only)

In [ ]:
# Full training stacking features: per-class probabilities of the three base
# models, concatenated column-wise (CNN, ExtraTrees, GradientBoosting).
# NOTE(review): a meta-model is only as reliable as these features; any
# probabilities produced on rows the base model was trained on will look
# over-confident -- confirm they are out-of-fold.
stack_full_feats = np.concatenate([
    cnn_full_probs,
    et_full_probs,
    gb_full_probs,
], axis=1)

if HAS_XGB:
    stack_meta = XGBClassifier(
        n_estimators=600,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.9,
        colsample_bytree=0.8,
        objective='multi:softprob',
        num_class=num_classes,
        tree_method='hist',
        random_state=42,
    )
else:
    # `multi_class` is deprecated in recent scikit-learn; 'auto' was already
    # the default, so omitting it keeps the same model.
    stack_meta = LogisticRegression(max_iter=2000)

stack_meta.fit(stack_full_feats, y_enc_full)
print('Final stacking meta-model trained.')

# Test stacking features: same column order as the training features above.
stack_test_feats = np.concatenate([
    cnn_test_probs,
    et_test_probs,
    gb_test_probs,
], axis=1)

# Predict encoded classes, then map them back to the original label strings.
final_test_enc = stack_meta.predict(stack_test_feats)
final_test_labels = le.inverse_transform(final_test_enc)

# Submission file: a single `failureType` column, no index.
out_df = pd.DataFrame({'failureType': final_test_labels})
out_df.to_csv('scores.csv', index=False)
print('scores.csv saved.')
out_df.head()