In [2]:
# GPU sanity check  (rerun after every restart)
import torch

gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")
gpu_name = torch.cuda.get_device_name(0) if gpu_available else "CPU"

print(f"Torch: {torch.__version__} | CUDA available: {gpu_available}")
print(f"Device: {device} | Name: {gpu_name}")

Torch: 2.8.0+cpu | CUDA available: False
Device: cpu | Name: CPU


# CELL 2 — Pin working versions (rerun once after a fresh session)
# PyTorch 2.4.0 (CUDA 12.1 build) + matching libs
!pip -q install --force-reinstall --no-cache-dir \
  torch==2.4.0+cu121 torchvision==0.19.0+cu121 torchaudio==2.4.0+cu121 \
  --index-url https://download.pytorch.org/whl/cu121

!pip -q install --force-reinstall --no-cache-dir \
  numpy==2.0.2 pandas==2.2.2 scikit-learn==1.6.1 matplotlib==3.10.0 seaborn==0.13.2 \
  tqdm==4.66.4 tabulate==0.9.0



In [3]:
#  Project paths RERUN
from pathlib import Path

def find_project_root(start: Path) -> Path:
    """
    Walk up from 'start' until we find a .git folder (repo root).
    If not found, fall back to current working directory.
    """
    for p in [start, *start.parents]:
        if (p / ".git").exists():
            return p
    return start


PROJECT_ROOT = find_project_root(Path.cwd())


DIRS = [
    "data",             
    "logs",             
    "models",           
    "figures",          
    "results",          
    "notebooks",        
    "experiments/configs",
    "experiments/runs", 
    "outputs"          
]

for d in DIRS:
    (PROJECT_ROOT / d).mkdir(parents=True, exist_ok=True)


DATA_DIR    = PROJECT_ROOT / "data"
MODELS_DIR  = PROJECT_ROOT / "models"
RUNS_DIR    = PROJECT_ROOT / "experiments" / "runs"
FIG_DIR     = PROJECT_ROOT / "figures"
RESULTS_DIR = PROJECT_ROOT / "results"

print("Project root:", PROJECT_ROOT)
print("Data dir    :", DATA_DIR)
print("Models dir  :", MODELS_DIR)
print("Runs dir    :", RUNS_DIR)


Project root: c:\Users\Admin\Desktop\ffnn-healing
Data dir    : c:\Users\Admin\Desktop\ffnn-healing\data
Models dir  : c:\Users\Admin\Desktop\ffnn-healing\models
Runs dir    : c:\Users\Admin\Desktop\ffnn-healing\experiments\runs


In [4]:
# Utilities, seed, and experiment logging  RERUN
import os, random, time
from pathlib import Path
import torch, numpy as np, pandas as pd

# We expect PROJECT_ROOT from the previous "Project paths" cell.
assert 'PROJECT_ROOT' in globals(), "Run the Project paths cell first."

# Device: reuse the 'device' from the GPU sanity cell if present; otherwise detect now
DEVICE = globals().get('device', torch.device("cuda" if torch.cuda.is_available() else "cpu"))
print("Using device:", DEVICE)

def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def timestamp() -> str:
    return time.strftime("%Y%m%d-%H%M%S")

def exp_path(kind: str = "results") -> Path:
    p = PROJECT_ROOT / kind
    p.mkdir(parents=True, exist_ok=True)
    return p

def log_result(row: dict, csv_name: str = "experiments_log.csv") -> str:
    csv_file = exp_path("results") / csv_name
    if csv_file.exists():
        df0 = pd.read_csv(csv_file)
        df = pd.concat([df0, pd.DataFrame([row])], ignore_index=True)
    else:
        df = pd.DataFrame([row])
    df.to_csv(csv_file, index=False)
    return str(csv_file)

set_seed(123)
print("Seed set. Log file will be:", exp_path('results') / 'experiments_log.csv')


Using device: cpu
Seed set. Log file will be: c:\Users\Admin\Desktop\ffnn-healing\results\experiments_log.csv


In [5]:
# Logger sanity test   OPTIONAL 
from pathlib import Path
import pandas as pd

row = {
    "timestamp": timestamp(),  
    "phase": "sanity",
    "note": "logger test"
}
csv_file = log_result(row)      

csv_path = Path(csv_file)
print("CSV exists:", csv_path.exists(), "| path:", csv_path)
print(pd.read_csv(csv_path).tail(1))


CSV exists: True | path: c:\Users\Admin\Desktop\ffnn-healing\results\experiments_log.csv
         timestamp   phase         note
0  20250813-102110  sanity  logger test


In [6]:
# DATASET PREP (Option A)
# RUN-WHEN-CHANGING-CONFIG 

from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np, os, json, joblib, pandas as pd, collections
from pathlib import Path

set_seed(42)  # from our utilities cell

# Use project-local data directory (created earlier)
DATA_ROOT = DATA_DIR  # from the Project paths cell
DATA_ROOT.mkdir(parents=True, exist_ok=True)

def split_scale_save(X, y, name, task_type, feature_names=None):
    """
    Splits (70/15/15), scales features (fit on train), saves arrays+scaler+meta to disk.
    task_type: 'clf' or 'reg'
    """
    # Split
    strat = y if task_type == "clf" else None
    X_train, X_tmp, y_train, y_tmp = train_test_split(
        X, y, test_size=0.30, random_state=42, stratify=strat
    )
    strat_tmp = y_tmp if task_type == "clf" else None
    X_val, X_test, y_val, y_test = train_test_split(
        X_tmp, y_tmp, test_size=0.50, random_state=42, stratify=strat_tmp
    )

    # Scale features using train stats only
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).astype(np.float32)
    X_val   = scaler.transform(X_val).astype(np.float32)
    X_test  = scaler.transform(X_test).astype(np.float32)

    # Ensure y dtypes
    if task_type == "clf":
        y_train = y_train.astype(np.int64); y_val = y_val.astype(np.int64); y_test = y_test.astype(np.int64)
    else:
        y_train = y_train.astype(np.float32); y_val = y_val.astype(np.float32); y_test = y_test.astype(np.float32)

    # Save
    OUT = DATA_ROOT / name
    OUT.mkdir(parents=True, exist_ok=True)
    np.save(OUT / "X_train.npy", X_train); np.save(OUT / "y_train.npy", y_train)
    np.save(OUT / "X_val.npy",   X_val);   np.save(OUT / "y_val.npy",   y_val)
    np.save(OUT / "X_test.npy",  X_test);  np.save(OUT / "y_test.npy",  y_test)
    joblib.dump(scaler, OUT / "scaler.joblib")

    # Metadata
    meta = {
        "name": name,
        "task": task_type,
        "n_features": int(X_train.shape[1]),
        "splits": {"train": int(len(y_train)), "val": int(len(y_val)), "test": int(len(y_test))},
        "feature_names": list(feature_names) if feature_names is not None else None
    }
    if task_type == "clf":
        meta["class_counts"] = {
            "train": {int(k): int(v) for k,v in collections.Counter(y_train).items()},
            "val":   {int(k): int(v) for k,v in collections.Counter(y_val).items()},
            "test":  {int(k): int(v) for k,v in collections.Counter(y_test).items()},
        }

    with open(OUT / "meta.json", "w") as f:
        f.write(json.dumps(meta, indent=2))

    # Log a one-line summary to our experiments log
    log_result({
        "timestamp": timestamp(),
        "phase": "data_prep",
        "dataset": name,
        "task": task_type,
        "n_features": meta["n_features"],
        "train": meta["splits"]["train"],
        "val": meta["splits"]["val"],
        "test": meta["splits"]["test"]
    })

    print(f"[{name}] Saved. features={meta['n_features']} | splits={meta['splits']}")
    if task_type == "clf":
        print(f"  Class balance (train/val/test): {meta['class_counts']}")
    return meta

# ---- Classification: Breast Cancer (binary) ----
bc = load_breast_cancer()
meta_bc = split_scale_save(
    bc.data, bc.target, name="breast_cancer", task_type="clf", feature_names=bc.feature_names
)

# ---- Regression: California Housing ----
cal = fetch_california_housing()
meta_cal = split_scale_save(
    cal.data, cal.target, name="california_housing", task_type="reg", feature_names=cal.feature_names
)

def print_tree(root: Path, max_depth=2, prefix=""):
    root = Path(root)
    def _walk(p: Path, depth: int, pref: str):
        if depth > max_depth: 
            return
        for child in sorted(p.iterdir()):
            print(pref + ("└─ " if child.is_file() else "├─ ") + child.name)
            if child.is_dir():
                _walk(child, depth+1, pref + "│  ")
    print(str(root))
    _walk(root, 0, "")
    
print("\nSaved folders:")
print_tree(DATA_ROOT, max_depth=2)


[breast_cancer] Saved. features=30 | splits={'train': 398, 'val': 85, 'test': 86}
  Class balance (train/val/test): {'train': {1: 250, 0: 148}, 'val': {1: 53, 0: 32}, 'test': {1: 54, 0: 32}}
[california_housing] Saved. features=8 | splits={'train': 14448, 'val': 3096, 'test': 3096}

Saved folders:
c:\Users\Admin\Desktop\ffnn-healing\data
├─ breast_cancer
│  └─ meta.json
│  └─ scaler.joblib
│  └─ X_test.npy
│  └─ X_train.npy
│  └─ X_val.npy
│  └─ y_test.npy
│  └─ y_train.npy
│  └─ y_val.npy
├─ california_housing
│  └─ meta.json
│  └─ scaler.joblib
│  └─ X_test.npy
│  └─ X_train.npy
│  └─ X_val.npy
│  └─ y_test.npy
│  └─ y_train.npy
│  └─ y_val.npy


In [None]:
# === MINIMAL BOOTSTRAP (run after every restart) ===
from google.colab import drive
drive.mount('/content/drive')

import os, random, time, torch, numpy as np, pandas as pd
from pathlib import Path

PROJECT_ROOT = Path("/content/drive/MyDrive/FFNN_Healing_Thesis")

def set_seed(seed=42):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def timestamp(): return time.strftime("%Y%m%d-%H%M%S")

def exp_path(kind="results"):
    p = PROJECT_ROOT / kind
    p.mkdir(parents=True, exist_ok=True)
    return p

def log_result(row: dict, csv_name="experiments_log.csv"):
    csv_file = exp_path("results") / csv_name
    df = pd.DataFrame([row])
    if csv_file.exists():
        df0 = pd.read_csv(csv_file); df = pd.concat([df0, df], ignore_index=True)
    df.to_csv(csv_file, index=False)
    return str(csv_file)

print("Ready. Torch:", torch.__version__, "| Device:", DEVICE, "| Root:", PROJECT_ROOT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Ready. Torch: 2.4.0+cu121 | Device: cuda | Root: /content/drive/MyDrive/FFNN_Healing_Thesis


In [9]:
# COMMONS 
# RERUN-AFTER-RESTART

import os, json, math, time
import numpy as np
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from pathlib import Path

# Reuse what we already set earlier
assert 'PROJECT_ROOT' in globals(), "Run the Project paths cell first."
assert 'FIG_DIR' in globals() and 'MODELS_DIR' in globals() and 'RESULTS_DIR' in globals(), "Project dirs not set."
DEVICE = globals().get('device', torch.device("cuda" if torch.cuda.is_available() else "cpu"))
set_seed(42)

# Ensure dirs exist (safe if they already do)
for d in [FIG_DIR, MODELS_DIR, RESULTS_DIR]:
    Path(d).mkdir(parents=True, exist_ok=True)

class NumpyDataset(Dataset):
    def __init__(self, X_path, y_path, task):
        self.X = np.load(X_path).astype(np.float32)
        self.y = np.load(y_path)
        self.task = task
    def __len__(self):
        return len(self.y)
    def __getitem__(self, i):
        x = torch.from_numpy(self.X[i])
        if self.task == "clf":
            y = torch.tensor(int(self.y[i]), dtype=torch.long)
        else:
            y = torch.tensor(float(self.y[i]), dtype=torch.float32).unsqueeze(0)
        return x, y

def make_loaders(data_dir, task, batch_size=128):
    data_dir = Path(data_dir)
    ds_train = NumpyDataset(data_dir/"X_train.npy", data_dir/"y_train.npy", task)
    ds_val   = NumpyDataset(data_dir/"X_val.npy",   data_dir/"y_val.npy",   task)
    ds_test  = NumpyDataset(data_dir/"X_test.npy",  data_dir/"y_test.npy",  task)
    train_loader = DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=False)
    val_loader   = DataLoader(ds_val,   batch_size=batch_size, shuffle=False, drop_last=False)
    test_loader  = DataLoader(ds_test,  batch_size=batch_size, shuffle=False, drop_last=False)
    n_features = ds_train.X.shape[1]
    return train_loader, val_loader, test_loader, n_features

def init_kaiming(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)

class FFNN_Classifier(nn.Module):
    def __init__(self, n_in, hidden=[64, 64, 32], n_out=2, p_drop=0.1):
        super().__init__()
        layers, prev = [], n_in
        for h in hidden:
            layers += [nn.Linear(prev, h), nn.ReLU(), nn.Dropout(p_drop)]
            prev = h
        layers += [nn.Linear(prev, n_out)]
        self.net = nn.Sequential(*layers)
        self.apply(init_kaiming)
    def forward(self, x):
        return self.net(x)

class FFNN_Regression(nn.Module):
    def __init__(self, n_in, hidden=[64, 64, 32], p_drop=0.1):
        super().__init__()
        layers, prev = [], n_in
        for h in hidden:
            layers += [nn.Linear(prev, h), nn.ReLU(), nn.Dropout(p_drop)]
            prev = h
        layers += [nn.Linear(prev, 1)]
        self.net = nn.Sequential(*layers)
        self.apply(init_kaiming)
    def forward(self, x):
        return self.net(x)

def train_model(model, train_loader, val_loader, task, max_epochs=200, lr=1e-3, weight_decay=1e-4, patience=20, run_name="run"):
    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss() if task=="clf" else nn.MSELoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)


    history = {"epoch": [], "train_loss": [], "val_loss": [], "val_metric": []}
    best_val, best_path, epochs_no_improve = float("inf"), Path(MODELS_DIR) / f"{run_name}_{timestamp()}_best.pt", 0

    for epoch in range(1, max_epochs+1):
        model.train(); train_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad()
            out = model(xb)
            loss = criterion(out, yb if task=="clf" else yb)
            loss.backward(); opt.step()
            train_losses.append(loss.item())
        train_loss = float(np.mean(train_losses)) if train_losses else 0.0

        model.eval(); val_losses = []; y_true_list, y_pred_list = [], []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                out = model(xb)
                loss = criterion(out, yb if task=="clf" else yb)
                val_losses.append(loss.item())
                if task == "clf":
                    preds = out.argmax(dim=1).detach().cpu().numpy()
                    y_true_list.append(yb.detach().cpu().numpy())
                    y_pred_list.append(preds)
                else:
                    y_true_list.append(yb.detach().cpu().numpy().squeeze())
                    y_pred_list.append(out.detach().cpu().numpy().squeeze())
        val_loss = float(np.mean(val_losses)) if val_losses else 0.0

        if task == "clf":
            y_true = np.concatenate(y_true_list); y_pred = np.concatenate(y_pred_list)
            val_metric = float(accuracy_score(y_true, y_pred))
        else:
            y_true = np.array(np.concatenate([np.atleast_1d(a) for a in y_true_list]))
            y_pred = np.array(np.concatenate([np.atleast_1d(a) for a in y_pred_list]))
            val_metric = float(math.sqrt(mean_squared_error(y_true, y_pred)))

        scheduler.step(val_loss)
        history["epoch"].append(epoch); history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss); history["val_metric"].append(val_metric)

        if val_loss < best_val - 1e-8:
            best_val = val_loss; epochs_no_improve = 0
            torch.save(model.state_dict(), best_path); best_mark="*"
        else:
            epochs_no_improve += 1; best_mark=""
        if epoch % 10 == 0 or best_mark == "*":
            print(f"[{run_name}] epoch {epoch:03d} | train {train_loss:.4f} | val {val_loss:.4f} | metric {val_metric:.4f} {best_mark}")
        if epochs_no_improve >= patience:
            print(f"[{run_name}] Early stopping at epoch {epoch}. Best val_loss={best_val:.4f}")
            break
    return best_path, history

def evaluate_model(model, loader, task):
    model.eval(); y_true_list, y_pred_list = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            out = model(xb)
            if task == "clf":
                preds = out.argmax(dim=1)
                y_true_list.append(yb.cpu().numpy()); y_pred_list.append(preds.cpu().numpy())
            else:
                y_true_list.append(yb.cpu().numpy().squeeze()); y_pred_list.append(out.cpu().numpy().squeeze())
    if task == "clf":
        y_true = np.concatenate(y_true_list); y_pred = np.concatenate(y_pred_list)
        return {"accuracy": float(accuracy_score(y_true, y_pred)),
                "f1": float(f1_score(y_true, y_pred, zero_division=0))}
    else:
        y_true = np.array(np.concatenate([np.atleast_1d(a) for a in y_true_list]))
        y_pred = np.array(np.concatenate([np.atleast_1d(a) for a in y_pred_list]))
        mae = mean_absolute_error(y_true, y_pred)
        rmse = math.sqrt(mean_squared_error(y_true, y_pred))
        return {"mae": float(mae), "rmse": float(rmse)}

def plot_history(history, run_name, task):
    fig = plt.figure(figsize=(6,4))
    plt.plot(history["epoch"], history["train_loss"], label="train_loss")
    plt.plot(history["epoch"], history["val_loss"], label="val_loss")
    plt.xlabel("epoch"); plt.ylabel("loss"); plt.legend(); plt.title(f"{run_name} loss")
    fig_path1 = Path(FIG_DIR) / f"{run_name}_loss.png"
    plt.savefig(fig_path1, bbox_inches="tight"); plt.close()

    fig = plt.figure(figsize=(6,4))
    ylabel = "accuracy" if task=="clf" else "RMSE"
    plt.plot(history["epoch"], history["val_metric"], label=f"val_{ylabel}")
    plt.xlabel("epoch"); plt.ylabel(ylabel); plt.legend(); plt.title(f"{run_name} {ylabel}")
    fig_path2 = Path(FIG_DIR) / f"{run_name}_{ylabel}.png"
    plt.savefig(fig_path2, bbox_inches="tight"); plt.close()
    return str(fig_path1), str(fig_path2)

print("Commons ready ✓  | DEVICE:", DEVICE)


Commons ready ✓  | DEVICE: cpu


In [10]:
# Baseline training — Breast Cancer 
from pathlib import Path
import json, torch


BREAST_DIR = DATA_DIR / "breast_cancer"
assert (BREAST_DIR / "X_train.npy").exists(), "Run the dataset prep cell first to create local splits."

train_loader, val_loader, test_loader, n_features = make_loaders(BREAST_DIR, task="clf", batch_size=128)
print("n_features:", n_features)

run_name = "clf_breast_cancer_baseline"
model_clf = FFNN_Classifier(n_in=n_features, hidden=[64,64,32], n_out=2, p_drop=0.1)

best_path, hist = train_model(
    model_clf, train_loader, val_loader, task="clf",
    max_epochs=200, lr=1e-3, weight_decay=1e-4, patience=20, run_name=run_name
)

# Evaluate best checkpoint on test
best_model = FFNN_Classifier(n_in=n_features, hidden=[64,64,32], n_out=2, p_drop=0.1).to(DEVICE)
best_model.load_state_dict(torch.load(best_path, map_location=DEVICE))
metrics_test = evaluate_model(best_model, test_loader, task="clf")
print("TEST metrics:", metrics_test)

# Plots + metrics
fig_loss, fig_metric = plot_history(hist, run_name, task="clf")
metrics_path = Path(RESULTS_DIR) / f"{run_name}_{timestamp()}_metrics.json"
with open(metrics_path, "w") as f:
    json.dump({"test": metrics_test}, f, indent=2)

# Log run summary to CSV
log_result({
    "timestamp": timestamp(),
    "phase": "baseline",
    "dataset": "breast_cancer",
    "task": "clf",
    "model": "FFNN_Classifier[64,64,32]",
    "test_accuracy": metrics_test["accuracy"],
    "test_f1": metrics_test["f1"],
    "best_ckpt": str(best_path),
    "fig_loss": fig_loss,
    "fig_metric": fig_metric
})

print("Saved plots:", fig_loss, "|", fig_metric)
print("Best model path:", best_path)
print("Metrics saved to:", metrics_path)


n_features: 30
[clf_breast_cancer_baseline] epoch 001 | train 0.6813 | val 0.4313 | metric 0.9059 *
[clf_breast_cancer_baseline] epoch 002 | train 0.4232 | val 0.3212 | metric 0.9412 *
[clf_breast_cancer_baseline] epoch 003 | train 0.3494 | val 0.2498 | metric 0.9647 *
[clf_breast_cancer_baseline] epoch 004 | train 0.2894 | val 0.2014 | metric 0.9529 *
[clf_breast_cancer_baseline] epoch 005 | train 0.2403 | val 0.1667 | metric 0.9529 *
[clf_breast_cancer_baseline] epoch 006 | train 0.1997 | val 0.1404 | metric 0.9529 *
[clf_breast_cancer_baseline] epoch 007 | train 0.2006 | val 0.1206 | metric 0.9647 *
[clf_breast_cancer_baseline] epoch 008 | train 0.2253 | val 0.1066 | metric 0.9765 *
[clf_breast_cancer_baseline] epoch 009 | train 0.1667 | val 0.0989 | metric 0.9765 *
[clf_breast_cancer_baseline] epoch 010 | train 0.1386 | val 0.0924 | metric 0.9765 *
[clf_breast_cancer_baseline] epoch 011 | train 0.1131 | val 0.0865 | metric 0.9765 *
[clf_breast_cancer_baseline] epoch 012 | train 0.1

In [11]:
# === REGRESSION BASELINE (California Housing) 
from pathlib import Path
import json, torch

CAL_DIR = DATA_DIR / "california_housing"   
assert (CAL_DIR / "X_train.npy").exists(), "Run the dataset prep cell first to create local splits."

train_loader, val_loader, test_loader, n_features = make_loaders(CAL_DIR, task="reg", batch_size=256)
print("n_features:", n_features)

run_name = "reg_california_baseline"
model_reg = FFNN_Regression(n_in=n_features, hidden=[128,64,32], p_drop=0.1)

best_path, hist = train_model(
    model_reg, train_loader, val_loader, task="reg",
    max_epochs=250, lr=1e-3, weight_decay=1e-4, patience=25, run_name=run_name
)

# Load best checkpoint and evaluate on test
best_model = FFNN_Regression(n_in=n_features, hidden=[128,64,32], p_drop=0.1).to(DEVICE)
state = torch.load(best_path, map_location=DEVICE)   
best_model.load_state_dict(state)

metrics_test = evaluate_model(best_model, test_loader, task="reg")
print("TEST metrics:", metrics_test) 

# Plots + metrics file
fig_loss, fig_metric = plot_history(hist, run_name, task="reg")
metrics_path = Path(RESULTS_DIR) / f"{run_name}_{timestamp()}_metrics.json"
with open(metrics_path, "w") as f:
    json.dump({"test": metrics_test}, f, indent=2)

# Log run summary to CSV
log_result({
    "timestamp": timestamp(),
    "phase": "baseline",
    "dataset": "california_housing",
    "task": "reg",
    "model": "FFNN_Regression[128,64,32]",
    "test_mae": metrics_test["mae"],
    "test_rmse": metrics_test["rmse"],
    "best_ckpt": str(best_path),
    "fig_loss": fig_loss,
    "fig_metric": fig_metric
})

print("Saved plots:", fig_loss, "|", fig_metric)
print("Best model path:", best_path)
print("Metrics saved to:", metrics_path)


n_features: 8
[reg_california_baseline] epoch 001 | train 2.2803 | val 0.9713 | metric 0.9606 *
[reg_california_baseline] epoch 002 | train 1.0828 | val 0.7501 | metric 0.8398 *
[reg_california_baseline] epoch 003 | train 0.8886 | val 0.6839 | metric 0.7993 *
[reg_california_baseline] epoch 004 | train 0.8055 | val 0.5888 | metric 0.7460 *
[reg_california_baseline] epoch 005 | train 0.7231 | val 0.5782 | metric 0.7338 *
[reg_california_baseline] epoch 006 | train 0.8544 | val 0.5396 | metric 0.6977 *
[reg_california_baseline] epoch 007 | train 0.6485 | val 0.5157 | metric 0.6987 *
[reg_california_baseline] epoch 008 | train 0.6164 | val 0.4713 | metric 0.6667 *
[reg_california_baseline] epoch 009 | train 0.5646 | val 0.4703 | metric 0.6645 *
[reg_california_baseline] epoch 010 | train 0.7211 | val 0.4758 | metric 0.6679 
[reg_california_baseline] epoch 011 | train 0.5638 | val 0.4542 | metric 0.6617 *
[reg_california_baseline] epoch 012 | train 0.5220 | val 0.4320 | metric 0.6394 *
[re

In [12]:
#  DAMAGE ENGINE
# RERUN-AFTER-RESTART

import copy
import numpy as np
import torch
import torch.nn as nn
from pathlib import Path

# Re-use: DEVICE, set_seed, log_result, evaluate_model, make_loaders, timestamp,
# PROJECT_ROOT, MODELS_DIR, RESULTS_DIR already defined earlier.
assert 'PROJECT_ROOT' in globals(), "Run the Project paths cell first."
assert 'RESULTS_DIR' in globals(), "Run the Project paths cell first (defines RESULTS_DIR)."

def get_linear_layers(model: nn.Module):
    """Return list of (seq_index_in_model_net, linear_module) for nn.Linear layers in forward order."""
    layers = []
    for i, m in enumerate(model.net):
        if isinstance(m, nn.Linear):
            layers.append((i, m))
    return layers

def summarize_linear_layers(model):
    """Return a list of dicts describing each Linear layer (useful to pick which layer to damage)."""
    info = []
    for li, lin in get_linear_layers(model):
        info.append({"seq_index": li, "in_features": lin.in_features, "out_features": lin.out_features})
    return info

def clone_from_checkpoint(model_ctor, ckpt_path: str):
    """
    Build a fresh model via `model_ctor()` and load weights from `ckpt_path`.
    `model_ctor` should be a zero-arg function returning an *already-constructed* model with the same architecture.
    """
    m = model_ctor().to(DEVICE)
    try:
        state = torch.load(ckpt_path, map_location=DEVICE, weights_only=True)  # newer torch
    except TypeError:
        state = torch.load(ckpt_path, map_location=DEVICE)                     # older torch
    m.load_state_dict(state)
    return m

def damage_neurons_in_layer(model: nn.Module, layer_lin_seq_index: int, pct_neurons: float, rng: np.random.Generator):
    """
    Damage a percentage of *neurons* in a given Linear layer:
    - Zero the corresponding rows of this layer's weight (incoming weights) and its bias.
    - Also zero the corresponding *columns* in the *next* Linear layer (outgoing connections),
      if a next Linear layer exists.
    Returns list of damaged neuron indices (relative to that layer).
    """
    linear_layers = get_linear_layers(model)
    # find target linear by its seq index inside model.net
    target_pos = None
    for pos, (seq_i, _) in enumerate(linear_layers):
        if seq_i == layer_lin_seq_index:
            target_pos = pos
            break
    assert target_pos is not None, f"No nn.Linear at seq index {layer_lin_seq_index} in model.net"

    target_seq_i, target_lin = linear_layers[target_pos]
    out_feats = target_lin.out_features
    n_dmg = max(1, int(round(pct_neurons * out_feats)))
    dmg_neurons = rng.choice(out_feats, size=n_dmg, replace=False)

    with torch.no_grad():
        # zero incoming weights + bias for damaged neurons
        W = target_lin.weight  # [out_features, in_features]
        W[dmg_neurons, :] = 0.0
        if target_lin.bias is not None:
            target_lin.bias[dmg_neurons] = 0.0

        # zero outgoing connections in the next linear (columns)
        if target_pos + 1 < len(linear_layers):
            _, next_lin = linear_layers[target_pos + 1]
            next_lin.weight[:, dmg_neurons] = 0.0

    return dmg_neurons.tolist()

def damage_weights_in_layer(model: nn.Module, layer_lin_seq_index: int, pct_weights: float, rng: np.random.Generator, mode="random"):
    """
    Damage a percentage of *weights* in a given Linear layer:
    - mode='random': independent random mask over all weights
    - mode='block' : zero a contiguous rectangular block (for 'specific area' effect)
    Returns number of weights zeroed.
    """
    target_lin = None
    for seq_i, lin in get_linear_layers(model):
        if seq_i == layer_lin_seq_index:
            target_lin = lin
            break
    assert target_lin is not None, f"No nn.Linear at seq index {layer_lin_seq_index} in model.net"

    with torch.no_grad():
        W = target_lin.weight  # [out_features, in_features]
        H, K = W.shape
        total = H * K
        n_dmg = max(1, int(round(pct_weights * total)))

        if mode == "random":
            idx = rng.choice(total, size=n_dmg, replace=False)
            rows = (idx // K).astype(int)
            cols = (idx % K).astype(int)
            W[rows, cols] = 0.0
        elif mode == "block":
            # choose a block whose area ≈ n_dmg
            h = max(1, int(round(np.sqrt(n_dmg))))
            k = max(1, int(round(n_dmg / h)))
            r0 = int(rng.integers(0, max(1, H - h + 1)))
            c0 = int(rng.integers(0, max(1, K - k + 1)))
            W[r0:r0+h, c0:c0+k] = 0.0
        else:
            raise ValueError("mode must be 'random' or 'block'")

    return n_dmg

def eval_task(model_ctor, ckpt_path, task, data_dir, damage_fn=None, repeats=1, **damage_kwargs):
    """
    Load a fresh model from checkpoint, optionally apply damage cumulatively, and evaluate after each step.
    Returns: list of dicts with keys {'repeat': int, 'metrics': {...}} where repeat=0 is the baseline.
    """
    set_seed(42)
    if task == "clf":
        _, _, test_loader, _ = make_loaders(data_dir, "clf", batch_size=256)
    else:
        _, _, test_loader, _ = make_loaders(data_dir, "reg", batch_size=512)

    # baseline
    model = clone_from_checkpoint(model_ctor, ckpt_path)
    out = [{"repeat": 0, "metrics": evaluate_model(model, test_loader, task)}]

    if damage_fn is None or repeats <= 0:
        return out

    rng = np.random.default_rng(12345)
    # progressive (cumulative) damage
    for r in range(1, repeats + 1):
        damage_fn(model, **damage_kwargs, rng=rng)
        m = evaluate_model(model, test_loader, task)
        out.append({"repeat": r, "metrics": m})
    return out

def save_experiment_log(rows, csv_name="damage_immediate_drop.csv"):
    """
    Append rows to a results CSV in RESULTS_DIR.
    Each row should already be a flat dict (add your own metadata before calling).
    """
    for row in rows:
        log_result(row, csv_name=csv_name)  # uses RESULTS_DIR internally
    return str(Path(RESULTS_DIR) / csv_name)


In [13]:
# === CONFIG: checkpoint paths & model-ctor lambdas 
# RERUN-AFTER-RESTART

from pathlib import Path
import numpy as np

# Use project-local data and models dirs defined earlier
BREAST_DIR = DATA_DIR / "breast_cancer"
CAL_DIR    = DATA_DIR / "california_housing"

def latest_ckpt(pattern: str) -> Path:
    """Pick the most recently modified checkpoint matching a pattern."""
    candidates = sorted(Path(MODELS_DIR).glob(pattern))
    assert candidates, f"No checkpoints matching pattern: {pattern}"
    # choose by modification time
    return max(candidates, key=lambda p: p.stat().st_mtime)

# Automatically pick the latest baseline checkpoints 
CLF_CKPT = latest_ckpt("clf_breast_cancer_baseline_*_best.pt")
REG_CKPT = latest_ckpt("reg_california_baseline_*_best.pt")

# Infer input dims from saved arrays
n_in_clf = np.load(BREAST_DIR / "X_train.npy").shape[1]
n_in_reg = np.load(CAL_DIR    / "X_train.npy").shape[1]

# Recreate model-ctor lambdas (match the baseline architectures you trained)
clf_ctor = lambda: FFNN_Classifier(n_in=n_in_clf, hidden=[64,64,32], n_out=2, p_drop=0.1)
reg_ctor = lambda: FFNN_Regression(n_in=n_in_reg, hidden=[128,64,32], p_drop=0.1)

print("CLF_CKPT:", CLF_CKPT)
print("REG_CKPT:", REG_CKPT)

# Inspect linear layers so we know which seq_index to target for damage
print("Classifier linear layers:", summarize_linear_layers(clf_ctor()))
print("Regression linear layers:", summarize_linear_layers(reg_ctor()))


CLF_CKPT: c:\Users\Admin\Desktop\ffnn-healing\models\clf_breast_cancer_baseline_20250813-103737_best.pt
REG_CKPT: c:\Users\Admin\Desktop\ffnn-healing\models\reg_california_baseline_20250813-104035_best.pt
Classifier linear layers: [{'seq_index': 0, 'in_features': 30, 'out_features': 64}, {'seq_index': 3, 'in_features': 64, 'out_features': 64}, {'seq_index': 6, 'in_features': 64, 'out_features': 32}, {'seq_index': 9, 'in_features': 32, 'out_features': 2}]
Regression linear layers: [{'seq_index': 0, 'in_features': 8, 'out_features': 128}, {'seq_index': 3, 'in_features': 128, 'out_features': 64}, {'seq_index': 6, 'in_features': 64, 'out_features': 32}, {'seq_index': 9, 'in_features': 32, 'out_features': 1}]


In [14]:
#rerun when needed
# SANITY EXPERIMENT: classification, 20% neurons in first hidden, 5 repeats ===
set_seed(42)

# first Linear layer's seq index (from your summary, it's 0)
FIRST_LINEAR = summarize_linear_layers(clone_from_checkpoint(clf_ctor, CLF_CKPT))[0]["seq_index"]

rows_to_log = []
res = eval_task(
    model_ctor=clf_ctor,
    ckpt_path=CLF_CKPT,
    task="clf",
    data_dir=BREAST_DIR,            # ← use local data dir
    damage_fn=damage_neurons_in_layer,
    repeats=5,                      # progressive (cumulative) damage
    layer_lin_seq_index=FIRST_LINEAR,
    pct_neurons=0.20                # 20% neurons
)

for step in res:
    r = step["repeat"]; m = step["metrics"]
    print(f"Repeat {r}: ACC={m['accuracy']:.4f}, F1={m['f1']:.4f}")
    rows_to_log.append({
        "timestamp": timestamp(),
        "phase": "damage_immediate",
        "dataset": "breast_cancer",
        "task": "clf",
        "damage_type": "neurons",
        "layer_seq_index": FIRST_LINEAR,
        "pct": 0.20,
        "repeat": r,
        "test_accuracy": m["accuracy"],
        "test_f1": m["f1"]
    })

csv_path = save_experiment_log(rows_to_log, "damage_immediate_drop.csv")
print("Logged to:", csv_path)


Repeat 0: ACC=0.9535, F1=0.9630
Repeat 1: ACC=0.9419, F1=0.9550
Repeat 2: ACC=0.9070, F1=0.9259
Repeat 3: ACC=0.8837, F1=0.9057
Repeat 4: ACC=0.8837, F1=0.9038
Repeat 5: ACC=0.8721, F1=0.8991
Logged to: c:\Users\Admin\Desktop\ffnn-healing\results\damage_immediate_drop.csv


In [15]:
# View damage log 
from pathlib import Path
import pandas as pd

csv = Path(RESULTS_DIR) / "damage_immediate_drop.csv"
print("Log path exists:", csv.exists(), "| path:", csv)
df = pd.read_csv(csv)
print("Last 8 rows:")
print(df.tail(8))


Log path exists: True | path: c:\Users\Admin\Desktop\ffnn-healing\results\damage_immediate_drop.csv
Last 8 rows:
         timestamp             phase        dataset task damage_type  \
0  20250813-104930  damage_immediate  breast_cancer  clf     neurons   
1  20250813-104930  damage_immediate  breast_cancer  clf     neurons   
2  20250813-104930  damage_immediate  breast_cancer  clf     neurons   
3  20250813-104930  damage_immediate  breast_cancer  clf     neurons   
4  20250813-104930  damage_immediate  breast_cancer  clf     neurons   
5  20250813-104930  damage_immediate  breast_cancer  clf     neurons   

   layer_seq_index  pct  repeat  test_accuracy   test_f1  
0                0  0.2       0       0.953488  0.962963  
1                0  0.2       1       0.941860  0.954955  
2                0  0.2       2       0.906977  0.925926  
3                0  0.2       3       0.883721  0.905660  
4                0  0.2       4       0.883721  0.903846  
5                0  0.2     

In [16]:
# === SANITY: regression, 20% neurons in first hidden, 5 repeats 
set_seed(42)

FIRST_LINEAR_REG = summarize_linear_layers(clone_from_checkpoint(reg_ctor, REG_CKPT))[0]["seq_index"]

rows_to_log = []
res = eval_task(
    model_ctor=reg_ctor,
    ckpt_path=REG_CKPT,
    task="reg",
    data_dir=CAL_DIR,                 # ← use local data dir
    damage_fn=damage_neurons_in_layer,
    repeats=5,
    layer_lin_seq_index=FIRST_LINEAR_REG,
    pct_neurons=0.20
)

for step in res:
    r = step["repeat"]; m = step["metrics"]
    print(f"Repeat {r}: MAE={m['mae']:.4f}, RMSE={m['rmse']:.4f}")
    rows_to_log.append({
        "timestamp": timestamp(),
        "phase": "damage_immediate",
        "dataset": "california_housing",
        "task": "reg",
        "damage_type": "neurons",
        "layer_seq_index": FIRST_LINEAR_REG,
        "pct": 0.20,
        "repeat": r,
        "test_mae": m["mae"],
        "test_rmse": m["rmse"]
    })

csv_path = save_experiment_log(rows_to_log, "damage_immediate_drop.csv")
print("Logged to:", csv_path)


Repeat 0: MAE=0.3749, RMSE=0.5373
Repeat 1: MAE=0.5098, RMSE=0.7588
Repeat 2: MAE=0.5256, RMSE=0.7507
Repeat 3: MAE=0.5723, RMSE=0.8086
Repeat 4: MAE=0.5954, RMSE=0.8313
Repeat 5: MAE=0.6137, RMSE=0.9004
Logged to: c:\Users\Admin\Desktop\ffnn-healing\results\damage_immediate_drop.csv


In [17]:
# === 4.4 EXPERIMENT RUNNER (logs tidy rows for each repeat) ===
# Tag: RERUN-AFTER-RESTART

def layer_seq_indices(model_ctor):
    m = model_ctor().to(DEVICE)
    return [d["seq_index"] for d in summarize_linear_layers(m)]

def run_damage_experiment(
    model_ctor, ckpt_path, task, data_dir,
    layer_seq_index, pct, repeats_list,
    damage_type="neurons", weight_mode="random",
    rng_seed=12345, csv_name="damage_immediate_drop.csv"
):
    """
    For each repeats in repeats_list:
      - reload clean checkpoint
      - apply progressive damage (repeats times) on given layer
      - evaluate after each repeat
      - log baseline + each step, with deltas vs baseline
    """
    assert damage_type in ("neurons","weights")
    rows = []
    for repeats in repeats_list:
        if damage_type == "neurons":
            res = eval_task(
                model_ctor=model_ctor, ckpt_path=ckpt_path, task=task, data_dir=data_dir,
                damage_fn=damage_neurons_in_layer, repeats=repeats,
                layer_lin_seq_index=layer_seq_index, pct_neurons=pct
            )
        else:
            res = eval_task(
                model_ctor=model_ctor, ckpt_path=ckpt_path, task=task, data_dir=data_dir,
                damage_fn=damage_weights_in_layer, repeats=repeats,
                layer_lin_seq_index=layer_seq_index, pct_weights=pct, mode=weight_mode
            )

        base = res[0]["metrics"]
        for step in res:
            r = step["repeat"]; m = step["metrics"]
            row = {
                "timestamp": timestamp(),
                "phase": "damage_immediate",
                "task": task,
                "damage_type": damage_type,
                "layer_seq_index": int(layer_seq_index),
                "pct": float(pct),
                "repeats_total": int(repeats),
                "repeat_eval": int(r),
                "ckpt": str(ckpt_path),
            }
            if task == "clf":
                row.update({
                    "metric": "accuracy",
                    "baseline_metric": float(base["accuracy"]),
                    "value": float(m["accuracy"]),
                    "delta_from_baseline": float(base["accuracy"] - m["accuracy"]),
                    "f1": float(m["f1"]),
                })
            else:
                row.update({
                    "metric": "rmse",
                    "baseline_metric": float(base["rmse"]),
                    "value": float(m["rmse"]),
                    "delta_from_baseline": float(m["rmse"] - base["rmse"]),
                    "mae": float(m["mae"]),
                })
            rows.append(row)

    for row in rows:
        log_result(row, csv_name=csv_name)
    print(f"Logged {len(rows)} rows to {RESULTS_DIR / csv_name}")
    return rows


In [18]:
# CLASSIFICATION: first hidden layer (seq 0), 20% neurons, repeats = [5]
first_lin_clf = layer_seq_indices(clf_ctor)[0]  # should be 0
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=first_lin_clf, pct=0.20, repeats_list=[5],
    damage_type="neurons", csv_name="damage_immediate_drop.csv"
)

# quick peek
import pandas as pd
df = pd.read_csv(RESULTS_DIR / "damage_immediate_drop.csv")
print(df.tail(6))


Logged 6 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_immediate_drop.csv
          timestamp             phase dataset task damage_type  \
12  20250813-105456  damage_immediate     NaN  clf     neurons   
13  20250813-105456  damage_immediate     NaN  clf     neurons   
14  20250813-105456  damage_immediate     NaN  clf     neurons   
15  20250813-105456  damage_immediate     NaN  clf     neurons   
16  20250813-105456  damage_immediate     NaN  clf     neurons   
17  20250813-105456  damage_immediate     NaN  clf     neurons   

    layer_seq_index  pct  repeat  test_accuracy  test_f1  test_mae  test_rmse  \
12                0  0.2     NaN            NaN      NaN       NaN        NaN   
13                0  0.2     NaN            NaN      NaN       NaN        NaN   
14                0  0.2     NaN            NaN      NaN       NaN        NaN   
15                0  0.2     NaN            NaN      NaN       NaN        NaN   
16                0  0.2     NaN            N

In [19]:
# === 4.4 EXPERIMENT RUNNER (with dataset_name + selectable CSV) — VS Code version ===
# Tag: RERUN-AFTER-RESTART

def run_damage_experiment(
    model_ctor, ckpt_path, task, data_dir,
    layer_seq_index, pct, repeats_list,
    damage_type="neurons", weight_mode="random",
    rng_seed=12345, csv_name="damage_runs.csv",
    dataset_name=None
):
    """
    For each 'repeats' in repeats_list:
      - reload clean checkpoint
      - apply progressive damage (repeats times) on given layer
      - evaluate after each repeat
      - log baseline + each step with deltas
    """
    assert damage_type in ("neurons", "weights")
    rows = []
    for repeats in repeats_list:
        if damage_type == "neurons":
            res = eval_task(
                model_ctor=model_ctor, ckpt_path=ckpt_path, task=task, data_dir=data_dir,
                damage_fn=damage_neurons_in_layer, repeats=repeats,
                layer_lin_seq_index=layer_seq_index, pct_neurons=pct
            )
        else:
            res = eval_task(
                model_ctor=model_ctor, ckpt_path=ckpt_path, task=task, data_dir=data_dir,
                damage_fn=damage_weights_in_layer, repeats=repeats,
                layer_lin_seq_index=layer_seq_index, pct_weights=pct, mode=weight_mode
            )

        base = res[0]["metrics"]
        for step in res:
            r = step["repeat"]; m = step["metrics"]
            row = {
                "timestamp": timestamp(),
                "phase": "damage_immediate",
                "dataset": dataset_name,
                "task": task,
                "damage_type": damage_type,
                "layer_seq_index": int(layer_seq_index),
                "pct": float(pct),
                "repeats_total": int(repeats),
                "repeat_eval": int(r),
                "ckpt": str(ckpt_path)
            }
            if task == "clf":
                row.update({
                    "metric": "accuracy",
                    "baseline_metric": float(base["accuracy"]),
                    "value": float(m["accuracy"]),
                    "delta_from_baseline": float(base["accuracy"] - m["accuracy"]),
                    "f1": float(m["f1"])
                })
            else:
                row.update({
                    "metric": "rmse",
                    "baseline_metric": float(base["rmse"]),
                    "value": float(m["rmse"]),
                    "delta_from_baseline": float(m["rmse"] - base["rmse"]),
                    "mae": float(m["mae"])
                })
            rows.append(row)

    for row in rows:
        log_result(row, csv_name=csv_name)
    print(f"Logged {len(rows)} rows to {RESULTS_DIR / csv_name}")
    return rows


In [20]:
# CLASSIFICATION: first hidden layer (seq 0), 20% neurons, repeats=[5]
#run on demand
first_lin_clf = layer_seq_indices(clf_ctor)[0]  # seq index 0
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=first_lin_clf, pct=0.20, repeats_list=[5],
    damage_type="neurons", csv_name="damage_runs.csv",
    dataset_name="breast_cancer"
)

# quick peek of the appended rows
import pandas as pd
df_new = pd.read_csv(RESULTS_DIR / "damage_runs.csv")
print(df_new.tail(6))


Logged 6 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
         timestamp             phase        dataset task damage_type  \
0  20250813-110039  damage_immediate  breast_cancer  clf     neurons   
1  20250813-110039  damage_immediate  breast_cancer  clf     neurons   
2  20250813-110039  damage_immediate  breast_cancer  clf     neurons   
3  20250813-110039  damage_immediate  breast_cancer  clf     neurons   
4  20250813-110039  damage_immediate  breast_cancer  clf     neurons   
5  20250813-110039  damage_immediate  breast_cancer  clf     neurons   

   layer_seq_index  pct  repeats_total  repeat_eval  \
0                0  0.2              5            0   
1                0  0.2              5            1   
2                0  0.2              5            2   
3                0  0.2              5            3   
4                0  0.2              5            4   
5                0  0.2              5            5   

                                

In [21]:
#  SMALL BATCH: clf · first hidden · neurons · 20% & 40% · repeats [5, 10]
set_seed(42)

first_lin_clf = layer_seq_indices(clf_ctor)[0]  # seq index 0

# 20% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=first_lin_clf, pct=0.20, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="breast_cancer"
)

# 40% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=first_lin_clf, pct=0.40, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="breast_cancer"
)

# quick peek of just these rows
import pandas as pd
df = pd.read_csv(RESULTS_DIR / "damage_runs.csv")
mask = (
    (df["dataset"] == "breast_cancer") &
    (df["task"] == "clf") &
    (df["damage_type"] == "neurons") &
    (df["layer_seq_index"] == first_lin_clf) &
    (df["pct"].isin([0.2, 0.4])) &
    (df["repeats_total"].isin([5, 10]))
)
print(df[mask].tail(12))


Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
          timestamp             phase        dataset task damage_type  \
28  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
29  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
30  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
31  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
32  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
33  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
34  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
35  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
36  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
37  20250813-110315  damage_immediate  breast_cancer  clf     neurons   
38  20250813-110315  damage_immediate  br

In [22]:
# clf · OUTPUT layer · neurons · 20% & 40% · repeats [5, 10]
set_seed(42)

last_lin_clf = layer_seq_indices(clf_ctor)[-1]  # seq index of output Linear

# 20% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=last_lin_clf, pct=0.20, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="breast_cancer"
)

# 40% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=clf_ctor, ckpt_path=CLF_CKPT, task="clf", data_dir=BREAST_DIR,
    layer_seq_index=last_lin_clf, pct=0.40, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="breast_cancer"
)

# Quick peek of these specific rows
import pandas as pd
df = pd.read_csv(RESULTS_DIR / "damage_runs.csv")
mask = (
    (df["dataset"] == "breast_cancer") &
    (df["task"] == "clf") &
    (df["damage_type"] == "neurons") &
    (df["layer_seq_index"] == last_lin_clf) &
    (df["pct"].isin([0.2, 0.4])) &
    (df["repeats_total"].isin([5, 10]))
)
print(df[mask].tail(12))


Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
          timestamp             phase        dataset task damage_type  \
62  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
63  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
64  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
65  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
66  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
67  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
68  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
69  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
70  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
71  20250813-110450  damage_immediate  breast_cancer  clf     neurons   
72  20250813-110450  damage_immediate  br

In [23]:
# SMALL BATCH: reg · first hidden · neurons · 20% & 40% · repeats [5, 10]
set_seed(42)

first_lin_reg = layer_seq_indices(reg_ctor)[0]  # seq index 0

# 20% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=reg_ctor, ckpt_path=REG_CKPT, task="reg", data_dir=CAL_DIR,
    layer_seq_index=first_lin_reg, pct=0.20, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="california_housing"
)

# 40% neurons, repeats 5 and 10
_ = run_damage_experiment(
    model_ctor=reg_ctor, ckpt_path=REG_CKPT, task="reg", data_dir=CAL_DIR,
    layer_seq_index=first_lin_reg, pct=0.40, repeats_list=[5, 10],
    damage_type="neurons", csv_name="damage_runs.csv", dataset_name="california_housing"
)

# Quick peek of just these rows
import pandas as pd
df = pd.read_csv(RESULTS_DIR / "damage_runs.csv")
mask = (
    (df["dataset"] == "california_housing") &
    (df["task"] == "reg") &
    (df["damage_type"] == "neurons") &
    (df["layer_seq_index"] == first_lin_reg) &
    (df["pct"].isin([0.2, 0.4])) &
    (df["repeats_total"].isin([5, 10]))
)
print(df[mask].tail(12))


Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
Logged 17 rows to c:\Users\Admin\Desktop\ffnn-healing\results\damage_runs.csv
           timestamp             phase             dataset task damage_type  \
96   20250813-110723  damage_immediate  california_housing  reg     neurons   
97   20250813-110724  damage_immediate  california_housing  reg     neurons   
98   20250813-110724  damage_immediate  california_housing  reg     neurons   
99   20250813-110724  damage_immediate  california_housing  reg     neurons   
100  20250813-110724  damage_immediate  california_housing  reg     neurons   
101  20250813-110724  damage_immediate  california_housing  reg     neurons   
102  20250813-110724  damage_immediate  california_housing  reg     neurons   
103  20250813-110724  damage_immediate  california_housing  reg     neurons   
104  20250813-110724  damage_immediate  california_housing  reg     neurons   
105  20250813-110724  damage_immediate  california_hou

In [24]:
#  Healing scaffolding (gradient masks + masked damage) 
#rerun
import torch
import torch.nn as nn
import numpy as np

# Reuse: get_linear_layers, summarize_linear_layers, clone_from_checkpoint,
# make_loaders, evaluate_model, set_seed, DEVICE, MODELS_DIR already defined.

def _ensure_mask(t, fill=1.0):
    """Create a float mask same shape as tensor t, filled with fill (1.0 = trainable, 0.0 = freeze)."""
    return torch.full_like(t, float(fill))

def combine_mask_(base_mask: torch.Tensor, new_mask: torch.Tensor):
    """
    Combine two masks in-place using AND semantics for freezing:
    final_keep = base_keep * new_keep
    """
    base_mask.mul_(new_mask)
    return base_mask

def register_grad_mask_hooks(mask_dict):
    """
    Given {param_tensor: mask_tensor}, register a backward hook that multiplies incoming gradients by the mask.
    Keep the returned handles alive; call handle.remove() when done.
    """
    hooks = []
    for p, m in mask_dict.items():
        mm = m.to(p.device).to(p.dtype)
        h = p.register_hook(lambda g, _mm=mm: g * _mm)
        hooks.append(h)
    return hooks

def init_full_keep_masks_for_model(model: nn.Module):
    """
    Build an initial mask dict of ones (keep/train) for each Linear weight/bias.
    We will AND (multiply) zeros into these masks as we damage more.
    """
    mask_dict = {}
    for _, m in model.named_modules():
        if isinstance(m, nn.Linear):
            mask_dict[m.weight] = _ensure_mask(m.weight, fill=1.0)
            if m.bias is not None:
                mask_dict[m.bias] = _ensure_mask(m.bias, fill=1.0)
    return mask_dict

def damage_neurons_in_layer_with_masks(model: nn.Module, layer_lin_seq_index: int, pct_neurons: float,
                                       rng: np.random.Generator, mask_dict=None):
    """
    Same as damage_neurons_in_layer, but also updates a mask_dict so damaged entries are frozen.
    - Zero rows in target Linear weight + corresponding bias entries.
    - Zero columns in the next Linear weight (outgoing).
    Returns: (damaged_neuron_indices, mask_dict)
    """
    linear_layers = get_linear_layers(model)
    target_pos = None
    for k, (seq_i, lin) in enumerate(linear_layers):
        if seq_i == layer_lin_seq_index:
            target_pos = k
            break
    assert target_pos is not None, f"No Linear layer at seq index {layer_lin_seq_index}"
    _, target_lin = linear_layers[target_pos]
    next_lin = linear_layers[target_pos + 1][1] if (target_pos + 1) < len(linear_layers) else None

    out_feats = target_lin.out_features
    n_dmg = max(1, int(round(pct_neurons * out_feats)))
    dmg_neurons = rng.choice(out_feats, size=n_dmg, replace=False)

    if mask_dict is None:
        mask_dict = init_full_keep_masks_for_model(model)

    with torch.no_grad():
        # Zero rows in target weight + target bias at damaged neurons; mark them frozen (mask=0)
        W = target_lin.weight  # [out, in]
        W[dmg_neurons, :] = 0.0
        maskW = mask_dict.get(W, _ensure_mask(W, 1.0)); maskW[dmg_neurons, :] = 0.0; mask_dict[W] = maskW

        if target_lin.bias is not None:
            b = target_lin.bias
            b[dmg_neurons] = 0.0
            maskb = mask_dict.get(b, _ensure_mask(b, 1.0)); maskb[dmg_neurons] = 0.0; mask_dict[b] = maskb

        # Zero outgoing columns in next layer's weight; mark them frozen
        if next_lin is not None:
            Wn = next_lin.weight  # [next_out, next_in]
            Wn[:, dmg_neurons] = 0.0
            maskWn = mask_dict.get(Wn, _ensure_mask(Wn, 1.0)); maskWn[:, dmg_neurons] = 0.0; mask_dict[Wn] = maskWn

    return dmg_neurons.tolist(), mask_dict

def apply_progressive_neuron_damage_with_masks(model_ctor, ckpt_path, task, data_dir,
                                               layer_lin_seq_index, pct_neurons, repeats, seed=12345):
    """
    Load clean model, apply progressive neuron damage (repeats times),
    accumulating both damage and masks.
    Returns: model_damaged, mask_dict, train_loader, val_loader, test_loader, metrics_before, metrics_after
    """
    if task == "clf":
        train_loader, val_loader, test_loader, _ = make_loaders(data_dir, "clf", batch_size=256)
    else:
        train_loader, val_loader, test_loader, _ = make_loaders(data_dir, "reg", batch_size=512)

    model = clone_from_checkpoint(model_ctor, ckpt_path)
    metrics_before = evaluate_model(model, test_loader, task)

    rng = np.random.default_rng(seed)
    mask_dict = init_full_keep_masks_for_model(model)

    for _ in range(repeats):
        _, mask_dict = damage_neurons_in_layer_with_masks(
            model, layer_lin_seq_index=layer_lin_seq_index, pct_neurons=pct_neurons, rng=rng, mask_dict=mask_dict
        )

    metrics_after = evaluate_model(model, test_loader, task)
    return model, mask_dict, train_loader, val_loader, test_loader, metrics_before, metrics_after

def healing_train_constrained(model, mask_dict, train_loader, val_loader, task,
                              max_epochs=50, lr=1e-3, weight_decay=1e-4, patience=10, run_name="heal"):
    """
    Retrain while freezing masked entries (0=freeze). We enforce freezing by:
      1) multiplying grads by mask (hook), AND
      2) post-step: p.data *= mask  (prevents weight_decay/optimizer from moving frozen params)
    Returns: best_path, history
    """
    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss() if task == "clf" else nn.MSELoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)

    # register grad-mask hooks
    hooks = register_grad_mask_hooks(mask_dict)

    history = {"epoch": [], "train_loss": [], "val_loss": [], "val_metric": []}
    best_val = float("inf")
    best_path = Path(MODELS_DIR) / f"{run_name}_{timestamp()}_best.pt"
    epochs_no_improve = 0

    for epoch in range(1, max_epochs + 1):
        model.train(); train_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            opt.zero_grad()
            out = model(xb)
            loss = criterion(out, yb if task == "clf" else yb)
            loss.backward()

            # standard optimizer step
            opt.step()

            # POST-STEP ENFORCEMENT: keep frozen entries at their masked values (0)
            with torch.no_grad():
                for p, m in mask_dict.items():
                    p.data.mul_(m.to(p.device).to(p.dtype))

            train_losses.append(loss.item())
        train_loss = float(np.mean(train_losses)) if train_losses else 0.0

        # validate
        model.eval(); val_losses = []; y_true_list, y_pred_list = [], []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                out = model(xb)
                vloss = criterion(out, yb if task == "clf" else yb)
                val_losses.append(vloss.item())
                if task == "clf":
                    y_pred_list.append(out.argmax(dim=1).cpu().numpy()); y_true_list.append(yb.cpu().numpy())
                else:
                    y_pred_list.append(out.cpu().numpy().squeeze()); y_true_list.append(yb.cpu().numpy().squeeze())

        val_loss = float(np.mean(val_losses)) if val_losses else 0.0
        if task == "clf":
            import numpy as _np
            y_true = _np.concatenate(y_true_list); y_pred = _np.concatenate(y_pred_list)
            from sklearn.metrics import accuracy_score as _acc
            val_metric = float(_acc(y_true, y_pred))
        else:
            import numpy as _np, math as _math
            from sklearn.metrics import mean_squared_error as _mse
            y_true = _np.array(_np.concatenate([_np.atleast_1d(a) for a in y_true_list]))
            y_pred = _np.array(_np.concatenate([_np.atleast_1d(a) for a in y_pred_list]))
            val_metric = float(_math.sqrt(_mse(y_true, y_pred)))

        scheduler.step(val_loss)
        history["epoch"].append(epoch); history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss); history["val_metric"].append(val_metric)

        # save best by val_loss
        if val_loss < best_val - 1e-8:
            best_val = val_loss; epochs_no_improve = 0
            torch.save(model.state_dict(), best_path); mark = "*"
        else:
            epochs_no_improve += 1; mark = ""
        if epoch % 10 == 0 or mark == "*":
            print(f"[{run_name}] epoch {epoch:03d} | train {train_loss:.4f} | val {val_loss:.4f} | metric {val_metric:.4f} {mark}")
        if epochs_no_improve >= patience:
            print(f"[{run_name}] Early stopping at epoch {epoch}. Best val_loss={best_val:.4f}")
            break

    # clean up hooks
    for h in hooks:
        h.remove()
    return best_path, history


In [25]:
#rerun
# create a damaged model + masks (clf, first hidden, 20%, 5 repeats)
set_seed(42)

first_lin_clf = layer_seq_indices(clf_ctor)[0]  # seq index 0

damaged_model, mask_dict, train_loader, val_loader, test_loader, m_before, m_after = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,          # ← local data dir
        layer_lin_seq_index=first_lin_clf,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

print("BASELINE:", m_before)
print("POST-DAMAGE:", m_after)


BASELINE: {'accuracy': 0.9534883720930233, 'f1': 0.9629629629629629}
POST-DAMAGE: {'accuracy': 0.872093023255814, 'f1': 0.8990825688073395}


In [26]:
#rerun
# heal (short vs long), evaluate, save plots, and log

# ---- SHORT HEAL (≈15 epochs) ----
short_name = "heal_clf_firstHidden_p20_r5_short"
best_short_path, hist_short = healing_train_constrained(
    model=damaged_model,
    mask_dict=mask_dict,
    train_loader=train_loader,
    val_loader=val_loader,
    task="clf",
    max_epochs=15,          # quick heal
    lr=1e-3,
    weight_decay=1e-4,
    patience=5,
    run_name=short_name
)

# Load the best short-healed weights into a fresh model and evaluate on TEST
m_short = clf_ctor().to(DEVICE)
m_short.load_state_dict(torch.load(best_short_path, map_location=DEVICE))
metrics_short = evaluate_model(m_short, test_loader, task="clf")
print("SHORT HEAL (≈15 epochs) TEST:", metrics_short)

# Plot short-heal curves
fig_loss_s, fig_metric_s = plot_history(hist_short, short_name, task="clf")

# ---- LONG HEAL (≈150 epochs) ----
# IMPORTANT: start from the same post-damage state again (not from the short-healed model)
damaged_model2, mask_dict2, train_loader2, val_loader2, test_loader2, _, _ = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,          # ← local data dir
        layer_lin_seq_index=first_lin_clf,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

long_name = "heal_clf_firstHidden_p20_r5_long"
best_long_path, hist_long = healing_train_constrained(
    model=damaged_model2,
    mask_dict=mask_dict2,
    train_loader=train_loader2,
    val_loader=val_loader2,
    task="clf",
    max_epochs=150,         # longer heal
    lr=1e-3,
    weight_decay=1e-4,
    patience=20,
    run_name=long_name
)

m_long = clf_ctor().to(DEVICE)
m_long.load_state_dict(torch.load(best_long_path, map_location=DEVICE))
metrics_long = evaluate_model(m_long, test_loader2, task="clf")
print("LONG HEAL (≈150 epochs) TEST:", metrics_long)

# Plot long-heal curves
fig_loss_l, fig_metric_l = plot_history(hist_long, long_name, task="clf")

# ---- LOG everything to a separate healing CSV ----
rows = [
    {
        "timestamp": timestamp(),
        "phase": "healing",
        "dataset": "breast_cancer",
        "task": "clf",
        "stage": "baseline",
        "layer_seq_index": int(first_lin_clf),
        "pct": 0.20,
        "repeats": 5,
        "accuracy": float(m_before["accuracy"]),
        "f1": float(m_before["f1"])
    },
    {
        "timestamp": timestamp(),
        "phase": "healing",
        "dataset": "breast_cancer",
        "task": "clf",
        "stage": "post_damage",
        "layer_seq_index": int(first_lin_clf),
        "pct": 0.20,
        "repeats": 5,
        "accuracy": float(m_after["accuracy"]),
        "f1": float(m_after["f1"])
    },
    {
        "timestamp": timestamp(),
        "phase": "healing",
        "dataset": "breast_cancer",
        "task": "clf",
        "stage": "healed_short",
        "layer_seq_index": int(first_lin_clf),
        "pct": 0.20,
        "repeats": 5,
        "epochs": len(hist_short["epoch"]),
        "accuracy": float(metrics_short["accuracy"]),
        "f1": float(metrics_short["f1"]),
        "best_ckpt": str(best_short_path),
        "fig_loss": fig_loss_s,
        "fig_metric": fig_metric_s
    },
    {
        "timestamp": timestamp(),
        "phase": "healing",
        "dataset": "breast_cancer",
        "task": "clf",
        "stage": "healed_long",
        "layer_seq_index": int(first_lin_clf),
        "pct": 0.20,
        "repeats": 5,
        "epochs": len(hist_long["epoch"]),
        "accuracy": float(metrics_long["accuracy"]),
        "f1": float(metrics_long["f1"]),
        "best_ckpt": str(best_long_path),
        "fig_loss": fig_loss_l,
        "fig_metric": fig_metric_l
    }
]

for r in rows:
    log_result(r, csv_name="healing_runs.csv")

print("Logged 4 rows to", RESULTS_DIR / "healing_runs.csv")
print("Short heal plots:", fig_loss_s, "|", fig_metric_s)
print("Long heal plots:", fig_loss_l, "|", fig_metric_l)


[heal_clf_firstHidden_p20_r5_short] epoch 001 | train 0.1583 | val 0.1570 | metric 0.9412 *
[heal_clf_firstHidden_p20_r5_short] epoch 002 | train 0.1435 | val 0.1386 | metric 0.9529 *
[heal_clf_firstHidden_p20_r5_short] epoch 003 | train 0.1475 | val 0.1244 | metric 0.9529 *
[heal_clf_firstHidden_p20_r5_short] epoch 004 | train 0.1060 | val 0.1126 | metric 0.9647 *
[heal_clf_firstHidden_p20_r5_short] epoch 005 | train 0.1099 | val 0.1030 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 006 | train 0.0790 | val 0.0950 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 007 | train 0.0938 | val 0.0895 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 008 | train 0.0886 | val 0.0849 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 009 | train 0.0716 | val 0.0820 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 010 | train 0.0671 | val 0.0788 | metric 0.9765 *
[heal_clf_firstHidden_p20_r5_short] epoch 011 | train 0.0817 | val 0.0766 | metr

In [27]:
# View healing log 
from pathlib import Path
import pandas as pd

csv = RESULTS_DIR / "healing_runs.csv"
print("Exists:", csv.exists(), "|", csv)
dfh = pd.read_csv(csv)
print(dfh.tail(4))


Exists: True | c:\Users\Admin\Desktop\ffnn-healing\results\healing_runs.csv
         timestamp    phase        dataset task         stage  \
0  20250813-111348  healing  breast_cancer  clf      baseline   
1  20250813-111348  healing  breast_cancer  clf   post_damage   
2  20250813-111348  healing  breast_cancer  clf  healed_short   
3  20250813-111348  healing  breast_cancer  clf   healed_long   

   layer_seq_index  pct  repeats  accuracy        f1  epochs  \
0                0  0.2        5  0.953488  0.962963     NaN   
1                0  0.2        5  0.872093  0.899083     NaN   
2                0  0.2        5  0.930233  0.944444    15.0   
3                0  0.2        5  0.965116  0.971963    46.0   

                                           best_ckpt  \
0                                                NaN   
1                                                NaN   
2  c:\Users\Admin\Desktop\ffnn-healing\models\hea...   
3  c:\Users\Admin\Desktop\ffnn-healing\models\hea... 

We’ll run two tiny healing experiments:

Case A (likely recoverable): output layer · 20% neurons · repeats=1

Case B (likely non-recoverable): output layer · 20% neurons · repeats=5 (often kills both logits across repeats → model collapses)

In [28]:
# A1 — damage: output layer, 20%, repeats=1 
set_seed(42)

last_lin_clf = layer_seq_indices(clf_ctor)[-1]  # seq index of output Linear

damaged_A, mask_A, train_A, val_A, test_A, mA_before, mA_after = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,        # ← local data dir
        layer_lin_seq_index=last_lin_clf,
        pct_neurons=0.20,           # with 2 output neurons, this will zero 1 of them
        repeats=1,
        seed=12345
    )

print("A — BASELINE:", mA_before)
print("A — POST-DAMAGE:", mA_after)


A — BASELINE: {'accuracy': 0.9534883720930233, 'f1': 0.9629629629629629}
A — POST-DAMAGE: {'accuracy': 0.9651162790697675, 'f1': 0.972972972972973}


In [29]:
# A2 — heal short (15) & long (150), evaluate, log
A_short = "heal_clf_OUTPUT_p20_r1_short"
bestA_s, histA_s = healing_train_constrained(
    model=damaged_A, mask_dict=mask_A,
    train_loader=train_A, val_loader=val_A, task="clf",
    max_epochs=15, lr=1e-3, weight_decay=1e-4, patience=5,
    run_name=A_short
)
mA_s = clf_ctor().to(DEVICE)
mA_s.load_state_dict(torch.load(bestA_s, map_location=DEVICE))
metricsA_s = evaluate_model(mA_s, test_A, task="clf")
print("A — SHORT HEAL TEST:", metricsA_s)
figA_s_loss, figA_s_metric = plot_history(histA_s, A_short, task="clf")

# Recreate the SAME post-damage state for a clean long run
damaged_A2, mask_A2, train_A2, val_A2, test_A2, _, _ = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,      # ← local data dir (fixed)
        layer_lin_seq_index=last_lin_clf,
        pct_neurons=0.20,
        repeats=1,
        seed=12345
    )

A_long = "heal_clf_OUTPUT_p20_r1_long"
bestA_l, histA_l = healing_train_constrained(
    model=damaged_A2, mask_dict=mask_A2,
    train_loader=train_A2, val_loader=val_A2, task="clf",
    max_epochs=150, lr=1e-3, weight_decay=1e-4, patience=20,
    run_name=A_long
)
mA_l = clf_ctor().to(DEVICE)
mA_l.load_state_dict(torch.load(bestA_l, map_location=DEVICE))
metricsA_l = evaluate_model(mA_l, test_A2, task="clf")
print("A — LONG HEAL TEST:", metricsA_l)
figA_l_loss, figA_l_metric = plot_history(histA_l, A_long, task="clf")

# Log
rowsA = [
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "baseline", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 1,
     "accuracy": float(mA_before["accuracy"]), "f1": float(mA_before["f1"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "post_damage", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 1,
     "accuracy": float(mA_after["accuracy"]), "f1": float(mA_after["f1"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "healed_short", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 1,
     "epochs": len(histA_s["epoch"]), "accuracy": float(metricsA_s["accuracy"]), "f1": float(metricsA_s["f1"]),
     "best_ckpt": str(bestA_s), "fig_loss": figA_s_loss, "fig_metric": figA_s_metric},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "healed_long", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 1,
     "epochs": len(histA_l["epoch"]), "accuracy": float(metricsA_l["accuracy"]), "f1": float(metricsA_l["f1"]),
     "best_ckpt": str(bestA_l), "fig_loss": figA_l_loss, "fig_metric": figA_l_metric},
]
for r in rowsA:
    log_result(r, csv_name="healing_runs.csv")

print("A — Logged 4 rows to", RESULTS_DIR / "healing_runs.csv")
print("A — Plots:", figA_s_loss, figA_s_metric, "|", figA_l_loss, figA_l_metric)


[heal_clf_OUTPUT_p20_r1_short] epoch 001 | train 0.1133 | val 0.0721 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 002 | train 0.1014 | val 0.0674 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 003 | train 0.0803 | val 0.0629 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 004 | train 0.0840 | val 0.0600 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 005 | train 0.0777 | val 0.0585 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 006 | train 0.0578 | val 0.0576 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 008 | train 0.0711 | val 0.0572 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 009 | train 0.0548 | val 0.0569 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 010 | train 0.0542 | val 0.0554 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 011 | train 0.0520 | val 0.0539 | metric 0.9765 *
[heal_clf_OUTPUT_p20_r1_short] epoch 012 | train 0.0494 | val 0.0535 | metric 0.9765 *
A — SHORT HEAL TEST: {'accuracy': 0.9534883

In [30]:
# B1 — damage: output layer, 20%, repeats=5 
set_seed(42)

damaged_B, mask_B, train_B, val_B, test_B, mB_before, mB_after = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,        # ← local data dir
        layer_lin_seq_index=last_lin_clf,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

print("B — BASELINE:", mB_before)
print("B — POST-DAMAGE:", mB_after)


B — BASELINE: {'accuracy': 0.9534883720930233, 'f1': 0.9629629629629629}
B — POST-DAMAGE: {'accuracy': 0.37209302325581395, 'f1': 0.0}


In [31]:
# B2 — heal short (15) & long (150), evaluate, log  
B_short = "heal_clf_OUTPUT_p20_r5_short"
bestB_s, histB_s = healing_train_constrained(
    model=damaged_B, mask_dict=mask_B,
    train_loader=train_B, val_loader=val_B, task="clf",
    max_epochs=15, lr=1e-3, weight_decay=1e-4, patience=5,
    run_name=B_short
)
mB_s = clf_ctor().to(DEVICE)
mB_s.load_state_dict(torch.load(bestB_s, map_location=DEVICE))
metricsB_s = evaluate_model(mB_s, test_B, task="clf")
print("B — SHORT HEAL TEST:", metricsB_s)
figB_s_loss, figB_s_metric = plot_history(histB_s, B_short, task="clf")

# Fresh same-damage state for long run
damaged_B2, mask_B2, train_B2, val_B2, test_B2, _, _ = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=clf_ctor,
        ckpt_path=CLF_CKPT,
        task="clf",
        data_dir=BREAST_DIR,     # ← FIXED
        layer_lin_seq_index=last_lin_clf,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

B_long = "heal_clf_OUTPUT_p20_r5_long"
bestB_l, histB_l = healing_train_constrained(
    model=damaged_B2, mask_dict=mask_B2,
    train_loader=train_B2, val_loader=val_B2, task="clf",
    max_epochs=150, lr=1e-3, weight_decay=1e-4, patience=20,
    run_name=B_long
)
mB_l = clf_ctor().to(DEVICE)
mB_l.load_state_dict(torch.load(bestB_l, map_location=DEVICE))
metricsB_l = evaluate_model(mB_l, test_B2, task="clf")
print("B — LONG HEAL TEST:", metricsB_l)
figB_l_loss, figB_l_metric = plot_history(histB_l, B_long, task="clf")

# Log
rowsB = [
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "baseline", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 5,
     "accuracy": float(mB_before["accuracy"]), "f1": float(mB_before["f1"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "post_damage", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 5,
     "accuracy": float(mB_after["accuracy"]), "f1": float(mB_after["f1"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "healed_short", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 5,
     "epochs": len(histB_s["epoch"]), "accuracy": float(metricsB_s["accuracy"]), "f1": float(metricsB_s["f1"]),
     "best_ckpt": str(bestB_s), "fig_loss": figB_s_loss, "fig_metric": figB_s_metric},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "breast_cancer", "task": "clf",
     "stage": "healed_long", "layer_seq_index": int(last_lin_clf), "pct": 0.20, "repeats": 5,
     "epochs": len(histB_l["epoch"]), "accuracy": float(metricsB_l["accuracy"]), "f1": float(metricsB_l["f1"]),
     "best_ckpt": str(bestB_l), "fig_loss": figB_l_loss, "fig_metric": figB_l_metric},
]
for r in rowsB:
    log_result(r, csv_name="healing_runs.csv")
print("B — Logged 4 rows to", RESULTS_DIR / "healing_runs.csv")
print("B — Plots:", figB_s_loss, figB_s_metric, "|", figB_l_loss, figB_l_metric)


[heal_clf_OUTPUT_p20_r5_short] epoch 001 | train 0.6931 | val 0.6931 | metric 0.3765 *
[heal_clf_OUTPUT_p20_r5_short] Early stopping at epoch 6. Best val_loss=0.6931
B — SHORT HEAL TEST: {'accuracy': 0.37209302325581395, 'f1': 0.0}
[heal_clf_OUTPUT_p20_r5_long] epoch 001 | train 0.6931 | val 0.6931 | metric 0.3765 *
[heal_clf_OUTPUT_p20_r5_long] epoch 010 | train 0.6931 | val 0.6931 | metric 0.3765 
[heal_clf_OUTPUT_p20_r5_long] epoch 020 | train 0.6931 | val 0.6931 | metric 0.3765 
[heal_clf_OUTPUT_p20_r5_long] Early stopping at epoch 21. Best val_loss=0.6931
B — LONG HEAL TEST: {'accuracy': 0.37209302325581395, 'f1': 0.0}
B — Logged 4 rows to c:\Users\Admin\Desktop\ffnn-healing\results\healing_runs.csv
B — Plots: c:\Users\Admin\Desktop\ffnn-healing\figures\heal_clf_OUTPUT_p20_r5_short_loss.png c:\Users\Admin\Desktop\ffnn-healing\figures\heal_clf_OUTPUT_p20_r5_short_accuracy.png | c:\Users\Admin\Desktop\ffnn-healing\figures\heal_clf_OUTPUT_p20_r5_long_loss.png c:\Users\Admin\Desktop\f

In [32]:
# regression: damage first hidden layer, 20%, repeats=5
#run on demand
set_seed(42)

first_lin_reg = layer_seq_indices(reg_ctor)[0]  # seq index 0 for reg model

damaged_R, mask_R, train_R, val_R, test_R, mR_before, mR_after = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=reg_ctor,
        ckpt_path=REG_CKPT,
        task="reg",
        data_dir=CAL_DIR,          # ← local data dir
        layer_lin_seq_index=first_lin_reg,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

print("REG — BASELINE:", mR_before)
print("REG — POST-DAMAGE:", mR_after)


REG — BASELINE: {'mae': 0.37485018372535706, 'rmse': 0.5373355181330539}
REG — POST-DAMAGE: {'mae': 0.6136718988418579, 'rmse': 0.9004139597221144}


In [33]:
# R2 — regression healing (short vs long), evaluate, log
# run on demand
# ---- SHORT HEAL (≈15 epochs) ----
R_short = "heal_reg_firstHidden_p20_r5_short"
bestR_s, histR_s = healing_train_constrained(
    model=damaged_R,
    mask_dict=mask_R,
    train_loader=train_R,
    val_loader=val_R,
    task="reg",
    max_epochs=15,          # quick heal
    lr=1e-3,
    weight_decay=1e-4,
    patience=5,
    run_name=R_short
)

mR_s = reg_ctor().to(DEVICE)
mR_s.load_state_dict(torch.load(bestR_s, map_location=DEVICE))
metricsR_s = evaluate_model(mR_s, test_R, task="reg")
print("REG — SHORT HEAL TEST:", metricsR_s)
figR_s_loss, figR_s_metric = plot_history(histR_s, R_short, task="reg")

# ---- LONG HEAL (≤150 epochs) ----
# Recreate the SAME post-damage state so long run starts from identical damage
damaged_R2, mask_R2, train_R2, val_R2, test_R2, _, _ = \
    apply_progressive_neuron_damage_with_masks(
        model_ctor=reg_ctor,
        ckpt_path=REG_CKPT,
        task="reg",
        data_dir=CAL_DIR,          # ← local data dir (fixed)
        layer_lin_seq_index=first_lin_reg,
        pct_neurons=0.20,
        repeats=5,
        seed=12345
    )

R_long = "heal_reg_firstHidden_p20_r5_long"
bestR_l, histR_l = healing_train_constrained(
    model=damaged_R2,
    mask_dict=mask_R2,
    train_loader=train_R2,
    val_loader=val_R2,
    task="reg",
    max_epochs=150,
    lr=1e-3,
    weight_decay=1e-4,
    patience=20,
    run_name=R_long
)

mR_l = reg_ctor().to(DEVICE)
mR_l.load_state_dict(torch.load(bestR_l, map_location=DEVICE))
metricsR_l = evaluate_model(mR_l, test_R2, task="reg")
print("REG — LONG HEAL TEST:", metricsR_l)
figR_l_loss, figR_l_metric = plot_history(histR_l, R_long, task="reg")

# ---- LOG to healing_runs.csv (mae & rmse) ----
rowsR = [
    {"timestamp": timestamp(), "phase": "healing", "dataset": "california_housing", "task": "reg",
     "stage": "baseline", "layer_seq_index": int(first_lin_reg), "pct": 0.20, "repeats": 5,
     "mae": float(mR_before["mae"]), "rmse": float(mR_before["rmse"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "california_housing", "task": "reg",
     "stage": "post_damage", "layer_seq_index": int(first_lin_reg), "pct": 0.20, "repeats": 5,
     "mae": float(mR_after["mae"]), "rmse": float(mR_after["rmse"])},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "california_housing", "task": "reg",
     "stage": "healed_short", "layer_seq_index": int(first_lin_reg), "pct": 0.20, "repeats": 5,
     "epochs": len(histR_s["epoch"]),
     "mae": float(metricsR_s["mae"]), "rmse": float(metricsR_s["rmse"]),
     "best_ckpt": str(bestR_s), "fig_loss": figR_s_loss, "fig_metric": figR_s_metric},
    {"timestamp": timestamp(), "phase": "healing", "dataset": "california_housing", "task": "reg",
     "stage": "healed_long", "layer_seq_index": int(first_lin_reg), "pct": 0.20, "repeats": 5,
     "epochs": len(histR_l["epoch"]),
     "mae": float(metricsR_l["mae"]), "rmse": float(metricsR_l["rmse"]),
     "best_ckpt": str(bestR_l), "fig_loss": figR_l_loss, "fig_metric": figR_l_metric},
]
for r in rowsR:
    log_result(r, csv_name="healing_runs.csv")

print("REG — Logged 4 rows to", RESULTS_DIR / "healing_runs.csv")
print("REG — Plots:", figR_s_loss, figR_s_metric, "|", figR_l_loss, figR_l_metric)


[heal_reg_firstHidden_p20_r5_short] epoch 001 | train 0.5677 | val 0.4425 | metric 0.6486 *
[heal_reg_firstHidden_p20_r5_short] epoch 002 | train 0.4676 | val 0.4181 | metric 0.6257 *
[heal_reg_firstHidden_p20_r5_short] epoch 003 | train 0.4479 | val 0.4089 | metric 0.6176 *
[heal_reg_firstHidden_p20_r5_short] epoch 004 | train 0.4293 | val 0.4010 | metric 0.6137 *
[heal_reg_firstHidden_p20_r5_short] epoch 005 | train 0.4321 | val 0.3963 | metric 0.6076 *
[heal_reg_firstHidden_p20_r5_short] epoch 008 | train 0.4227 | val 0.3913 | metric 0.6023 *
[heal_reg_firstHidden_p20_r5_short] epoch 009 | train 0.4038 | val 0.3874 | metric 0.5977 *
[heal_reg_firstHidden_p20_r5_short] epoch 010 | train 0.4046 | val 0.3908 | metric 0.6000 
[heal_reg_firstHidden_p20_r5_short] epoch 011 | train 0.4024 | val 0.3821 | metric 0.5945 *
[heal_reg_firstHidden_p20_r5_short] epoch 012 | train 0.3979 | val 0.3743 | metric 0.5903 *
[heal_reg_firstHidden_p20_r5_short] epoch 014 | train 0.3894 | val 0.3714 | metri