# Weighted BCE Training (Experimental)

1. Experimental weighted-BCE training for the PyTorch novelty model.
2. Uses the same architecture as production (PatentNoveltyNet).
3. Adds class weighting via BCEWithLogitsLoss (pos_weight).
4. Saves model and metrics to experimental/bce/outputs/weighted_bce/.

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path().absolute().parent.parent))

import json
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from src.models.pytorch_classifier import PatentNoveltyNet

## Load Data


In [None]:
# Project root is taken to be two levels above the current working directory.
root = Path().absolute().parent.parent
features_dir = root / "data" / "features"
X = np.load(features_dir / "train_features_v2.X.npy")
y = np.load(features_dir / "train_features_v2.y.npy")

# A 13-column matrix is reduced to 10 columns by dropping columns 0, 1 and 6.
# NOTE(review): which features those indices correspond to is not visible
# here -- confirm against the feature-extraction code before changing them.
if X.shape[1] == 13:
    dropped = {0, 1, 6}
    idx_keep = [col for col in range(13) if col not in dropped]
    X = X[:, idx_keep]

print(f"Loaded data: {X.shape[0]} samples, {X.shape[1]} features")


## Setup Data Loaders


In [None]:
# Split BEFORE scaling so the scaler's mean/variance come from training rows
# only. Fitting on the full matrix would leak validation statistics into
# training and make the validation metrics optimistic. The split itself is
# unchanged: same test_size, random_state and stratification on y.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Targets get a trailing dimension via unsqueeze(1) so each label is a
# length-1 row, matching what the loss and evaluate() index as column 0.
train_ds = TensorDataset(
    torch.as_tensor(X_train, dtype=torch.float32),
    torch.as_tensor(y_train, dtype=torch.float32).unsqueeze(1),
)
val_ds = TensorDataset(
    torch.as_tensor(X_val, dtype=torch.float32),
    torch.as_tensor(y_val, dtype=torch.float32).unsqueeze(1),
)

# drop_last=True discards a trailing partial training batch; validation keeps
# every sample and a fixed order.
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True, drop_last=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)

print(f"Train: {len(X_train)}, Val: {len(X_val)}")


In [None]:
def evaluate(model, loader, device):
    """Score ``model`` on every batch of ``loader`` and return summary metrics.

    The model is put into eval mode (and left there) and run without gradient
    tracking. Column 0 of the model output is used as the positive-class score
    and thresholded at 0.5 to obtain hard predictions.

    NOTE(review): this assumes ``model(x)`` returns probabilities rather than
    raw logits -- confirm against the model's ``forward``.

    Args:
        model: Callable module producing one score column per sample.
        loader: Iterable yielding ``(features, labels)`` tensor pairs.
        device: Device the feature batches are moved to before the forward pass.

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``roc_auc``, in that order.
    """
    model.eval()
    score_chunks = []
    label_chunks = []
    with torch.no_grad():
        for features, targets in loader:
            outputs = model(features.to(device))
            score_chunks.append(outputs.cpu().numpy())
            label_chunks.append(targets.cpu().numpy())

    # Stack the per-batch arrays and keep the single score/label column.
    scores = np.vstack(score_chunks)[:, 0]
    truth = np.vstack(label_chunks)[:, 0]
    hard_preds = (scores > 0.5).astype(int)

    # Threshold-based scorers share a call signature; zero_division=0 makes
    # them return 0 instead of warning when a denominator is empty.
    thresholded = {
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
    }
    report = {"accuracy": accuracy_score(truth, hard_preds)}
    for name, scorer in thresholded.items():
        report[name] = scorer(truth, hard_preds, zero_division=0)
    # ROC AUC is computed from the continuous scores, not the hard predictions.
    report["roc_auc"] = roc_auc_score(truth, scores)
    return report


## Initialize Model and Training Setup


In [None]:
# Pick the compute device: CUDA first, then MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters for this experiment.
pos_weight = 2.0        # weight on the positive class in the BCE loss
hidden_dims = [256, 128]
dropout = 0.3
lr = 0.002
weight_decay = 1e-5
max_epochs = 40         # upper bound on training epochs
patience = 6            # epochs without val-loss improvement before stopping

# Input width follows the (possibly column-pruned) feature matrix.
model = PatentNoveltyNet(
    input_dim=X.shape[1],
    hidden_dims=hidden_dims,
    dropout=dropout,
    use_residual=True,
).to(device)

def forward_logits(x):
    """Run ``x`` through the module-level ``model``'s sub-modules in order.

    Applies ``input_bn``, ``hidden_layers``, ``output_bn`` and ``output_layer``
    one after another and returns the result of the last one, bypassing
    ``model``'s own ``forward``.

    NOTE(review): presumably ``forward`` applies a sigmoid after
    ``output_layer`` and this helper exists to obtain raw logits for
    ``BCEWithLogitsLoss`` -- confirm against the model definition, including
    whether this chain matches ``forward`` when ``use_residual=True``.
    """
    stages = (
        model.input_bn,
        model.hidden_layers,
        model.output_bn,
        model.output_layer,
    )
    for stage in stages:
        x = stage(x)
    return x

# Weighted BCE on raw logits: the positive-class term is scaled by pos_weight.
positive_weight = torch.tensor([pos_weight], device=device)
criterion = nn.BCEWithLogitsLoss(pos_weight=positive_weight)

# AdamW over every model parameter.
optimizer = optim.AdamW(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

# Multiply the learning rate by 0.5 when the value passed to scheduler.step()
# stops decreasing (scheduler patience of 3).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3,
)


## Training Loop


In [None]:
# Early-stopping bookkeeping and per-epoch history.
best_val_loss = float("inf")
best_state = None
patience_ctr = 0
history = {"train_loss": [], "val_loss": [], "val_metrics": []}

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    batches = 0
    for Xb, yb in train_loader:
        Xb = Xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        logits = forward_logits(Xb)
        loss = criterion(logits, yb)
        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        train_loss += loss.item()
        batches += 1
    if batches == 0:
        # The train loader drops partial batches, so a training split smaller
        # than one batch yields nothing; fail with a clear message instead of
        # a bare ZeroDivisionError.
        raise RuntimeError(
            "train_loader yielded no batches; the training split must "
            "contain at least one full batch"
        )
    avg_train = train_loss / batches

    # ---- validation pass (same weighted loss, no gradients) ----
    model.eval()
    val_loss = 0.0
    v_batches = 0
    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb = Xb.to(device)
            yb = yb.to(device)
            logits = forward_logits(Xb)
            loss = criterion(logits, yb)
            val_loss += loss.item()
            v_batches += 1
    avg_val = val_loss / v_batches if v_batches else 0.0
    # The plateau scheduler monitors the mean validation loss.
    scheduler.step(avg_val)

    history["train_loss"].append(avg_train)
    history["val_loss"].append(avg_val)

    val_metrics = evaluate(model, val_loader, device)
    history["val_metrics"].append(val_metrics)

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1}: Train Loss={avg_train:.4f}, Val Loss={avg_val:.4f}, Val Acc={val_metrics['accuracy']:.4f}")

    if avg_val < best_val_loss:
        best_val_loss = avg_val
        # state_dict() hands back references to the live parameter/buffer
        # tensors, which later optimizer steps overwrite in place. Clone each
        # tensor so the checkpoint really is a snapshot of this epoch.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        patience_ctr = 0
    else:
        patience_ctr += 1
        if patience_ctr >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Restore the weights from the epoch with the lowest validation loss.
if best_state is not None:
    model.load_state_dict(best_state)


## Save Results

In [None]:
# Anchor the output directory to the project root (`root`, two levels above
# the working directory) like the data paths are. A bare relative path would
# resolve against the working directory and create a nested
# experimental/bce/... tree underneath it.
out_dir = root / "experimental" / "bce" / "outputs" / "weighted_bce"
out_dir.mkdir(parents=True, exist_ok=True)

# Persist the weights, the fitted scaler and the per-epoch history.
torch.save({"model_state_dict": model.state_dict()}, out_dir / "model.pt")
with open(out_dir / "scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)
with open(out_dir / "history.json", "w", encoding="utf-8") as f:
    json.dump(history, f, indent=2)

print(f"Saved to {out_dir}")

# history["val_metrics"][-1] belongs to the last epoch that ran, which under
# early stopping need not be the epoch with the best validation loss.
# Re-score the model as it is being saved so the printed numbers describe the
# weights written to model.pt.
final_metrics = evaluate(model, val_loader, device)
print(f"Final validation metrics: {final_metrics}")