# UCI MLPs — Wine & Breast Cancer (PyTorch)
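This notebook reproduces the MLP training on the Wine and Breast Cancer datasets with a compact configuration: two hidden layers (64 and 32 units), the Adam optimizer, and early stopping on validation macro-F1.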

In [None]:
import torch, numpy as np, pandas as pd
from sklearn.datasets import load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, f1_score
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

# Torch device handle; fixed to the CPU.
DEVICE = torch.device("cpu")

In [None]:
class MLP(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Dropout per hidden width, then a final Linear.

    Args:
        in_dim: number of input features.
        hidden: iterable of hidden-layer widths; one Linear/ReLU/Dropout stage per entry.
        out_dim: number of output logits.
        dropout: dropout probability used after every hidden activation.
    """

    def __init__(self, in_dim, hidden, out_dim, dropout=0.1):
        super().__init__()
        # widths[k] -> widths[k + 1] describes the k-th hidden stage.
        widths = [in_dim, *hidden]
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend((nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)))
        # Output head: no activation, the module returns raw logits.
        stages.append(nn.Linear(widths[-1], out_dim))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Return the logits produced by the layer stack for input ``x``."""
        return self.net(x)

def accuracy_from_logits(logits, y):
    """Return, as a Python float, the fraction of rows whose arg-max class equals the label in ``y``."""
    predicted = torch.argmax(logits, dim=1)
    hits = predicted == y
    return hits.float().mean().item()

In [None]:
def standardize_split(X, y, test_size=0.2, val_size=0.2, random_state=0):
    """Make a stratified train/validation/test split and standardize the features.

    The test part is carved out first; the validation part is then taken from
    the remaining rows, so ``val_size`` is a fraction of the non-test data
    (with the defaults: roughly 64% train, 16% validation, 20% test).

    Args:
        X: feature matrix, one row per sample.
        y: class labels aligned with ``X``; used for stratification.
        test_size: share of all samples held out for testing.
        val_size: share of the remaining (non-test) samples used for validation.
        random_state: seed passed to both ``train_test_split`` calls.

    Returns:
        ``(X_tr, y_tr, X_va, y_va, X_te, y_te)`` with the three feature
        matrices standardized.
    """
    X_trv, X_te, y_trv, y_te = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    X_tr, X_va, y_tr, y_va = train_test_split(
        X_trv, y_trv, test_size=val_size, random_state=random_state, stratify=y_trv
    )
    # Fit the scaler on the training rows only, then reuse its statistics for
    # validation and test so no information from held-out data leaks in.
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_va = scaler.transform(X_va)
    X_te = scaler.transform(X_te)
    return X_tr, y_tr, X_va, y_va, X_te, y_te

def _evaluate(model, crit, X, y, batch_size):
    """Score ``model`` on tensors ``(X, y)`` in mini-batches with gradients disabled.

    The caller is responsible for switching the model to eval mode.

    Returns:
        ``(mean_loss, accuracy, preds)``: loss and accuracy averaged over
        samples (each batch weighted by its size) and a 1-D tensor of predicted
        class indices in input order.
    """
    loss_sum = 0.0
    preds = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            xb = X[start:start + batch_size]
            yb = y[start:start + batch_size]
            logits = model(xb)
            # The criterion returns a per-batch mean; weight it by the batch
            # size so a short final batch does not skew the epoch average.
            loss_sum += crit(logits, yb).item() * len(xb)
            preds.append(torch.argmax(logits, 1))
    preds = torch.cat(preds)
    accuracy = (preds == y).float().mean().item()
    return loss_sum / len(X), accuracy, preds


def train_one(X_tr,y_tr,X_va,y_va,X_te,y_te,n_classes,tag='run',
              batch_size=32,max_epochs=80,patience=8,seed=None):
    """Train an MLP with early stopping on validation macro-F1 and score it on the test split.

    Args:
        X_tr, y_tr: training features and integer class labels.
        X_va, y_va: validation features and labels (drive early stopping).
        X_te, y_te: test features and labels (used only for the final metrics).
        n_classes: number of output classes.
        tag: label for the run; not used by the function body, kept so existing
            calls remain valid.
        batch_size: mini-batch size for training and evaluation.
        max_epochs: upper bound on the number of training epochs.
        patience: stop after this many consecutive epochs without a
            validation macro-F1 improvement.
        seed: if not None, passed to ``torch.manual_seed`` before the model is
            built so weight init and dropout are repeatable.

    Returns:
        ``(model, hist, metrics)`` where ``model`` holds the best-validation
        weights, ``hist`` maps ``'trL'``/``'vaL'``/``'trA'``/``'vaA'`` to
        per-epoch lists, and ``metrics`` has the test ``'accuracy'``,
        ``'precision_macro'``, ``'recall_macro'`` and ``'f1_macro'``.

    Raises:
        ValueError: if any of the three splits is empty.
    """
    if min(len(X_tr), len(X_va), len(X_te)) == 0:
        raise ValueError('train, validation and test splits must all be non-empty')
    if seed is not None:
        torch.manual_seed(seed)

    def to_tensors(X, y):
        # Convert once up front instead of re-wrapping every slice each epoch.
        return (torch.tensor(np.asarray(X), dtype=torch.float32),
                torch.tensor(np.asarray(y), dtype=torch.long))

    Xt, yt = to_tensors(X_tr, y_tr)
    Xv, yv = to_tensors(X_va, y_va)
    Xe, ye = to_tensors(X_te, y_te)

    model=MLP(Xt.shape[1],[64,32],n_classes,0.1)
    opt=torch.optim.Adam(model.parameters(),lr=1e-3,weight_decay=1e-4)
    crit=nn.CrossEntropyLoss()
    hist={'trL':[],'vaL':[],'trA':[],'vaA':[]}
    best=-1; wait=0; best_state=None
    n_train=len(Xt); y_va_np=yv.numpy()

    for _ in range(max_epochs):
        model.train()
        loss_sum=0.0; n_correct=0
        for start in range(0,n_train,batch_size):
            xb=Xt[start:start+batch_size]; yb=yt[start:start+batch_size]
            opt.zero_grad()
            logits=model(xb)
            loss=crit(logits,yb)
            loss.backward()
            opt.step()
            # Accumulate per-sample totals so the epoch figures are true
            # sample averages even when the last batch is smaller.
            loss_sum+=loss.item()*len(xb)
            n_correct+=(torch.argmax(logits,1)==yb).sum().item()

        # One validation pass yields loss, accuracy and the predictions for F1.
        model.eval()
        va_loss,va_acc,va_pred=_evaluate(model,crit,Xv,yv,batch_size)
        hist['trL'].append(loss_sum/n_train); hist['vaL'].append(va_loss)
        hist['trA'].append(n_correct/n_train); hist['vaA'].append(va_acc)

        # early stop on macro F1
        f1=f1_score(y_va_np, va_pred.numpy(), average='macro', zero_division=0)
        if f1>best+1e-6:
            best=f1; wait=0
            # Clone the tensors: state_dict() returns live references.
            best_state={k:v.clone() for k,v in model.state_dict().items()}
        else:
            wait+=1
        if wait>=patience: break

    if best_state is not None:
        model.load_state_dict(best_state)

    # test metrics
    model.eval()
    _,_,te_pred=_evaluate(model,crit,Xe,ye,batch_size)
    y_true=ye.numpy(); y_pred=te_pred.numpy()
    acc=accuracy_score(y_true,y_pred)
    pr,rc,f1,_=precision_recall_fscore_support(y_true,y_pred,average='macro', zero_division=0)
    return model,hist,{'accuracy':acc,'precision_macro':pr,'recall_macro':rc,'f1_macro':f1}

In [None]:
# Run on Wine
# Seed torch's global RNG so weight initialization and dropout are repeatable
# when the notebook is re-run from a fresh kernel.
torch.manual_seed(0)
wine = load_wine()
Xw = wine.data.astype('float32')
yw = wine.target.astype('int64')
Xtr, ytr, Xva, yva, Xte, yte = standardize_split(Xw, yw)
model_w, hist_w, met_w = train_one(Xtr, ytr, Xva, yva, Xte, yte, len(wine.target_names), 'wine')
met_w

In [None]:
# Run on Breast Cancer
# Seed torch's global RNG so weight initialization and dropout are repeatable
# when the notebook is re-run from a fresh kernel.
torch.manual_seed(0)
bc = load_breast_cancer()
Xb = bc.data.astype('float32')
yb = bc.target.astype('int64')
Xtr, ytr, Xva, yva, Xte, yte = standardize_split(Xb, yb)
model_b, hist_b, met_b = train_one(Xtr, ytr, Xva, yva, Xte, yte, len(bc.target_names), 'breast_cancer')
met_b

## Plotting helpers
The following cell plots training/validation loss and accuracy for each dataset.

In [None]:
import matplotlib.pyplot as plt
def plot_history(hist, name):
    """Draw two figures for one training run: loss curves and accuracy curves.

    Args:
        hist: dict with per-epoch sequences under 'trL', 'vaL', 'trA', 'vaA'.
        name: dataset name used as the title prefix.
    """
    # (train key, validation key, legend suffix, axis label / title suffix)
    panels = (
        ('trL', 'vaL', 'Loss', 'Loss'),
        ('trA', 'vaA', 'Acc', 'Accuracy'),
    )
    for train_key, val_key, short, full in panels:
        fig, ax = plt.subplots()
        ax.plot(hist[train_key], label=f'Train {short}')
        ax.plot(hist[val_key], label=f'Val {short}')
        ax.set(xlabel='Epoch', ylabel=full, title=f'{name} — {full}')
        ax.legend()

# Wine plots
plot_history(hist_w, 'Wine')
# Breast plots
plot_history(hist_b, 'Breast Cancer')