# Cifar10 dataset

# Import the packages

In [None]:
import torch
import torch.nn as nn
import torchvision

# Setup
# Preprocessing

In [None]:
from torchvision.transforms import Compose, ToTensor, Normalize, Resize

# TODO: investigate the rescaling option
# IMG_SIZE = 160 # original img size (32, 32, 3), optimal img size (224, 224, 3)

# Preprocessing pipeline applied to every image loaded from the dataset.
# Only ToTensor is active: Resize and Normalize below are commented out, so the
# resulting tensors keep their original size and stay in the [0.0, 1.0] range.
transform: Compose = Compose([
    
    # Resize((32, 32)),
    # Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255]
    # to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
    ToTensor(),
    
    # Normalize a tensor image with mean and standard deviation.
    # output[channel] = (input[channel] - mean[channel]) / std[channel]
    # i.e. with the values below: normalised = (original - 0.5) / 0.5
    # Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

## Load the dataset

In [None]:
from torch import Tensor
from torch.utils.data import random_split
from torchvision.datasets import CIFAR10

# Load the full CIFAR-10 training data (downloaded into ../data when missing).
full_trainset: CIFAR10 = CIFAR10(
    root='../data',
    train=True,
    download=True,
    transform=transform
)

# Split the full training data into train (80%) and validation (the rest) sets.
train_size: int = int(0.8 * len(full_trainset))
valid_size: int = len(full_trainset) - train_size

# random_split draws a random permutation of the indices and returns one
# torch.utils.data.Subset per requested length. A dedicated, seeded generator
# makes the split identical on every run, so validation results stay
# comparable between experiments (the global seed is only set later, before
# the model is built).
split_generator = torch.Generator().manual_seed(42)
trainset, validset = random_split(
    full_trainset,
    [train_size, valid_size],
    generator=split_generator,
)

# Load the held-out test data.
testset: CIFAR10 = CIFAR10(
    root='../data',
    train=False,
    download=True,
    transform=transform
)

In [None]:
# Report the number of samples in each split (and their total) as a sanity
# check on the 80/20 train/validation split.
print(f"All samples: {len(trainset)+len(validset)+len(testset)}")
print(f"Train samples: {len(trainset)}")
print(f"Validation samples: {len(validset)}")
print(f"Test samples: {len(testset)}")

# Fetch one (image, label) pair from the un-split training set to check the
# shape of a single transformed image.
image, label = full_trainset[0]
print(f"Single image shape: {image.shape}")

## Create data loaders

In [None]:
from torch.utils.data import DataLoader

# Settings shared by all three loaders, kept in one place so they cannot drift.
# Pinned (page-locked) host memory only serves host-to-GPU copies, so it is
# requested only when CUDA is actually available; the notebook also supports
# running on CPU.
loader_kwargs = dict(
    num_workers=4,
    pin_memory=torch.cuda.is_available(),
    persistent_workers=True,
)

# Training batches are reshuffled every epoch.
train_loader: DataLoader = DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    **loader_kwargs,
)

# Evaluation loaders keep a fixed order and use larger batches.
valid_loader: DataLoader = DataLoader(
    validset,
    batch_size=256,
    shuffle=False,
    **loader_kwargs,
)

test_loader: DataLoader = DataLoader(
    testset,
    batch_size=256,
    shuffle=False,
    **loader_kwargs,
)

# Check shapes
xb, yb = next(iter(train_loader))
print("Batch shape:", xb.shape)   # should be [B, 3, 32, 32]

## View a batch of images

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# TODO: check the bug -> it is not an 8 by 8 figure, why?
# Helper function to display an image grid
def show_image(
    image_tensor: torch.Tensor,
    mean: float = 0.0,
    std: float = 1.0,
) -> None:
    """Display a (C, H, W) image tensor, e.g. a grid built by ``make_grid``.

    The preprocessing ``transform`` in this file only applies ``ToTensor``
    (``Normalize`` is commented out), so pixel values already lie in [0, 1].
    The defaults ``mean=0.0`` / ``std=1.0`` therefore leave the values
    untouched. If ``Normalize(mean=m, std=s)`` is re-enabled, pass the same
    ``mean`` and ``std`` here so the normalization is undone before plotting.

    Args:
        image_tensor: Image tensor laid out as (C, H, W).
        mean: Value that was subtracted from the pixels during normalization.
        std: Value the pixels were divided by during normalization.
    """
    # Step 1: Undo normalization.
    # normalised = (original - mean) / std  ->  original = normalised * std + mean
    restored = image_tensor * std + mean

    # Step 2: Convert from a PyTorch tensor to a NumPy array (for matplotlib).
    # .detach() removes gradient tracking; .cpu() ensures data is on the CPU.
    image_c_h_w = restored.detach().cpu().numpy()
    print("Shape before transpose:", image_c_h_w.shape)  # (C, H, W)

    # Step 3: Rearrange dimensions from (C, H, W) to (H, W, C)
    # because Matplotlib expects the color channel as the last dimension.
    image_h_w_c = np.transpose(image_c_h_w, (1, 2, 0))
    print("Shape after transpose:", image_h_w_c.shape)   # (H, W, C)

    # Keep float pixel values inside [0, 1] so rounding error from the
    # un-normalization cannot push them outside the displayable range.
    image_h_w_c = np.clip(image_h_w_c, 0.0, 1.0)

    # Step 4: Display the image
    plt.figure(figsize=(12, 12))
    plt.imshow(image_h_w_c)
    plt.axis("off")  # hide axis numbers
    plt.show()
    
# Get a batch of training images
data_iter = iter(train_loader)  # iterator over the DataLoader's batches
images: torch.Tensor
labels: torch.Tensor
images, labels = next(data_iter)   # one batch (images + labels)

# A training batch holds 128 images, so handing the whole batch to make_grid
# with nrow=8 produces 16 rows of 8 images. Use only the first 64 images to
# get the intended 8 x 8 grid.
image_grid: torch.Tensor = torchvision.utils.make_grid(images[:64], nrow=8)

# Show the grid
show_image(image_grid)


## Get class labels

In [None]:
# TODO: Get the label directly cifar10_classes = cifar10.classes

# Human-readable class names, indexed by the integer label.
cifar10_classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Class name of the first image in the batch fetched above.
print(cifar10_classes[labels[0]])


In [None]:
# Bare expression: in a notebook cell this displays the class-name tuple.
cifar10_classes

# Convolutional Variational Autoencoder (Conv-VAE)
https://github.com/ageron/handson-mlp/blob/main/18_autoencoders_gans_and_diffusion_models.ipynb 

In [None]:
from collections import namedtuple
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The model, the metric and the training batches are moved to this device below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

## Model

In [None]:
# ?nn.Linear

In [None]:
# Result of VAE.forward: the reconstruction plus the parameters (mean and
# log-variance) of the latent Gaussian it was sampled from.
VAEOutput = namedtuple("VAEOutput", ["output", "codings_mean", "codings_logvar"])
# TODO: rename VAE model to CVAE
# TODO: try batchnorm, dropout


def _conv_stage(in_channels, out_channels, kernel_size, stride):
    """Return the [Conv2d, BatchNorm2d, ReLU] layers of one encoder stage."""
    return [
        nn.Conv2d(in_channels, out_channels,
                  kernel_size=kernel_size, stride=stride, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]


def _upconv_stage(in_channels, out_channels):
    """Return the [ConvTranspose2d, BatchNorm2d, ReLU] layers of one decoder stage."""
    return [
        nn.ConvTranspose2d(in_channels, out_channels,
                           kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]


class VAE(nn.Module):
    """Convolutional variational autoencoder for 3x32x32 images.

    The encoder maps an image to a 256x4x4 feature map, from which two linear
    heads predict the mean and log-variance of a ``codings_dim``-dimensional
    Gaussian. The decoder maps a sampled coding back to a 3x32x32 image whose
    values are squashed into [0, 1] by a final sigmoid.
    """

    def __init__(self, codings_dim=32):
        super().__init__()
        self.codings_dim = codings_dim

        # Encoder: 3x32x32 -> 256x4x4
        encoder_layers = [
            *_conv_stage(3, 32, kernel_size=4, stride=2),     # 32x32 -> 16x16
            *_conv_stage(32, 64, kernel_size=4, stride=2),    # 16x16 -> 8x8
            *_conv_stage(64, 128, kernel_size=4, stride=2),   # 8x8 -> 4x4
            *_conv_stage(128, 256, kernel_size=3, stride=1),  # 4x4 -> 4x4
        ]
        self.encoder_cnn = nn.Sequential(*encoder_layers)
        self.flat_dim = 256 * 4 * 4  # 4096, valid for 32x32 inputs

        # Variational heads: one linear layer each for mean and log-variance.
        self.fc_mu = nn.Linear(self.flat_dim, codings_dim)
        self.fc_logvar = nn.Linear(self.flat_dim, codings_dim)

        # Decoder: z -> 256x4x4 -> 3x32x32
        self.fc_dec = nn.Linear(codings_dim, self.flat_dim)
        decoder_layers = [
            *_upconv_stage(256, 128),  # 4 -> 8
            *_upconv_stage(128, 64),   # 8 -> 16
            *_upconv_stage(64, 32),    # 16 -> 32
            nn.Conv2d(32, 3, kernel_size=3, stride=1, padding=1),
            nn.Sigmoid(),  # outputs in [0, 1], matching the MSE loss on [0, 1] inputs
        ]
        self.decoder_cnn = nn.Sequential(*decoder_layers)

    def encode(self, X):
        """Return ``(mean, logvar)``, each of shape (B, codings_dim)."""
        features = torch.flatten(self.encoder_cnn(X), start_dim=1)  # (B, 4096)
        return self.fc_mu(features), self.fc_logvar(features)

    @staticmethod
    def sample_codings(mean, logvar):
        """Draw ``mean + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        sigma = torch.exp(0.5 * logvar)
        noise = torch.randn_like(sigma)
        return mean + noise * sigma

    def decode(self, Z):
        """Map codings of shape (B, codings_dim) to images of shape (B, 3, 32, 32)."""
        feature_map = self.fc_dec(Z).view(-1, 256, 4, 4)  # (B, 256, 4, 4)
        return self.decoder_cnn(feature_map)

    def forward(self, X):
        """Encode ``X``, sample a coding, decode it and return a ``VAEOutput``."""
        codings_mean, codings_logvar = self.encode(X)
        codings = self.sample_codings(codings_mean, codings_logvar)
        reconstruction = self.decode(codings)
        return VAEOutput(reconstruction, codings_mean, codings_logvar)

## Model training

In [None]:
def vae_loss(y_pred, x_target, kl_weight=1.0):
    """Return the VAE training loss: MSE reconstruction plus a weighted KL term.

    Args:
        y_pred: ``(output, mean, logvar)`` triple as returned by the model.
        x_target: Images the reconstruction is compared against.
        kl_weight: Multiplier applied to the (per-pixel scaled) KL divergence.

    Returns:
        Scalar tensor ``mse + kl_weight * kl / n_pixels``.
    """
    reconstruction, codings_mean, codings_logvar = y_pred

    # Reconstruction term: mean squared error over all elements.
    # recon = F.binary_cross_entropy_with_logits(output, x_target, reduction="mean") # logits + BCEWithLogitsLoss:
    recon_term = F.mse_loss(reconstruction, x_target)

    # KL divergence: summed over the coding dimensions, averaged over the batch.
    kl_elements = 1 + codings_logvar - codings_logvar.exp() - codings_mean.pow(2)
    kl_per_sample = torch.sum(kl_elements, dim=-1)
    kl_div = -0.5 * kl_per_sample.mean()

    # Divide by the element count of one image so the KL term is on the same
    # per-pixel scale as the mean-reduced reconstruction term.
    pixels_per_image = x_target[0].numel()
    return recon_term + kl_weight * (kl_div / pixels_per_image)

def evaluate_tm(model, data_loader, metric):
    """Compute a reconstruction metric for ``model`` over ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Each input batch is compared against the model's reconstruction of it;
    labels yielded by the loader are ignored.

    Args:
        model: Module whose forward returns either a tensor or an object with
            an ``output`` attribute (such as ``VAEOutput``).
        data_loader: Iterable yielding ``(inputs, labels)`` pairs.
        metric: Object exposing ``reset()``, ``update(preds, target)`` and
            ``compute()`` (the torchmetrics interface).

    Returns:
        Whatever ``metric.compute()`` returns after seeing every batch.
    """
    # Run on the device the model lives on instead of relying on a
    # module-level ``device`` variable. A model without parameters has no
    # device of its own, so its batches are left where they are.
    first_param = next(model.parameters(), None)

    model.eval()
    metric.reset()
    with torch.no_grad():
        for X_batch, _ in data_loader:
            if first_param is not None:
                X_batch = X_batch.to(first_param.device)
            y_pred = model(X_batch)
            # Unwrap structured outputs the same way the plotting helpers do.
            out = y_pred.output if hasattr(y_pred, "output") else y_pred
            metric.update(out, X_batch)  # compare reconstruction to input
    return metric.compute()

def train(model, optimizer, loss_fn, metric, train_loader, valid_loader, n_epochs=20):
    """Train an autoencoder-style model and record per-epoch statistics.

    Each batch is used as both input and target (labels are ignored).
    Gradients are clipped to a total norm of 1.0 before every optimizer step.
    After each epoch the metric is computed on the training batches seen
    during the epoch and, via ``evaluate_tm``, on ``valid_loader``.

    Args:
        model: Module to train; its forward returns a tensor or an object with
            an ``output`` attribute.
        optimizer: Optimizer built over ``model``'s parameters.
        loss_fn: Callable ``loss_fn(y_pred, inputs)`` returning a scalar tensor.
        metric: Object with ``reset()``, ``update(preds, target)`` and
            ``compute()`` (the torchmetrics interface).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of ``(inputs, labels)`` validation batches.
        n_epochs: Number of passes over ``train_loader``.

    Returns:
        Dict with the lists ``"train_losses"``, ``"train_metrics"`` and
        ``"valid_metrics"``, one entry per epoch.
    """
    # Batches follow the model's device, so no module-level ``device``
    # variable is needed.
    model_device = next(model.parameters()).device

    history = {"train_losses": [], "train_metrics": [], "valid_metrics": []}
    for epoch in range(n_epochs):
        total_loss = 0.0
        metric.reset()
        model.train()
        for X_batch, _ in train_loader:
            X_batch = X_batch.to(model_device)
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, X_batch)   # use input as target
            total_loss += loss.item()

            # Clear gradients *before* backward so that gradients left over
            # from anything run before this call cannot leak into the first
            # update.
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            out = y_pred.output if hasattr(y_pred, "output") else y_pred
            metric.update(out, X_batch)

        # Read the training metric before evaluate_tm resets the shared metric.
        train_metric = metric.compute().item()
        val_metric = evaluate_tm(model, valid_loader, metric).item()

        history["train_losses"].append(total_loss / len(train_loader))
        history["train_metrics"].append(train_metric)
        history["valid_metrics"].append(val_metric)

        print(f"Epoch {epoch+1:02d}/{n_epochs} "
              f"loss={history['train_losses'][-1]:.4f}, "
              f"train RMSE={train_metric:.4f}, val RMSE={val_metric:.4f}")
    return history


# Seed PyTorch's global RNG before the model is built.
torch.manual_seed(42)
vae = VAE(codings_dim=32).to(device)
# TODO: hyperparameter to tune -> learning rate
optimizer = torch.optim.NAdam(vae.parameters(), lr=1e-4)
# squared=False: the training log below labels this metric as RMSE.
rmse = torchmetrics.MeanSquaredError(squared=False).to(device)

# Train for 30 epochs; `history` holds per-epoch loss and train/validation metric.
history = train(vae, optimizer, vae_loss, rmse, train_loader, valid_loader, n_epochs=30)

## Plot loss

In [None]:
import matplotlib.pyplot as plt

def _plot_history(series, title, ylabel):
    """Draw one 8x4 line chart over epochs.

    ``series`` is a list of ``(values, label, color)`` triples, one per curve.
    """
    plt.figure(figsize=(8, 4))
    for values, label, color in series:
        plt.plot(values, label=label, color=color)
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.legend()
    plt.tight_layout()
    plt.show()


# --- Plot training loss ---
_plot_history(
    [(history["train_losses"], "Train loss", "tab:blue")],
    title="VAE Training Loss",
    ylabel="Loss",
)

# --- Plot RMSE (train vs validation) ---
_plot_history(
    [
        (history["train_metrics"], "Train RMSE", "tab:green"),
        (history["valid_metrics"], "Validation RMSE", "tab:orange"),
    ],
    title="Reconstruction RMSE",
    ylabel="RMSE",
)


## Visualization

In [None]:
import matplotlib.pyplot as plt
import torch

def plot_image(img):
    """Draw ``img`` on the current axes with the axis hidden.

    Three-dimensional tensors are treated as (C, H, W) and rearranged to
    (H, W, C); tensors of any other rank are passed to ``imshow`` unchanged.
    """
    tensor = img.detach().cpu()
    displayable = tensor.permute(1, 2, 0) if tensor.ndim == 3 else tensor
    plt.imshow(displayable, cmap="gray")
    plt.axis("off")

def plot_reconstructions(model, data_loader, n_images=8):
    """Show original (top row) and reconstructed (bottom row) images from the VAE.

    Args:
        model: Module whose forward returns a tensor or an object with an
            ``output`` attribute holding the reconstructions.
        data_loader: Iterable of ``(images, labels)`` batches; only the first
            batch is used.
        n_images: Maximum number of images to show. Fewer are shown when the
            first batch is smaller.
    """
    model.eval()

    # --- Get a batch of images ---
    X_batch, _ = next(iter(data_loader))
    # Follow the model's device rather than a module-level ``device`` variable.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        X_batch = X_batch.to(first_param.device)
    X_batch = X_batch[:n_images]  # take first n_images only
    # The batch may hold fewer than n_images samples; plot what is there.
    n_shown = X_batch.shape[0]

    # --- Forward pass ---
    with torch.no_grad():
        out = model(X_batch)
        recon = out.output if hasattr(out, "output") else out

    # --- Plot originals and reconstructions ---
    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(2, n_shown, figsize=(n_shown * 1.5, 3), squeeze=False)

    originals = X_batch.detach().cpu()
    reconstructions = recon.detach().cpu()
    for col, (original, reconstruction) in enumerate(zip(originals, reconstructions)):
        # Top row: original, rearranged from (C, H, W) to (H, W, C)
        axes[0, col].imshow(original.permute(1, 2, 0))
        axes[0, col].axis("off")

        # Bottom row: reconstruction
        axes[1, col].imshow(reconstruction.permute(1, 2, 0))
        axes[1, col].axis("off")

    fig.suptitle("Top: Original images | Bottom: Reconstructions", fontsize=12)
    plt.tight_layout()
    plt.show()

# Compare 8 validation images with their reconstructions from the trained VAE.
plot_reconstructions(vae, valid_loader, n_images=8)


# Exploration of the latent representation (UMAP)

In [None]:
import torch
import numpy as np

def collect_mu(vae, loader, device):
    """Return the latent means and labels for every sample in ``loader``.

    Args:
        vae: Model in one of two forms: with an ``encode(x)`` method returning
            ``(mean, logvar)``, or with a forward pass returning an object
            that has a ``codings_mean`` attribute.
        loader: Iterable of ``(inputs, labels)`` batches; labels must be CPU
            tensors.
        device: Device the inputs are moved to before encoding.

    Returns:
        ``(X_mu, y_all)``: NumPy arrays of shape (N, codings_dim) and (N,),
        in the order the loader yielded the samples.
    """
    vae.eval()
    # Only the encoder is needed for the mean, so when the model exposes
    # ``encode`` the sampling and decoding done by the full forward pass are
    # skipped.
    has_encoder = hasattr(vae, "encode")
    mus, ys = [], []
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            mu = vae.encode(x)[0] if has_encoder else vae(x).codings_mean
            mus.append(mu.detach().cpu().numpy())
            ys.append(y.numpy())
    X_mu = np.concatenate(mus, axis=0)   # shape: (N, codings_dim)
    y_all = np.concatenate(ys, axis=0)   # shape: (N,)
    return X_mu, y_all

# Latent means and labels for the train and validation splits.
# train_loader shuffles, so the row order of X_train_mu changes between runs;
# rows and labels stay aligned because they are collected in the same loop.
X_train_mu, y_train = collect_mu(vae, train_loader, device)
X_valid_mu, y_valid = collect_mu(vae, valid_loader, device)


In [None]:
# pip install umap-learn
import umap
from sklearn.preprocessing import StandardScaler

# TODO: try to improve by playing with the parameters

# 2-D UMAP projection of the latent means.
umap_2d = umap.UMAP(
    n_neighbors=15,      # local vs global structure (try 10-50)
    min_dist=0.1,        # how tight clusters look (0.0-0.5)
    n_components=2,      # 2D for plotting (set 3 for 3D)
    metric="euclidean",  # try "cosine" if features are directional
    random_state=42
)

# Earlier variant that fitted UMAP on the unscaled means:
# Z_train_2d = umap_2d.fit_transform(X_train_mu)  # (N_train, 2)
# Z_valid_2d = umap_2d.transform(X_valid_mu)      # (N_valid, 2)



# Standardize with statistics from the training split only, then fit UMAP on
# the training features and project the validation features with the same
# fitted mapping.
scaler = StandardScaler().fit(X_train_mu)
X_train_mu_s = scaler.transform(X_train_mu)
X_valid_mu_s = scaler.transform(X_valid_mu)
Z_train_2d = umap_2d.fit_transform(X_train_mu_s)
Z_valid_2d = umap_2d.transform(X_valid_mu_s)


In [None]:
import matplotlib.pyplot as plt

def scatter_umap(Z, y, title="UMAP of VAE μ", num_classes=None):
    """Scatter-plot a 2-D embedding, coloring each point by its label.

    Args:
        Z: Array whose first two columns are the x and y coordinates.
        y: Per-point labels used as color values.
        title: Figure title.
        num_classes: Number of colorbar ticks; defaults to the number of
            distinct values in ``y``.
    """
    if num_classes is None:
        num_classes = len(np.unique(y))

    plt.figure(figsize=(7, 6))
    points = plt.scatter(Z[:, 0], Z[:, 1], c=y, s=8, alpha=0.8, cmap="tab10")
    plt.colorbar(points, ticks=range(num_classes))
    plt.title(title)
    plt.xlabel("UMAP-1")
    plt.ylabel("UMAP-2")
    plt.grid(True, ls="--", alpha=0.3)
    plt.tight_layout()
    plt.show()

# Plot the 2-D embeddings of both splits, colored by class label.
scatter_umap(Z_train_2d, y_train, title="Train μ → UMAP (2D)")
scatter_umap(Z_valid_2d, y_valid, title="Valid μ → UMAP (2D)")



Overall structure

* The map shows a continuous, dense cloud rather than distinct clusters.

* That’s expected — your autoencoder is trained unsupervised only to reconstruct images, not to separate classes.

* Therefore, latent codes mainly capture visual similarity (colors, textures, shapes) rather than semantic categories.

# Find outliers
test 

# Sample from the latent distribution

# overlay with data points
What does it mean?

# Feature extraction 
From latent space

In [None]:
# Rebind `device` to wherever the trained model's parameters live, so the
# feature-extraction inputs below are moved to the same device as the model.
device = next(vae.parameters()).device

# --- helper to collect μ and labels from a loader ---
def collect_mu(vae, loader, device, name="dataset"):
    """Encode every batch in ``loader`` and return latent means with labels.

    Progress is printed every 20 batches and after the last one.

    Args:
        vae: Model exposing ``encode(x)`` that returns ``(mean, logvar)``.
        loader: Sized iterable of ``(inputs, labels)`` batches; labels must be
            CPU tensors.
        device: Device the inputs are moved to before encoding.
        name: Label used in the progress messages.

    Returns:
        ``(X, y)``: NumPy arrays with the concatenated means and labels, in
        the order the loader yielded the samples.
    """
    # Imported locally because the file-level `import time` sits in a later
    # cell than this definition; without it, running the cells in order
    # raises NameError on the first call.
    import time

    vae.eval()
    mus, ys = [], []
    start_time = time.time()
    total_batches = len(loader)

    print(f"\n[INFO] Extracting features from {name} ({total_batches} batches)...")
    with torch.no_grad():
        for i, (x, y) in enumerate(loader, start=1):
            x = x.to(device, non_blocking=True)
            mu, _ = vae.encode(x)  # the log-variance is not needed as a feature
            mus.append(mu.detach().cpu().numpy())
            ys.append(y.numpy())

            if i % 20 == 0 or i == total_batches:
                elapsed = time.time() - start_time
                print(f"  Batch {i:>4}/{total_batches} | Time elapsed: {elapsed:5.1f}s")

    X = np.concatenate(mus, axis=0)
    y = np.concatenate(ys, axis=0)
    print(f"[INFO] Done {name}: {X.shape[0]} samples, {X.shape[1]}-dim μs | Total time: {time.time()-start_time:.1f}s\n")
    return X, y


# 1) Extract μ for the train and validation splits
X_train, y_train = collect_mu(vae, train_loader, device, "train set")
X_valid, y_valid = collect_mu(vae, valid_loader, device, "validation set")

# 2) Standardize: the scaler is fitted on the training features only and then
#    applied to both splits, so no validation statistics leak into it.
print("[INFO] Standardizing features...")
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_valid_s = scaler.transform(X_valid)
print("  Done scaling.\n")

# Train classifier/regressor 
Using the latent space representation of the samples

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, confusion_matrix


from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

from sklearn.svm import LinearSVC
from sklearn.dummy import DummyClassifier

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline

import time
import numpy as np


## Helper functions

In [None]:
def evaluate_classifier(
    clf,  # scikit-learn classifier or pipeline
    X: np.ndarray,
    y: np.ndarray,
    name: str = "Model",
) -> None:
    """Print the accuracy and the classification report of ``clf`` on ``(X, y)``.

    Args:
        clf: Fitted estimator exposing ``predict``.
        X: Feature matrix to predict on.
        y: True labels for ``X``.
        name: Tag shown in front of the accuracy line.
    """
    predictions = clf.predict(X)

    accuracy = accuracy_score(y, predictions)
    print(f"[{name}] Accuracy:", accuracy)

    report = classification_report(y, predictions, digits=4)
    print(report)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(model, X_valid, y_valid, class_names=None,
                          title="Confusion Matrix", normalize=True, figsize=(7,6)):
    """
    Compute and plot the confusion matrix for a classifier.

    Parameters
    ----------
    model : trained classifier (e.g., SVM, MLP)
        Must implement `.predict()`.
    X_valid : array-like
        Validation features.
    y_valid : array-like
        True labels.
    class_names : list of str, optional
        Names of classes (e.g., CIFAR-10 class names), one per row of the
        confusion matrix. If None, the label values themselves are used.
    title : str, optional
        Plot title.
    normalize : bool, optional
        Normalize each row to sum to 1. Rows for classes that have no true
        samples are left at 0.
    figsize : tuple, optional
        Figure size.
    """
    # --- Predict ---
    y_pred = model.predict(X_valid)

    # --- Compute confusion matrix ---
    cm = confusion_matrix(y_valid, y_pred)

    # --- Normalize if requested ---
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class that is predicted but never occurs in y_valid has an
        # all-zero row; dividing only where the total is non-zero keeps that
        # row at 0 instead of producing NaN.
        cm = np.divide(
            cm,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    # --- Define labels for axes ---
    if class_names is None:
        # confusion_matrix orders its rows/columns by the sorted union of true
        # and predicted labels, so the tick labels must use the same union.
        labels = np.unique(
            np.concatenate([np.asarray(y_valid).ravel(), np.asarray(y_pred).ravel()])
        )
    else:
        labels = class_names

    # --- Plot ---
    plt.figure(figsize=figsize)
    sns.heatmap(
        cm, annot=True, fmt=".2f" if normalize else "d", cmap="Blues",
        xticklabels=labels, yticklabels=labels
    )
    plt.title(title + (" (Normalized)" if normalize else ""))
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.tight_layout()
    plt.show()



In [None]:
# --- Dummy baseline classifier ---
# Strategy options:
# "most_frequent" → always predicts the most common class
# "stratified" → random predictions following class frequencies
# "uniform" → random uniform over classes
# The strategy is named once so the printed message cannot drift from the
# strategy that is actually used.
dummy_strategy = "uniform"
dummy = DummyClassifier(strategy=dummy_strategy, random_state=42)
dummy.fit(X_train_s, y_train)
y_dummy = dummy.predict(X_valid_s)
dummy_acc = dummy.score(X_valid_s, y_valid)

print(f"\nBaseline (DummyClassifier, strategy='{dummy_strategy}') Accuracy: {dummy_acc:.4f}\n")
print("Classification Report (Dummy Baseline):")
print(classification_report(y_valid, y_dummy, digits=4, zero_division=0))

In [None]:
# --- Logistic Regression on the standardized VAE latent means (μ) ---
# The inputs are X_train_s / X_valid_s, i.e. the scaled encoder means, not the
# 2-D UMAP embedding, so the messages and plot title below say so.
lr_clf = LogisticRegression(max_iter=1000, random_state=42)
lr_clf.fit(X_train_s, y_train)
y_pred = lr_clf.predict(X_valid_s)
acc = lr_clf.score(X_valid_s, y_valid)

print(f"Validation Accuracy on VAE latent features: {acc:.4f}\n")
print("Classification Report (Logistic Regression):")
print(classification_report(y_valid, y_pred, digits=4))


plot_confusion_matrix(
    lr_clf,
    X_valid_s,
    y_valid,
    class_names=cifar10_classes,
    title="Logistic Regression on CIFAR-10 (VAE Features)"
)


In [None]:
# Linear SVM on the VAE latent features, wrapped in a scaling pipeline.
# NOTE(review): X_train_s / X_valid_s were already standardized above, so the
# StandardScaler step inside this pipeline standardizes a second time; it
# looks redundant unless the pipeline is meant to receive the raw X_train.
svm_clf: Pipeline = make_pipeline(
    StandardScaler(),
    LinearSVC(dual=False, C=1.0, max_iter=5000, random_state=42)
)

svm_clf.fit(X_train_s, y_train)

# Evaluate on valid features: accuracy and per-class report
evaluate_classifier(clf=svm_clf, 
                    X=X_valid_s, 
                    y=y_valid, 
                    name="SVM")

# Row-normalized confusion matrix on the validation split
plot_confusion_matrix(
    svm_clf,
    X_valid_s,
    y_valid,
    class_names=cifar10_classes,
    title="SVM Classifier on CIFAR-10 (VAE Features)"
)

In [None]:
# import time
# import numpy as np
# import torch
# from sklearn.preprocessing import StandardScaler
# from sklearn.neural_network import MLPClassifier
# from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# device = next(vae.parameters()).device

# # --- helper to collect μ and labels from a loader ---
# def collect_mu(vae, loader, device, name="dataset"):
#     vae.eval()
#     mus, ys = [], []
#     start_time = time.time()
#     total_batches = len(loader)

#     print(f"\n[INFO] Extracting features from {name} ({total_batches} batches)...")
#     with torch.no_grad():
#         for i, (x, y) in enumerate(loader, start=1):
#             x = x.to(device, non_blocking=True)
#             mu, _ = vae.encode(x)
#             mus.append(mu.detach().cpu().numpy())
#             ys.append(y.numpy())

#             if i % 20 == 0 or i == total_batches:
#                 elapsed = time.time() - start_time
#                 print(f"  Batch {i:>4}/{total_batches} | Time elapsed: {elapsed:5.1f}s")

#     X = np.concatenate(mus, axis=0)
#     y = np.concatenate(ys, axis=0)
#     print(f"[INFO] Done {name}: {X.shape[0]} samples, {X.shape[1]}-dim μs | Total time: {time.time()-start_time:.1f}s\n")
#     return X, y


# # 1) Extract μ for all splits
# X_train, y_train = collect_mu(vae, train_loader, device, "train set")
# X_valid, y_valid = collect_mu(vae, valid_loader, device, "validation set")

# # 2) Standardize
# print("[INFO] Standardizing features...")
# scaler = StandardScaler().fit(X_train)
# X_train_s = scaler.transform(X_train)
# X_valid_s = scaler.transform(X_valid)
# print("  Done scaling.\n")

# # 3) Train the MLP
# print("[INFO] Training MLP classifier on latent μ features...")
# start = time.time()
# clf = MLPClassifier(hidden_layer_sizes=(256,), max_iter=100, alpha=1e-4, random_state=42)
# clf.fit(X_train_s, y_train)
# print(f"  MLP training done in {time.time()-start:.1f}s\n")

# # 4) Evaluate
# print("[INFO] Evaluating on validation set...")
# y_pred_val = clf.predict(X_valid_s)
# val_acc = accuracy_score(y_valid, y_pred_val)
# print(f"\nValidation accuracy (μ → MLP): {val_acc:.4f}\n")

# print("Classification Report (Validation):")
# print(classification_report(y_valid, y_pred_val, digits=4, zero_division=0))

# cm = confusion_matrix(y_valid, y_pred_val)
# print("Confusion Matrix (Validation):")
# print(cm)
