In [None]:
# Device-agnostic setup (works on macOS, Linux and Windows):
# prefer an NVIDIA GPU, then an Apple Silicon GPU, then fall back to the CPU.
#
# `torch` is imported in this cell because the cell sits above the main import
# cell; without it a fresh-kernel "Run All" stops here with a NameError.
import torch

# Guard the lookup in case this torch build does not ship an MPS backend module.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device = "cuda"  # Use NVIDIA GPU (if available)
elif _mps_backend is not None and _mps_backend.is_available():
    device = "mps"  # Use Apple Silicon GPU (if available)
else:
    device = "cpu"  # Default to CPU if no GPU is available

device

In [1]:
import warnings

import numpy as np
import torch
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier
from skorch.helper import SliceDataset
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

warnings.filterwarnings("ignore")

In [2]:
# 1) Define a simple CNN

class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for single-channel images.

    The feature extractor is two (3x3 conv -> ReLU -> 2x2 max-pool) stages.
    The head flattens the feature map, applies dropout, and maps it through a
    128-unit hidden layer to 10 raw output scores (no softmax is applied here).

    The first linear layer is sized for a 7x7 feature map, which is what the
    two 2x2 poolings produce for 28x28 inputs.

    Args:
        channels: Output channels of the first convolution; the second
            convolution produces twice as many.
        dropout: Drop probability used by both dropout layers in the head.
    """

    def __init__(self, channels=32, dropout=0.25):
        super().__init__()
        wide_channels = channels * 2

        # Layers are created in the same order they are applied.
        conv_stages = [
            nn.Conv2d(1, channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(channels, wide_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        ]
        self.features = nn.Sequential(*conv_stages)

        head_layers = [
            nn.Flatten(),
            nn.Dropout(dropout),
            nn.Linear(wide_channels * 7 * 7, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(128, 10),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, then the classification head, on ``x``."""
        return self.classifier(self.features(x))

# 2) Data transforms
# Convert each image to a tensor, then normalize its single channel
# with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [3]:
# 3) Load datasets
# Both splits share the same root directory, download flag and transform;
# only the `train` flag differs (True -> training split, False -> test split).
train_ds, test_ds = (
    datasets.FashionMNIST(
        root="./data",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

In [4]:
# 4) Skorch estimator wrapping our PyTorch module
# Double-underscore keys are routed by skorch to the named component
# (module__* -> SimpleCNN constructor, optimizer__* -> optimizer,
# iterator_train__* -> training DataLoader).
net = NeuralNetClassifier(
    module=SimpleCNN,
    module__channels=32,
    module__dropout=0.25,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    optimizer__weight_decay=0.0,
    lr=1e-3,
    batch_size=128,
    max_epochs=6,
    iterator_train__shuffle=True,
    device=device,
)

In [None]:
# 5) Define param grid (note skorch prefixes module params with 'module__')
# The full grid is 3 * 3 * 2 * 2 * 2 * 2 * 1 = 144 combinations, each of which
# is fitted once per CV fold, so trim the lists for a quick run.
param_grid = dict(
    module__channels=[16, 32, 64],
    module__dropout=[0.0, 0.25, 0.5],
    lr=[1e-3, 3e-4],
    optimizer=[torch.optim.Adam, torch.optim.SGD],
    optimizer__weight_decay=[0.0, 1e-4],
    batch_size=[64, 128],
    max_epochs=[6],  # keep small for demo
)


In [None]:
# 6) GridSearchCV
# Exhaustive search over `param_grid`, scored by accuracy with 3-fold CV.
gs = GridSearchCV(
    estimator=net,
    param_grid=param_grid,
    scoring="accuracy",
    n_jobs=1,  # set >1 if CPU allows and your model is CPU-bound
    cv=3,
    refit=True,  # refit on full training set with best params
    verbose=1,
)

In [None]:
# 7) Fit on the training dataset.
# GridSearchCV has to index X and y itself to build the CV folds, and the
# "accuracy" scorer needs real labels, so passing the raw torch dataset with
# y=None does not work. SliceDataset exposes only the image component (idx=0)
# of each (image, label) pair in a form sklearn can slice, and the labels are
# passed separately as a NumPy array.
X_train = SliceDataset(train_ds, idx=0)
y_train = np.asarray(train_ds.targets)

gs.fit(X_train, y_train)

print("Best params:", gs.best_params_)
print("Best CV score:", gs.best_score_)

# 8) Evaluate on test set
# Iterate the test set in fixed-order batches, collecting the true labels and
# the refitted best estimator's predictions for each batch.
test_loader = DataLoader(
    test_ds,
    batch_size=256,
    shuffle=False,
)

y_true, y_pred = [], []
for X, y in test_loader:
    y_true.append(y.numpy())
    # skorch's predict() already returns a NumPy array; calling .numpy() on it
    # would raise AttributeError, so the result is appended as-is.
    y_pred.append(gs.best_estimator_.predict(X))

y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)
test_acc = accuracy_score(y_true, y_pred)
print("Test accuracy:", test_acc)
