In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings

warnings.filterwarnings("ignore")

In [17]:
# Device-agnostic setup (works on macOS as well as CUDA machines).
# Preference order: NVIDIA GPU first, then Apple Silicon GPU, then CPU.

device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

device  # echo the selected backend as the cell output

'mps'

In [18]:
# 1. Define your CNN module

class SimpleCNN(nn.Module):
    """Small two-convolution CNN that maps a batch of images to class logits.

    Layout: [3x3 conv (padding 1) -> 2x2 max-pool -> ReLU] twice, flatten,
    dropout, a 128-unit ReLU layer, then the output layer. ``forward`` returns
    raw logits (no softmax is applied), so it should be trained with a
    logits-based loss such as ``nn.CrossEntropyLoss``.

    Args:
        channels: Output channels of the first conv; the second conv uses
            twice as many.
        dropout: Dropout probability applied to the flattened conv features.
        in_channels: Number of channels in the input images (1 = grayscale).
        num_classes: Number of output logits.
        image_size: Height/width of the square input images. The default of
            28 reproduces the original hard-coded ``7 * 7`` feature map.

    Raises:
        ValueError: If ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, channels=32, dropout=0.0, in_channels=1, num_classes=10,
                 image_size=28):
        super().__init__()
        # The padded 3x3 convs keep the spatial size; each 2x2 max-pool halves
        # it (rounding down), so two pools leave image_size // 2 // 2 per side.
        pooled_size = image_size // 2 // 2
        if pooled_size < 1:
            raise ValueError(
                f"image_size={image_size} is too small for two 2x2 max-pools"
            )

        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels * 2, kernel_size=3, padding=1)
        self.fc1   = nn.Linear((channels * 2) * pooled_size * pooled_size, 128)
        self.fc2   = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for an image batch ``x``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = torch.flatten(x, 1)  # keep the batch dim, flatten everything else
        x = F.relu(self.fc1(self.dropout(x)))
        x = self.fc2(x)
        return x

In [19]:
# 2. Prepare dataset (FashionMNIST)

# Normalisation constants shared by the torchvision transform and the manual
# tensor path below, so both produce identically scaled inputs.
PIXEL_MEAN, PIXEL_STD = 0.5, 0.5

transform = transforms.Compose([
    transforms.ToTensor()
    , transforms.Normalize((PIXEL_MEAN,), (PIXEL_STD,))
])

train_data = datasets.FashionMNIST(
    root="./data"
    , train=True
    , download=True
    , transform=transform
)

# `train_data.data` holds the raw images; `transform` is only applied when
# items are fetched by indexing the dataset. X is built straight from the raw
# tensor, so the ToTensor scaling (/255) and the Normalize step are
# replicated by hand here; previously the Normalize step was silently skipped.
X = train_data.data.unsqueeze(1).float() / 255.0  # shape: (N, 1, 28, 28), values in [0, 1]
X = (X - PIXEL_MEAN) / PIXEL_STD                  # same scaling as `transform`
y = train_data.targets

X.shape, y.shape

(torch.Size([60000, 1, 28, 28]), torch.Size([60000]))

In [20]:
# 3. Wrap model with skorch

net = NeuralNetClassifier(
    SimpleCNN
    # SimpleCNN.forward returns raw logits. skorch's default criterion is
    # NLLLoss, for which skorch takes log() of the module output (it expects
    # probabilities), so logits would give a wrong or NaN loss. With
    # CrossEntropyLoss the logits are consumed directly and skorch applies
    # softmax itself in predict_proba.
    , criterion=torch.nn.CrossEntropyLoss
    , max_epochs=3   # keep small for testing
    , lr=0.001
    , optimizer=torch.optim.Adam
    , batch_size=256
    # The scikit-learn searches below already score on held-out CV folds, so
    # skorch's internal train/validation split is disabled and every fit
    # trains on its whole training fold.
    , train_split=None
    , device=device
    , verbose=0   # suppress per-epoch logs
)

In [22]:
# 4. Hyper-parameter grid for the exhaustive search.
#    Keys starting with 'module__' are forwarded to the module's constructor;
#    the remaining keys are set on the skorch net itself.

param_grid = dict(
    module__channels=[32, 64],
    module__dropout=[0.0, 0.25, 0.5],
    lr=[1e-4, 3e-4, 1e-3],
    optimizer=[torch.optim.Adam, torch.optim.SGD],
    batch_size=[256],
    max_epochs=[3],
)


In [None]:
%%time

# 5B. Randomized search: a cheaper alternative that samples the space
#     instead of trying every combination.

# Candidate values per hyper-parameter ('module__' keys go to the module).
param_dist = dict(
    module__channels=[32, 64],
    module__dropout=[0.0, 0.25, 0.5],
    lr=[1e-4, 3e-4, 1e-3],
    optimizer=[torch.optim.Adam, torch.optim.SGD],
    batch_size=[256],
)

# NOTE(review): n_jobs=-1 starts one worker per core; when `device` is a GPU
# the workers presumably all share that one device. Confirm this is intended.
rs = RandomizedSearchCV(
    net,
    param_dist,
    n_iter=10,            # evaluate only 10 sampled combinations
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
    random_state=27,      # fixes which combinations get sampled
    verbose=0,
)

# Run the search on the full training tensors.
rs.fit(X, y)

print("Best CV accuracy:", rs.best_score_)
print("Best params:", rs.best_params_)

In [None]:
# Collect the randomized-search results in a DataFrame, best candidates first.

results = pd.DataFrame(rs.cv_results_)

# End the cell on the expression instead of print() so Jupyter renders the
# frame as a rich table, consistent with the other cells in this notebook.
results[[
    "mean_test_score"
    , "std_test_score"
    , "params"
]].sort_values("mean_test_score", ascending=False)

In [None]:
# 5A. Exhaustive grid search: slow, but evaluates every combination
#     in `param_grid`.
# ---------------------------------------------------------
# NOTE(review): n_jobs=-1 starts one worker per core; when `device` is a GPU
# the workers presumably all share that one device. Confirm this is intended.
gs = GridSearchCV(
    net,
    param_grid,
    scoring="accuracy",   # accuracy is the metric used for FashionMNIST here
    cv=3,                 # 3-fold cross validation
    n_jobs=-1,            # parallelize if possible
    verbose=2,
)

# Run the search on the full training tensors.
gs.fit(X, y)

print("Best CV accuracy:", gs.best_score_)
print("Best params:", gs.best_params_)