# Lab 6.2 - MNIST CNN with PyTorch

In [1]:
!pip install skorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting skorch
  Downloading skorch-0.12.1-py3-none-any.whl (193 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.7/193.7 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: skorch
Successfully installed skorch-0.12.1


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier

In [13]:
# Define the CNN network for classification on MNIST
class CNN(nn.Module):
    def __init__(self, num_conv_layers, num_filters, kernel_size, num_dense_layers, num_units, dropout, num_classes=10):
        super(CNN, self).__init__()
        self.num_conv_layers = num_conv_layers
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.num_dense_layers = num_dense_layers
        self.num_units = num_units
        self.dropout = dropout
        self.num_classes = num_classes

        # Convolutional layers
        self.conv_layers = nn.ModuleList()
        for i in range(num_conv_layers):
            if i == 0:
                self.conv_layers.append(nn.Conv2d(1, num_filters, kernel_size, padding=1))
            else:
                self.conv_layers.append(nn.Conv2d(num_filters, num_filters, kernel_size, padding=1))
            self.conv_layers.append(nn.ReLU())
            self.conv_layers.append(nn.MaxPool2d(2))
            self.conv_layers.append(nn.Dropout(dropout))
        
        # Dense layers
        self.dense_layers = nn.ModuleList()
        self.conv_out_size = 28 // (2 ** num_conv_layers)
        for i in range(num_dense_layers):
            if i == 0:
                self.dense_layers.append(
                    nn.Linear(self.conv_out_size * self.conv_out_size * num_filters, num_units))
            else:
                self.dense_layers.append(nn.Linear(num_units, num_units))
            self.dense_layers.append(nn.ReLU())
            self.dense_layers.append(nn.Dropout(dropout))
        self.dense_layers.append(nn.Linear(num_units, num_classes))
        
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = x.view(-1, 1, 28, 28)
        for layer in self.conv_layers:
            x = layer(x)
        x = x.view(-1, self.conv_out_size *
                   self.conv_out_size * self.num_filters)
        for layer in self.dense_layers:
            x = layer(x)
        return x

In [14]:
# Load the MNIST dataset
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform)

testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform)

In [15]:
# Convert trainset and testset to numpy arrays
X_train = trainset.data.numpy()
y_train = trainset.targets.numpy()
X_test = testset.data.numpy()
y_test = testset.targets.numpy()

# Convert numpy arrays to tensors
X_train = torch.from_numpy(X_train).to(torch.float)
y_train = torch.from_numpy(y_train)
X_test = torch.from_numpy(X_test).to(torch.float)
y_test = torch.from_numpy(y_test)

print("X_train.shape", X_train.shape)
print("y_train.shape", y_train.shape)
print("X_train.dtype", X_train.dtype)
print("y_train.dtype", y_train.dtype)

X_train.shape torch.Size([60000, 28, 28])
y_train.shape torch.Size([60000])
X_train.dtype torch.float32
y_train.dtype torch.int64


In [18]:
# Perform grid search
model = NeuralNetClassifier(
    module=CNN,
    max_epochs=15,
    lr=0.001,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss,
    batch_size=128,
)

In [19]:
# Define hyperparameters for grid search
params = {
    "module__num_conv_layers": [4],
    "module__num_filters": [32, 64],
    "module__kernel_size": [3],
    "module__num_dense_layers": [3],
    "module__num_units": [128],
    "module__dropout": [0.3],
}

grid_search = GridSearchCV(estimator=model, param_grid=params, cv=3, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 2 candidates, totalling 6 fits
  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m1.7323[0m       [32m0.8472[0m        [35m0.5666[0m  29.9024
[CV] END module__dropout=0.3, module__kernel_size=3, module__num_conv_layers=4, module__num_dense_layers=3, module__num_filters=32, module__num_units=128; total time=  57.3s


KeyboardInterrupt: ignored

In [8]:
# Find the model with the best parameters
model.fit(X_train, y_train)

TypeError: ignored

In [None]:
model.score(X_test, y_test)

In [None]:
# TODO: Load another dataset of your choice.

In [None]:
# TODO: Reimplement the MLP class to work with the new dataset.

In [None]:
# TODO: Wrap the MLP into a scikit-learn classifier

In [None]:
# TODO: Perform grid search to find the best hyperparameters.

In [None]:
# TODO: Train and evaluate the MLP on the new dataset using the best hyperparameters.