# Lab 6.1 - MNIST MLP with PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier

In [None]:
# Seed PyTorch's default random generator with a fixed value so that runs of
# this notebook start from the same RNG state.
torch.manual_seed(0)

<torch._C.Generator at 0x7f8435e3f230>

In [None]:
# Define the neural network
class MLP(nn.Module):
    """Fully connected classifier: flatten -> input layer -> hidden stack -> output layer.

    The input layer and every hidden layer are followed by ReLU; each hidden
    layer is additionally followed by dropout. The output layer's result is
    returned as-is (no softmax is applied here).

    Args:
        num_layers: Number of hidden ``Linear(num_units, num_units)`` layers
            placed between the input layer and the output layer.
        num_units: Output width of the input layer and width of every hidden layer.
        dropout: Drop probability of the dropout applied after each hidden ReLU.
        in_features: Number of values per sample once flattened. Defaults to
            ``28 * 28``.
        num_classes: Number of scores produced per sample. Defaults to ``10``.
    """

    def __init__(self, num_layers=10, num_units=128, dropout=0.5,
                 in_features=28 * 28, num_classes=10):
        super().__init__()
        # Parameterised layers are created in the order input -> hidden -> output.
        # With a fixed seed that order determines which random numbers each
        # layer's initialisation consumes, so keep it stable.
        self.flatten = nn.Flatten()
        self.input_layer = nn.Linear(in_features, num_units)
        self.relu = nn.ReLU()
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(num_units, num_units) for _ in range(num_layers)])
        self.output_layer = nn.Linear(num_units, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return one row of ``num_classes`` scores for each sample in ``x``.

        ``x`` is flattened from dimension 1 onwards, so every sample must hold
        ``in_features`` values in total.
        """
        x = self.flatten(x)
        x = self.relu(self.input_layer(x))
        for hidden_layer in self.hidden_layers:
            # Linear -> ReLU -> dropout for every hidden layer.
            x = self.dropout(self.relu(hidden_layer(x)))
        return self.output_layer(x)

In [None]:
# Load the MNIST dataset
# Preprocessing attached to both splits: convert each image to a tensor, then
# normalise it with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Arguments shared by the training and test splits; `download=True` asks
# torchvision to fetch the files into ./data.
mnist_kwargs = {"root": "./data", "download": True, "transform": transform}

trainset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
testset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
# Build the feature / label tensors that are passed to scikit-learn.
#
# `.data` exposes the stored raw pixel values (0-255); a dataset's `transform`
# only runs when individual samples are fetched through the dataset, so reading
# `.data` bypasses it. The same preprocessing is therefore reproduced here by
# hand: scale to [0, 1] (what ToTensor does for 8-bit images), then apply
# Normalize((0.5,), (0.5,)).
PIXEL_MAX = 255.0
NORM_MEAN, NORM_STD = 0.5, 0.5  # keep in sync with the Normalize() given to the datasets

X_train = (trainset.data.to(torch.float) / PIXEL_MAX - NORM_MEAN) / NORM_STD
y_train = trainset.targets
X_test = (testset.data.to(torch.float) / PIXEL_MAX - NORM_MEAN) / NORM_STD
y_test = testset.targets

# Sanity-check shapes and dtypes before training.
print("X_train.shape", X_train.shape)
print("y_train.shape", y_train.shape)
print("X_train.dtype", X_train.dtype)
print("y_train.dtype", y_train.dtype)

X_train.shape torch.Size([60000, 28, 28])
y_train.shape torch.Size([60000])
X_train.dtype torch.float32
y_train.dtype torch.int64


In [None]:
# Show the distinct class labels. `.tolist()` yields plain Python ints, so the
# set prints as {0, 1, ...} however the installed NumPy renders its scalar types.
print("Labels", set(y_train.tolist()))

Labels {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}


In [None]:
# Wrap the MLP into a scikit-learn classifier
# The module is passed as a class rather than an instance, so skorch builds it
# itself from any `module__*` parameters (e.g. those set by a grid search).
# To train on a GPU, pass `device="cuda"` here instead of moving the module by hand.
model = NeuralNetClassifier(
    MLP,
    max_epochs=50,
    lr=0.001,
    optimizer=optim.Adam,
    # CrossEntropyLoss works on the raw scores that MLP.forward returns.
    criterion=nn.CrossEntropyLoss,
    verbose=True,
)


In [None]:
# Define hyperparameters for grid search
# Keys prefixed with `module__` target the wrapped module's constructor
# arguments; the remaining keys are parameters of the classifier wrapper.
params = {
    "lr": [1e-4, 1e-3],
    "module__num_layers": [3, 5],
    "module__num_units": [128, 256],
    "max_epochs": [50],
}

# 2 x 2 x 2 x 1 = 8 candidate settings, each evaluated with 3-fold cross-validation.
grid_search = GridSearchCV(model, params, cv=3, verbose=2)
grid_search.fit(X_train, y_train)


Fitting 3 folds for each of 8 candidates, totalling 24 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.4395[0m       [32m0.6560[0m        [35m1.4228[0m  2.1830
      2        [36m1.4204[0m       [32m0.8241[0m        [35m0.6592[0m  1.8642
      3        [36m0.9120[0m       [32m0.8819[0m        [35m0.4254[0m  1.8654
      4        [36m0.6820[0m       [32m0.9042[0m        [35m0.3386[0m  1.9449
      5        [36m0.5499[0m       [32m0.9181[0m        [35m0.2905[0m  1.9649
      6        [36m0.4588[0m       [32m0.9273[0m        [35m0.2652[0m  2.5458
      7        [36m0.3903[0m       [32m0.9323[0m        [35m0.2465[0m  1.9108
      8        [36m0.3475[0m       [32m0.9387[0m        [35m0.2281[0m  1.9108
      9        [36m0.3149[0m       [32m0.9431[0m        [35m0.2190[0m  2.8152
     10        [36m0.2740[0m       [32m0.9446[0m        [35m0.213

In [None]:
# Read the parameter combination the fitted grid search reports as best and print it.
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)

Best hyperparameters: {'lr': 0.0001, 'max_epochs': 50, 'module__num_layers': 5, 'module__num_units': 256}


In [None]:
# Find the model with the best parameters
# (the estimator the fitted grid search exposes as `best_estimator_`).
best_model = grid_search.best_estimator_

In [None]:
# Train the selected model on the complete training set.
# NOTE(review): the grid search was constructed without a `refit=` argument and
# scikit-learn documents `refit=True` as the default, so `best_estimator_` has
# presumably already been trained on this same data — confirm whether this
# second training run is intended.
best_model.fit(X_train, y_train)

Re-initializing module because the following parameters were re-set: num_layers, num_units.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.1124[0m       [32m0.5838[0m        [35m1.1552[0m  6.2696
      2        [36m0.9383[0m       [32m0.8898[0m        [35m0.4071[0m  6.6761
      3        [36m0.5178[0m       [32m0.9348[0m        [35m0.2550[0m  6.1313
      4        [36m0.3504[0m       [32m0.9487[0m        [35m0.2122[0m  6.8096
      5        [36m0.2635[0m       [32m0.9557[0m        [35m0.1862[0m  6.2095
      6        [36m0.2176[0m       [32m0.9582[0m        [35m0.1775[0m  6.8460
      7        [36m0.1914[0m       [32m0.9624[0m        [35m0.1616[0m  6.2218
      8        [36m0.1663[0m       [32m0.9634[0m        [35m0.1505[0m  6.8586
      9        [36m0.1438[0m       [32m0.9651[0m        0.1611  

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MLP(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (input_layer): Linear(in_features=784, out_features=256, bias=True)
    (relu): ReLU()
    (hidden_layers): ModuleList(
      (0-4): 5 x Linear(in_features=256, out_features=256, bias=True)
    )
    (output_layer): Linear(in_features=256, out_features=10, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  ),
)

In [None]:
# Evaluate the trained model on the held-out test split.
# NOTE(review): for a classifier `score` is presumably mean accuracy — confirm.
best_model.score(X_test, y_test)


0.9788

In [None]:
# TODO: Load another dataset of your choice.

In [None]:
# TODO: Reimplement the MLP class to work with the new dataset.

In [None]:
# TODO: Wrap the MLP into a scikit-learn classifier

In [None]:
# TODO: Perform grid search to find the best hyperparameters.

In [None]:
# TODO: Train and evaluate the MLP on the new dataset using the best hyperparameters.