In [2]:
import torch
import wandb
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchsummary import summary
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Authenticate this session with Weights & Biases so that the runs started
# later with wandb.init() can be logged.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mflorianbaer[0m (use `wandb login --relogin` to force relogin)


True

In [3]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

### Loading Data

In [3]:
# Fashion-MNIST train and test partitions, stored under ./data (downloaded if
# not already present) and converted to tensors by ToTensor().
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [4]:
# Hold out 10,000 of the training images for validation. The generator is
# seeded so that the partition is the same on every Restart & Run All.
# NOTE: this rebinds `training_data` to the 50,000-sample subset; re-run the
# loading cell before re-running this one, otherwise the requested lengths no
# longer add up to the dataset size.
split_generator = torch.Generator().manual_seed(42)
training_data, validation_data = torch.utils.data.random_split(
    training_data, [50000, 10000], generator=split_generator
)

In [5]:
# Sanity check: number of samples in the train / validation / test partitions.
print(*(len(split) for split in (training_data, validation_data, test_data)))

50000 10000 10000


### MLP with Dropout Regularisation

Use different dropout rates for the input layer (`p_in`) and hidden layers (`p_hidden`). 

In [6]:
# Baseline hyperparameters: dropout probability on the input, dropout
# probability on the hidden layer, and number of hidden units.
p_in, p_hidden, n_units = 0.2, 0.5, 200

In [7]:
from torch import nn

# Baseline MLP: flatten -> input dropout -> hidden linear layer -> hidden
# dropout -> sigmoid -> linear layer with 10 outputs.
model = nn.Sequential(
    nn.Flatten(),
    nn.Dropout(p_in),
    nn.Linear(28 * 28, n_units),
    nn.Dropout(p_hidden),
    nn.Sigmoid(),
    nn.Linear(n_units, 10),
)
# `summary` is already imported in the notebook's import cell, so it is not
# re-imported here.
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 784]               0
           Dropout-2                  [-1, 784]               0
            Linear-3                  [-1, 200]         157,000
           Dropout-4                  [-1, 200]               0
           Sigmoid-5                  [-1, 200]               0
            Linear-6                   [-1, 10]           2,010
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.61
Estimated Total Size (MB): 0.63
----------------------------------------------------------------


In [8]:
def get_model(n_units, p_in, p_hidden):
    """Build a single-hidden-layer MLP classifier with dropout.

    Args:
        n_units: Number of units in the hidden layer.
        p_in: Dropout probability applied to the flattened input.
        p_hidden: Dropout probability applied to the output of the first
            linear layer (before the sigmoid).

    Returns:
        An ``nn.Sequential`` that flattens each sample to 28*28 features and
        maps it to 10 output scores.

    NOTE(review): the hidden dropout sits before the sigmoid, so a dropped
    unit outputs sigmoid(0) = 0.5 rather than 0 -- confirm this ordering is
    intended.
    """
    layers = [
        nn.Flatten(),
        nn.Dropout(p=p_in),
        nn.Linear(in_features=28 * 28, out_features=n_units),
        nn.Dropout(p=p_hidden),
        nn.Sigmoid(),
        nn.Linear(in_features=n_units, out_features=10),
    ]
    return nn.Sequential(*layers)

### Training

Implement the training / evaluation loop

Record and return the training / validation cost and accuracy for each epoch. 

In [9]:
def train_eval(model, lr, nepochs, nbatch, training_data, validation_data):
    """Train ``model`` with SGD and evaluate it on the validation set every epoch.

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        lr: Learning rate passed to ``torch.optim.SGD``.
        nepochs: Number of passes over ``training_data``.
        nbatch: Mini-batch size used for both data loaders.
        training_data: Dataset yielding ``(inputs, targets)`` pairs for training.
        validation_data: Dataset yielding ``(inputs, targets)`` pairs for validation.

    Returns:
        Tuple ``(cost_hist, cost_hist_val, acc_hist, acc_hist_val)``; each is a
        list with one plain Python float per epoch. Costs are the mean of the
        per-batch cross-entropy values; accuracies are the fraction of
        correctly classified samples.

    Side effects:
        Logs the four per-epoch metrics with ``wandb.log`` and prints one
        summary line per epoch.
    """
    cost_ce = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    train_loader = DataLoader(training_data, nbatch, shuffle=True)
    # The epoch metrics do not depend on batch order, so validation is not shuffled.
    val_loader = DataLoader(validation_data, nbatch, shuffle=False)

    # Run each batch on the device the model's parameters live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    cost_hist = []
    cost_hist_val = []
    acc_hist = []
    acc_hist_val = []
    for epoch in range(nepochs):
        # Training pass: dropout active, parameters updated after every batch.
        model.train()
        training_cost, correct = 0.0, 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            predictions = model(inputs)
            cost = cost_ce(predictions, targets)
            cost.backward()
            optimizer.step()
            training_cost += cost.item()
            correct += (predictions.argmax(dim=1) == targets).sum().item()
        training_cost /= len(train_loader)
        training_acc = correct / len(train_loader.dataset)

        # Validation pass: dropout disabled, no gradients tracked.
        model.eval()
        validation_cost, correct = 0.0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                predictions = model(inputs)
                validation_cost += cost_ce(predictions, targets).item()
                # .item() keeps the counter a Python number so the accuracy is
                # a float (not a 0-d tensor) when logged, stored and plotted.
                correct += (predictions.argmax(dim=1) == targets).sum().item()
        validation_cost /= len(val_loader)
        validation_acc = correct / len(val_loader.dataset)

        wandb.log({"train_cost": training_cost, "train_acc": training_acc,
                   "val_cost": validation_cost, "val_acc": validation_acc})
        cost_hist.append(training_cost)
        acc_hist.append(training_acc)
        cost_hist_val.append(validation_cost)
        acc_hist_val.append(validation_acc)
        print("Epoch {:2}: {:.2f}, {:.2f}, {:.2f}, {:.2f}"
              .format(epoch, training_cost, validation_cost, training_acc, validation_acc))

    return cost_hist, cost_hist_val, acc_hist, acc_hist_val

### Analyse Different Settings

Start with a baseline model: 200 units in a single hidden layer; batch size 64; properly tuned learning rate, no dropout.

Then play with different model complexities and dropout rates and compare them on the basis of the validation set.

Also estimate the variance error as the difference between the validation and training loss / accuracy.

Finally, identify a favourite combination (model complexity, dropout rate) and compute the test accuracy. 


In [10]:
# Per-run histories, appended in sweep order: complexity is the outer loop,
# dropout rate the inner loop.
costs = {"train": [], "valid": []}
accs = {"train": [], "valid": []}
nbatch = 64
nepochs = 100
lr = 0.25

complexity = [100, 300, 500]  # hidden-layer widths to compare
drop_p = [0, 0.2, 0.3]        # dropout rate, used for both p_in and p_hidden

# Same order as the tuple returned by train_eval.
hists = [costs["train"], costs["valid"], accs["train"], accs["valid"]]
i = 1
for c in complexity:
    for p in drop_p:
        print(f"######## complexity: {c}, dropout rate: {p}")
        wandb.init(
            # Set the project where this run will be logged
            project="model_selection_FashionMNIST",
            # Explicit run name (otherwise a random one is assigned)
            name=f"experiment_model_selection_{i}",
            # Track hyperparameters and run metadata
            config={
                "learning_rate": lr,
                "dataset": "FashionMNIST",
                "epochs": nepochs,
                "batch_size": nbatch,
                "p_in": p,
                "p_hidden": p,
                "hidden_units": c,
            },
        )
        try:
            model = get_model(n_units=c, p_in=p, p_hidden=p)
            res = train_eval(model, lr, nepochs, nbatch, training_data, validation_data)
            for hist, val in zip(hists, res):
                hist.append(val)
        except BaseException:
            # Also covers KeyboardInterrupt: close the run as failed so it is
            # not left open, then let the error propagate.
            wandb.finish(exit_code=1)
            raise
        else:
            wandb.finish()
        i += 1

######## complexity: 100, dropout rate: 0


Epoch  0: 0.75, 0.52, 0.74, 0.82
Epoch  1: 0.49, 0.55, 0.82, 0.79
Epoch  2: 0.44, 0.46, 0.84, 0.82
Epoch  3: 0.41, 0.49, 0.85, 0.81
Epoch  4: 0.40, 0.41, 0.86, 0.85
Epoch  5: 0.38, 0.61, 0.86, 0.76
Epoch  6: 0.37, 0.40, 0.87, 0.85
Epoch  7: 0.36, 0.37, 0.87, 0.86
Epoch  8: 0.35, 0.38, 0.87, 0.87
Epoch  9: 0.34, 0.50, 0.88, 0.80
Epoch 10: 0.33, 0.38, 0.88, 0.86
Epoch 11: 0.33, 0.37, 0.88, 0.86
Epoch 12: 0.32, 0.42, 0.88, 0.84
Epoch 13: 0.31, 0.41, 0.89, 0.85
Epoch 14: 0.31, 0.34, 0.89, 0.88
Epoch 15: 0.30, 0.37, 0.89, 0.87
Epoch 16: 0.30, 0.35, 0.89, 0.87
Epoch 17: 0.29, 0.43, 0.89, 0.83
Epoch 18: 0.29, 0.41, 0.90, 0.85
Epoch 19: 0.28, 0.34, 0.90, 0.88
Epoch 20: 0.28, 0.33, 0.90, 0.88
Epoch 21: 0.28, 0.36, 0.90, 0.87
Epoch 22: 0.27, 0.38, 0.90, 0.86
Epoch 23: 0.27, 0.34, 0.90, 0.88
Epoch 24: 0.26, 0.35, 0.90, 0.87
Epoch 25: 0.26, 0.32, 0.90, 0.88
Epoch 26: 0.26, 0.40, 0.91, 0.86
Epoch 27: 0.26, 0.38, 0.91, 0.87
Epoch 28: 0.25, 0.35, 0.91, 0.87
Epoch 29: 0.25, 0.34, 0.91, 0.87
Epoch 30: 


KeyboardInterrupt



### Suitable Output Plots

Possibly adjust to fit your needs...

In [None]:
def plot_histories(fig_num, histories, title, ylim):
    """Plot the per-epoch training (dashed) and validation (solid) curves of every run.

    Args:
        fig_num: Matplotlib figure number to draw into.
        histories: Dict with keys "train" and "valid", each a list holding one
            per-epoch history per run, in sweep order.
        title: Figure title.
        ylim: ``(low, high)`` limits of the y-axis.
    """
    plt.figure(fig_num, figsize=(12, 8))
    for ic, c in enumerate(complexity):
        for ip, p in enumerate(drop_p):
            # Runs are stored with complexity as the outer loop, so the stride
            # between complexities is the number of dropout rates.
            i = ip + ic * len(drop_p)
            if i >= len(histories["train"]) or i >= len(histories["valid"]):
                continue  # run was never recorded (e.g. the sweep was interrupted)
            train_hist = histories["train"][i]
            valid_hist = histories["valid"][i]
            # Use the actual history length so partially recorded runs still plot.
            plt.plot(range(len(train_hist)), train_hist, color=f"C{i}", linestyle="--", label=f"train {p},{c}")
            plt.plot(range(len(valid_hist)), valid_hist, color=f"C{i}", label=f"valid {p},{c}")
    plt.xlabel("Epoch", fontsize=18)
    plt.xlim(0, nepochs)
    plt.ylim(*ylim)
    plt.title(title, fontsize=18)
    plt.legend()


plot_histories(1, costs, "Cross-Entropy Cost", (0, 1))
plot_histories(2, accs, "Accuracy", (0.7, 1.0))
plt.show()

Process wandb_internal:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.9/site-packages/wandb/sdk/internal/internal.py", line 162, in wandb_internal
    thread.join()
  File "/opt/conda/lib/python3.9/threading.py", line 1053, in join
    self._wait_for_tstate_lock()
  File "/opt/conda/lib/python3.9/threading.py", line 1069, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/opt/conda/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/opt/conda/lib/python3.9/multiprocessing/spawn.py", line 129, in _main
    return self._bootstrap(parent_sentinel)
  File "/opt/conda/lib/py