# Neural Networks for MNIST dataset

In [2]:
! pip install torchvision

Collecting torchvision
[?25l  Downloading https://files.pythonhosted.org/packages/02/c8/26ff0db66e6dd30a3ed2bfbceae9744359ae4cbb48864c70121a41c21ca5/torchvision-0.5.0-cp37-cp37m-macosx_10_9_x86_64.whl (438kB)
[K     |████████████████████████████████| 440kB 4.2MB/s eta 0:00:01
Installing collected packages: torchvision
Successfully installed torchvision-0.5.0


In [1]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import numpy as np

In [27]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

## Loading MNIST
Here we load the dataset and create data loaders.

In [3]:
train_ds = datasets.MNIST('../data', train=True, download=True, 
                       transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))
test_ds = datasets.MNIST('../data', train=False, download=True, 
                       transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))

In [4]:
batch_size = 32
#batch_size = 5 # for testing
kwargs = {'num_workers': 1, 'pin_memory': True} 

train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False, **kwargs)

## Looking at Examples

In [5]:
train_dl = iter(train_loader)
x, y = next(train_dl)

In [6]:
print(x.shape, y.shape)

torch.Size([32, 1, 28, 28]) torch.Size([32])


Helper method (from fast.ai)

In [7]:
def show(img, title=None):
    plt.imshow(img, interpolation='none', cmap="gray")
    if title is not None: plt.title(title)

In [8]:
# first from torch to numpy
X = x.numpy(); Y = y.numpy()
X.shape

(32, 1, 28, 28)

## Feed Forward Neural Network

In [11]:
# for the number of neurons in the hidden unit
def get_model(M = 300):
    net = nn.Sequential(nn.Linear(28*28, M),
                        nn.ReLU(),
                        nn.Linear(M, 10))
    return net #.cuda()

In [12]:
def train_model(train_loader, test_loader, num_epochs, model, optimizer):
    model.train()
    sum_loss = 0.0
    total = 0
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):  
            batch = images.shape[0] # size of the batch
            # Convert torch tensor to Variable, change shape of the input
            images = images.view(-1, 28*28) #.cuda()
        
            # Forward + Backward + Optimize
            optimizer.zero_grad()  # zero the gradient buffer
            outputs = model(images)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()
        
            total += batch
            sum_loss += batch * loss.item()
            if (i+1) % 100 == 0:
                print ('Epoch [%d/%d], Loss: %.4f' 
                   %(epoch+1, num_epochs, sum_loss/total))
                
        train_loss = sum_loss/total
        print('Epoch [%d/%d], Loss: %.4f' %(epoch+1, num_epochs, train_loss))
        val_acc, val_loss = model_accuracy_loss(model, test_loader)
        print('Epoch [%d/%d], Valid Accuracy: %.4f, Valid Loss: %.4f' %(epoch+1, num_epochs, val_acc, val_loss))
    return val_acc, val_loss, train_loss

In [13]:
def model_accuracy_loss(model, test_loader):
    model.eval()
    correct = 0
    sum_loss = 0.0
    total = 0
    for images, labels in test_loader:
        images = images.view(-1, 28*28)  #.cuda()
        outputs = model(images)
        _, pred = torch.max(outputs.data, 1)
        loss = F.cross_entropy(outputs, labels)
        sum_loss += labels.size(0)*loss.item()
        total += labels.size(0)
        correct += pred.eq(labels.data).sum().item()
    return 100 * correct / total, sum_loss/ total

In [14]:
net = get_model()
learning_rate = 0.01
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
model_accuracy_loss(net, test_loader)
train_model(train_loader, test_loader, num_epochs=2, model=net, optimizer=optimizer)

(7.28, 2.325355977630615)

In [15]:
train_model(train_loader, test_loader, num_epochs=2, model=net, optimizer=optimizer)

Epoch [1/2], Loss: 0.7110
Epoch [1/2], Loss: 0.5608
Epoch [1/2], Loss: 0.5001
Epoch [1/2], Loss: 0.4506
Epoch [1/2], Loss: 0.4255
Epoch [1/2], Loss: 0.4100
Epoch [1/2], Loss: 0.3942
Epoch [1/2], Loss: 0.3859
Epoch [1/2], Loss: 0.3799
Epoch [1/2], Loss: 0.3730
Epoch [1/2], Loss: 0.3651
Epoch [1/2], Loss: 0.3587
Epoch [1/2], Loss: 0.3518
Epoch [1/2], Loss: 0.3489
Epoch [1/2], Loss: 0.3457
Epoch [1/2], Loss: 0.3410
Epoch [1/2], Loss: 0.3392
Epoch [1/2], Loss: 0.3374
Epoch [1/2], Loss: 0.3355
Epoch [1/2], Valid Accuracy: 92.4100, Valid Loss: 0.2869
Epoch [2/2], Loss: 0.3327
Epoch [2/2], Loss: 0.3290
Epoch [2/2], Loss: 0.3254
Epoch [2/2], Loss: 0.3241
Epoch [2/2], Loss: 0.3218
Epoch [2/2], Loss: 0.3202
Epoch [2/2], Loss: 0.3184
Epoch [2/2], Loss: 0.3156
Epoch [2/2], Loss: 0.3134
Epoch [2/2], Loss: 0.3114
Epoch [2/2], Loss: 0.3108
Epoch [2/2], Loss: 0.3102
Epoch [2/2], Loss: 0.3085
Epoch [2/2], Loss: 0.3086
Epoch [2/2], Loss: 0.3074
Epoch [2/2], Loss: 0.3064
Epoch [2/2], Loss: 0.3042
Epoch [

(93.06, 0.283778410176374, 0.3017305273246641)

### 1

In [24]:
results = []
for lr in [1, .1, .01, .001, .0001, .00001]:
    net = get_model()
    learning_rate = lr
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    model_accuracy_loss(net, test_loader)
    results.append(train_model(train_loader, test_loader, num_epochs=10, model=net, optimizer=optimizer))

Epoch [1/10], Loss: 905.9510
Epoch [1/10], Loss: 461.4354
Epoch [1/10], Loss: 312.8136
Epoch [1/10], Loss: 242.2911
Epoch [1/10], Loss: 198.2529
Epoch [1/10], Loss: 168.3026
Epoch [1/10], Loss: 146.0326
Epoch [1/10], Loss: 137.2108
Epoch [1/10], Loss: 124.8859
Epoch [1/10], Loss: 113.7471
Epoch [1/10], Loss: 104.6835
Epoch [1/10], Loss: 96.7207
Epoch [1/10], Loss: 89.5515
Epoch [1/10], Loss: 83.5419
Epoch [1/10], Loss: 78.2770
Epoch [1/10], Loss: 73.5804
Epoch [1/10], Loss: 69.4279
Epoch [1/10], Loss: 65.7044
Epoch [1/10], Loss: 63.2654
Epoch [1/10], Valid Accuracy: 9.8100, Valid Loss: 27.0273
Epoch [2/10], Loss: 60.2322
Epoch [2/10], Loss: 57.5295
Epoch [2/10], Loss: 54.9960
Epoch [2/10], Loss: 52.6838
Epoch [2/10], Loss: 50.7411
Epoch [2/10], Loss: 48.7878
Epoch [2/10], Loss: 46.9861
Epoch [2/10], Loss: 45.3193
Epoch [2/10], Loss: 43.7732
Epoch [2/10], Loss: 42.3337
Epoch [2/10], Loss: 41.0020
Epoch [2/10], Loss: 40.0938
Epoch [2/10], Loss: 38.9066
Epoch [2/10], Loss: 37.8445
Epoch [

Epoch [5/10], Loss: 2.3821
Epoch [5/10], Loss: 2.3812
Epoch [5/10], Loss: 2.3804
Epoch [5/10], Loss: 2.3796
Epoch [5/10], Loss: 2.3788
Epoch [5/10], Loss: 2.3780
Epoch [5/10], Loss: 2.3778
Epoch [5/10], Loss: 2.3870
Epoch [5/10], Loss: 2.3866
Epoch [5/10], Loss: 2.3857
Epoch [5/10], Loss: 2.3849
Epoch [5/10], Loss: 2.3841
Epoch [5/10], Loss: 2.3833
Epoch [5/10], Loss: 2.3825
Epoch [5/10], Loss: 2.3823
Epoch [5/10], Loss: 2.3815
Epoch [5/10], Loss: 2.3808
Epoch [5/10], Loss: 2.3802
Epoch [5/10], Valid Accuracy: 11.3700, Valid Loss: 2.3471
Epoch [6/10], Loss: 2.3795
Epoch [6/10], Loss: 2.3788
Epoch [6/10], Loss: 2.3784
Epoch [6/10], Loss: 2.3777
Epoch [6/10], Loss: 2.3771
Epoch [6/10], Loss: 2.3768
Epoch [6/10], Loss: 2.3762
Epoch [6/10], Loss: 2.3756
Epoch [6/10], Loss: 2.3750
Epoch [6/10], Loss: 2.3744
Epoch [6/10], Loss: 2.3738
Epoch [6/10], Loss: 2.3733
Epoch [6/10], Loss: 2.3727
Epoch [6/10], Loss: 2.3721
Epoch [6/10], Loss: 2.3716
Epoch [6/10], Loss: 2.3711
Epoch [6/10], Loss: 2.37

Epoch [9/10], Loss: 0.2463
Epoch [9/10], Loss: 0.2462
Epoch [9/10], Loss: 0.2463
Epoch [9/10], Loss: 0.2463
Epoch [9/10], Loss: 0.2462
Epoch [9/10], Loss: 0.2464
Epoch [9/10], Loss: 0.2462
Epoch [9/10], Loss: 0.2461
Epoch [9/10], Loss: 0.2462
Epoch [9/10], Loss: 0.2461
Epoch [9/10], Loss: 0.2458
Epoch [9/10], Valid Accuracy: 94.2500, Valid Loss: 0.3569
Epoch [10/10], Loss: 0.2456
Epoch [10/10], Loss: 0.2452
Epoch [10/10], Loss: 0.2450
Epoch [10/10], Loss: 0.2450
Epoch [10/10], Loss: 0.2447
Epoch [10/10], Loss: 0.2445
Epoch [10/10], Loss: 0.2441
Epoch [10/10], Loss: 0.2438
Epoch [10/10], Loss: 0.2437
Epoch [10/10], Loss: 0.2436
Epoch [10/10], Loss: 0.2435
Epoch [10/10], Loss: 0.2436
Epoch [10/10], Loss: 0.2435
Epoch [10/10], Loss: 0.2435
Epoch [10/10], Loss: 0.2433
Epoch [10/10], Loss: 0.2432
Epoch [10/10], Loss: 0.2430
Epoch [10/10], Loss: 0.2430
Epoch [10/10], Loss: 0.2428
Epoch [10/10], Valid Accuracy: 94.3200, Valid Loss: 0.3747
Epoch [1/10], Loss: 0.6334
Epoch [1/10], Loss: 0.4861


Epoch [3/10], Loss: 0.2591
Epoch [3/10], Loss: 0.2568
Epoch [3/10], Loss: 0.2545
Epoch [3/10], Loss: 0.2522
Epoch [3/10], Loss: 0.2507
Epoch [3/10], Valid Accuracy: 96.2200, Valid Loss: 0.1283
Epoch [4/10], Loss: 0.2485
Epoch [4/10], Loss: 0.2467
Epoch [4/10], Loss: 0.2447
Epoch [4/10], Loss: 0.2427
Epoch [4/10], Loss: 0.2405
Epoch [4/10], Loss: 0.2384
Epoch [4/10], Loss: 0.2363
Epoch [4/10], Loss: 0.2343
Epoch [4/10], Loss: 0.2326
Epoch [4/10], Loss: 0.2310
Epoch [4/10], Loss: 0.2292
Epoch [4/10], Loss: 0.2277
Epoch [4/10], Loss: 0.2260
Epoch [4/10], Loss: 0.2243
Epoch [4/10], Loss: 0.2225
Epoch [4/10], Loss: 0.2210
Epoch [4/10], Loss: 0.2195
Epoch [4/10], Loss: 0.2179
Epoch [4/10], Loss: 0.2169
Epoch [4/10], Valid Accuracy: 96.7500, Valid Loss: 0.1099
Epoch [5/10], Loss: 0.2153
Epoch [5/10], Loss: 0.2136
Epoch [5/10], Loss: 0.2122
Epoch [5/10], Loss: 0.2108
Epoch [5/10], Loss: 0.2094
Epoch [5/10], Loss: 0.2080
Epoch [5/10], Loss: 0.2066
Epoch [5/10], Loss: 0.2052
Epoch [5/10], Loss: 

Epoch [8/10], Loss: 0.4495
Epoch [8/10], Loss: 0.4481
Epoch [8/10], Loss: 0.4467
Epoch [8/10], Loss: 0.4453
Epoch [8/10], Loss: 0.4440
Epoch [8/10], Loss: 0.4425
Epoch [8/10], Loss: 0.4411
Epoch [8/10], Loss: 0.4398
Epoch [8/10], Loss: 0.4384
Epoch [8/10], Loss: 0.4371
Epoch [8/10], Loss: 0.4358
Epoch [8/10], Loss: 0.4344
Epoch [8/10], Loss: 0.4331
Epoch [8/10], Loss: 0.4318
Epoch [8/10], Loss: 0.4305
Epoch [8/10], Loss: 0.4293
Epoch [8/10], Loss: 0.4280
Epoch [8/10], Loss: 0.4268
Epoch [8/10], Loss: 0.4260
Epoch [8/10], Valid Accuracy: 92.9900, Valid Loss: 0.2410
Epoch [9/10], Loss: 0.4248
Epoch [9/10], Loss: 0.4237
Epoch [9/10], Loss: 0.4225
Epoch [9/10], Loss: 0.4213
Epoch [9/10], Loss: 0.4201
Epoch [9/10], Loss: 0.4189
Epoch [9/10], Loss: 0.4178
Epoch [9/10], Loss: 0.4166
Epoch [9/10], Loss: 0.4156
Epoch [9/10], Loss: 0.4144
Epoch [9/10], Loss: 0.4133
Epoch [9/10], Loss: 0.4123
Epoch [9/10], Loss: 0.4112
Epoch [9/10], Loss: 0.4101
Epoch [9/10], Loss: 0.4090
Epoch [9/10], Loss: 0.40

In [23]:
results

[(11.37, 147.05867141189574, 28.000077380212147),
 (10.28, 2.3135378803253173, 2.4611276170094807),
 (94.35, 0.23991573314219714, 0.3008269916428874),
 (97.49, 0.08214977166764438, 0.14398929282476505),
 (95.31, 0.1625918412655592, 0.300688506800433),
 (90.15, 0.38861375675201415, 0.8012183931589126)]

In [40]:
lr = [1, .1, .01, .001, .0001, .00001]
res1 = pd.DataFrame(results)
res1.columns = ["Valid_Acc", "Valid_Loss", "Train_Loss"]
res1.insert(0, "Learning Rate", lr)
res1

Unnamed: 0,Learning Rate,Valid_Acc,Valid_Loss,Train_Loss
0,1.0,9.61,2.591198,8.609349
1,0.1,11.37,2.314166,2.34723
2,0.01,94.32,0.37465,0.242768
3,0.001,97.64,0.118911,0.056752
4,0.0001,97.81,0.070849,0.125028
5,1e-05,93.67,0.220018,0.387639


From the table above, we can tell that the best learning rate is between 0.001 and 0.0001 for this model.

### 2

In [25]:
hidden = [10, 50, 100, 300, 1000, 2000]

results_2 = []
for hid in hidden:
    net = get_model(M = hid)
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    model_accuracy_loss(net, test_loader)
    results_2.append(train_model(train_loader, test_loader, num_epochs=10, model=net, optimizer=optimizer))


Epoch [1/10], Loss: 0.9206
Epoch [1/10], Loss: 0.7523
Epoch [1/10], Loss: 0.6853
Epoch [1/10], Loss: 0.6438
Epoch [1/10], Loss: 0.6115
Epoch [1/10], Loss: 0.5924
Epoch [1/10], Loss: 0.5827
Epoch [1/10], Loss: 0.5717
Epoch [1/10], Loss: 0.5644
Epoch [1/10], Loss: 0.5560
Epoch [1/10], Loss: 0.5463
Epoch [1/10], Loss: 0.5396
Epoch [1/10], Loss: 0.5330
Epoch [1/10], Loss: 0.5279
Epoch [1/10], Loss: 0.5228
Epoch [1/10], Loss: 0.5191
Epoch [1/10], Loss: 0.5144
Epoch [1/10], Loss: 0.5119
Epoch [1/10], Loss: 0.5082
Epoch [1/10], Valid Accuracy: 88.1400, Valid Loss: 0.4284
Epoch [2/10], Loss: 0.5038
Epoch [2/10], Loss: 0.4997
Epoch [2/10], Loss: 0.4957
Epoch [2/10], Loss: 0.4910
Epoch [2/10], Loss: 0.4885
Epoch [2/10], Loss: 0.4852
Epoch [2/10], Loss: 0.4843
Epoch [2/10], Loss: 0.4826
Epoch [2/10], Loss: 0.4808
Epoch [2/10], Loss: 0.4779
Epoch [2/10], Loss: 0.4762
Epoch [2/10], Loss: 0.4746
Epoch [2/10], Loss: 0.4725
Epoch [2/10], Loss: 0.4701
Epoch [2/10], Loss: 0.4684
Epoch [2/10], Loss: 0.46

Epoch [5/10], Loss: 0.2761
Epoch [5/10], Loss: 0.2759
Epoch [5/10], Loss: 0.2755
Epoch [5/10], Loss: 0.2754
Epoch [5/10], Loss: 0.2757
Epoch [5/10], Loss: 0.2755
Epoch [5/10], Loss: 0.2752
Epoch [5/10], Loss: 0.2750
Epoch [5/10], Loss: 0.2746
Epoch [5/10], Loss: 0.2744
Epoch [5/10], Loss: 0.2744
Epoch [5/10], Loss: 0.2744
Epoch [5/10], Valid Accuracy: 93.7700, Valid Loss: 0.2909
Epoch [6/10], Loss: 0.2741
Epoch [6/10], Loss: 0.2737
Epoch [6/10], Loss: 0.2729
Epoch [6/10], Loss: 0.2728
Epoch [6/10], Loss: 0.2723
Epoch [6/10], Loss: 0.2717
Epoch [6/10], Loss: 0.2716
Epoch [6/10], Loss: 0.2711
Epoch [6/10], Loss: 0.2711
Epoch [6/10], Loss: 0.2712
Epoch [6/10], Loss: 0.2710
Epoch [6/10], Loss: 0.2705
Epoch [6/10], Loss: 0.2704
Epoch [6/10], Loss: 0.2708
Epoch [6/10], Loss: 0.2705
Epoch [6/10], Loss: 0.2702
Epoch [6/10], Loss: 0.2702
Epoch [6/10], Loss: 0.2696
Epoch [6/10], Loss: 0.2692
Epoch [6/10], Valid Accuracy: 94.4300, Valid Loss: 0.2681
Epoch [7/10], Loss: 0.2689
Epoch [7/10], Loss: 

Epoch [9/10], Loss: 0.2475
Epoch [9/10], Loss: 0.2473
Epoch [9/10], Loss: 0.2471
Epoch [9/10], Loss: 0.2473
Epoch [9/10], Loss: 0.2472
Epoch [9/10], Valid Accuracy: 93.3100, Valid Loss: 0.3439
Epoch [10/10], Loss: 0.2469
Epoch [10/10], Loss: 0.2467
Epoch [10/10], Loss: 0.2463
Epoch [10/10], Loss: 0.2459
Epoch [10/10], Loss: 0.2457
Epoch [10/10], Loss: 0.2453
Epoch [10/10], Loss: 0.2449
Epoch [10/10], Loss: 0.2449
Epoch [10/10], Loss: 0.2451
Epoch [10/10], Loss: 0.2448
Epoch [10/10], Loss: 0.2448
Epoch [10/10], Loss: 0.2446
Epoch [10/10], Loss: 0.2444
Epoch [10/10], Loss: 0.2443
Epoch [10/10], Loss: 0.2442
Epoch [10/10], Loss: 0.2441
Epoch [10/10], Loss: 0.2443
Epoch [10/10], Loss: 0.2440
Epoch [10/10], Loss: 0.2439
Epoch [10/10], Valid Accuracy: 94.3500, Valid Loss: 0.3165
Epoch [1/10], Loss: 0.7146
Epoch [1/10], Loss: 0.5764
Epoch [1/10], Loss: 0.4973
Epoch [1/10], Loss: 0.4565
Epoch [1/10], Loss: 0.4478
Epoch [1/10], Loss: 0.4244
Epoch [1/10], Loss: 0.4103
Epoch [1/10], Loss: 0.4018


Epoch [4/10], Loss: 0.2983
Epoch [4/10], Loss: 0.2972
Epoch [4/10], Loss: 0.2960
Epoch [4/10], Loss: 0.2951
Epoch [4/10], Loss: 0.2944
Epoch [4/10], Loss: 0.2939
Epoch [4/10], Loss: 0.2931
Epoch [4/10], Loss: 0.2924
Epoch [4/10], Loss: 0.2920
Epoch [4/10], Loss: 0.2912
Epoch [4/10], Loss: 0.2905
Epoch [4/10], Loss: 0.2894
Epoch [4/10], Loss: 0.2895
Epoch [4/10], Loss: 0.2888
Epoch [4/10], Loss: 0.2887
Epoch [4/10], Loss: 0.2880
Epoch [4/10], Loss: 0.2879
Epoch [4/10], Loss: 0.2876
Epoch [4/10], Loss: 0.2875
Epoch [4/10], Valid Accuracy: 93.6100, Valid Loss: 0.2868
Epoch [5/10], Loss: 0.2868
Epoch [5/10], Loss: 0.2859
Epoch [5/10], Loss: 0.2849
Epoch [5/10], Loss: 0.2842
Epoch [5/10], Loss: 0.2831
Epoch [5/10], Loss: 0.2821
Epoch [5/10], Loss: 0.2815
Epoch [5/10], Loss: 0.2809
Epoch [5/10], Loss: 0.2803
Epoch [5/10], Loss: 0.2797
Epoch [5/10], Loss: 0.2792
Epoch [5/10], Loss: 0.2789
Epoch [5/10], Loss: 0.2784
Epoch [5/10], Loss: 0.2777
Epoch [5/10], Loss: 0.2774
Epoch [5/10], Loss: 0.27

Epoch [8/10], Loss: 0.2637
Epoch [8/10], Loss: 0.2634
Epoch [8/10], Loss: 0.2632
Epoch [8/10], Loss: 0.2630
Epoch [8/10], Loss: 0.2629
Epoch [8/10], Loss: 0.2625
Epoch [8/10], Loss: 0.2624
Epoch [8/10], Loss: 0.2624
Epoch [8/10], Loss: 0.2622
Epoch [8/10], Loss: 0.2621
Epoch [8/10], Loss: 0.2614
Epoch [8/10], Loss: 0.2613
Epoch [8/10], Valid Accuracy: 93.6400, Valid Loss: 0.3668
Epoch [9/10], Loss: 0.2607
Epoch [9/10], Loss: 0.2603
Epoch [9/10], Loss: 0.2601
Epoch [9/10], Loss: 0.2599
Epoch [9/10], Loss: 0.2598
Epoch [9/10], Loss: 0.2595
Epoch [9/10], Loss: 0.2592
Epoch [9/10], Loss: 0.2590
Epoch [9/10], Loss: 0.2587
Epoch [9/10], Loss: 0.2584
Epoch [9/10], Loss: 0.2580
Epoch [9/10], Loss: 0.2577
Epoch [9/10], Loss: 0.2574
Epoch [9/10], Loss: 0.2572
Epoch [9/10], Loss: 0.2569
Epoch [9/10], Loss: 0.2568
Epoch [9/10], Loss: 0.2564
Epoch [9/10], Loss: 0.2562
Epoch [9/10], Loss: 0.2562
Epoch [9/10], Valid Accuracy: 94.0600, Valid Loss: 0.3680
Epoch [10/10], Loss: 0.2559
Epoch [10/10], Loss

In [26]:
results_2

[(88.69, 0.4148162976861, 0.40165423086394864),
 (93.49, 0.361723963208776, 0.2537148560931658),
 (94.35, 0.3164770141951565, 0.24390024335888252),
 (94.53, 0.30357792531363664, 0.24904476823841104),
 (93.77, 0.37402034383420835, 0.24759875070346588),
 (94.16, 0.35299421441166995, 0.2526245105704215)]

In [41]:
neurons = [10, 50, 100, 300, 1000, 2000]
res2 = pd.DataFrame(results_2)
res2.columns = ["Valid_Acc", "Valid_Loss", "Train_Loss"]
res2.insert(0, "Hidden Layer Size", neurons)
res2

Unnamed: 0,Hidden Layer Size,Valid_Acc,Valid_Loss,Train_Loss
0,10,88.69,0.414816,0.401654
1,50,93.49,0.361724,0.253715
2,100,94.35,0.316477,0.2439
3,300,94.53,0.303578,0.249045
4,1000,93.77,0.37402,0.247599
5,2000,94.16,0.352994,0.252625


The hidden layer size of 300 gives us the best validation accuracy. 

Hidden layers of size 100 and 1000 may have overfitting, because the training loss of these two models are lower than the best model but their validation loss are higher.

## Models with L2 regularization
To add L2 regularization use the `weight_decay` argument on the optimizer

In [42]:
optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay = 0.01)

### 3

In [35]:
results_3 = []
weights = [0,0.0001,0.001,0.01,0.1,0.3]
for decay in weights:
    net = get_model(M = 300)
    optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay = decay)
    model_accuracy_loss(net, test_loader)
    results_3.append(train_model(train_loader, test_loader, num_epochs=20, model=net, optimizer=optimizer))

Epoch [1/20], Loss: 0.6504
Epoch [1/20], Loss: 0.4956
Epoch [1/20], Loss: 0.4220
Epoch [1/20], Loss: 0.3801
Epoch [1/20], Loss: 0.3491
Epoch [1/20], Loss: 0.3254
Epoch [1/20], Loss: 0.3032
Epoch [1/20], Loss: 0.2867
Epoch [1/20], Loss: 0.2728
Epoch [1/20], Loss: 0.2627
Epoch [1/20], Loss: 0.2510
Epoch [1/20], Loss: 0.2401
Epoch [1/20], Loss: 0.2326
Epoch [1/20], Loss: 0.2242
Epoch [1/20], Loss: 0.2180
Epoch [1/20], Loss: 0.2123
Epoch [1/20], Loss: 0.2060
Epoch [1/20], Loss: 0.2007
Epoch [1/20], Loss: 0.1979
Epoch [1/20], Valid Accuracy: 95.6200, Valid Loss: 0.1367
Epoch [2/20], Loss: 0.1924
Epoch [2/20], Loss: 0.1873
Epoch [2/20], Loss: 0.1824
Epoch [2/20], Loss: 0.1782
Epoch [2/20], Loss: 0.1747
Epoch [2/20], Loss: 0.1716
Epoch [2/20], Loss: 0.1684
Epoch [2/20], Loss: 0.1652
Epoch [2/20], Loss: 0.1628
Epoch [2/20], Loss: 0.1596
Epoch [2/20], Loss: 0.1573
Epoch [2/20], Loss: 0.1549
Epoch [2/20], Loss: 0.1529
Epoch [2/20], Loss: 0.1507
Epoch [2/20], Loss: 0.1484
Epoch [2/20], Loss: 0.14

Epoch [15/20], Loss: 0.0447
Epoch [15/20], Loss: 0.0446
Epoch [15/20], Loss: 0.0445
Epoch [15/20], Loss: 0.0444
Epoch [15/20], Loss: 0.0443
Epoch [15/20], Loss: 0.0442
Epoch [15/20], Loss: 0.0441
Epoch [15/20], Loss: 0.0441
Epoch [15/20], Loss: 0.0440
Epoch [15/20], Loss: 0.0439
Epoch [15/20], Loss: 0.0438
Epoch [15/20], Loss: 0.0437
Epoch [15/20], Loss: 0.0436
Epoch [15/20], Loss: 0.0436
Epoch [15/20], Loss: 0.0435
Epoch [15/20], Valid Accuracy: 97.9300, Valid Loss: 0.1271
Epoch [16/20], Loss: 0.0434
Epoch [16/20], Loss: 0.0433
Epoch [16/20], Loss: 0.0432
Epoch [16/20], Loss: 0.0431
Epoch [16/20], Loss: 0.0430
Epoch [16/20], Loss: 0.0429
Epoch [16/20], Loss: 0.0428
Epoch [16/20], Loss: 0.0426
Epoch [16/20], Loss: 0.0425
Epoch [16/20], Loss: 0.0424
Epoch [16/20], Loss: 0.0423
Epoch [16/20], Loss: 0.0423
Epoch [16/20], Loss: 0.0422
Epoch [16/20], Loss: 0.0421
Epoch [16/20], Loss: 0.0419
Epoch [16/20], Loss: 0.0418
Epoch [16/20], Loss: 0.0417
Epoch [16/20], Loss: 0.0417
Epoch [16/20], Lo

Epoch [9/20], Loss: 0.0686
Epoch [9/20], Loss: 0.0683
Epoch [9/20], Loss: 0.0681
Epoch [9/20], Loss: 0.0679
Epoch [9/20], Loss: 0.0676
Epoch [9/20], Loss: 0.0674
Epoch [9/20], Loss: 0.0672
Epoch [9/20], Loss: 0.0669
Epoch [9/20], Loss: 0.0666
Epoch [9/20], Loss: 0.0664
Epoch [9/20], Loss: 0.0662
Epoch [9/20], Valid Accuracy: 97.1600, Valid Loss: 0.1134
Epoch [10/20], Loss: 0.0661
Epoch [10/20], Loss: 0.0659
Epoch [10/20], Loss: 0.0657
Epoch [10/20], Loss: 0.0654
Epoch [10/20], Loss: 0.0652
Epoch [10/20], Loss: 0.0650
Epoch [10/20], Loss: 0.0648
Epoch [10/20], Loss: 0.0645
Epoch [10/20], Loss: 0.0643
Epoch [10/20], Loss: 0.0641
Epoch [10/20], Loss: 0.0639
Epoch [10/20], Loss: 0.0638
Epoch [10/20], Loss: 0.0636
Epoch [10/20], Loss: 0.0635
Epoch [10/20], Loss: 0.0633
Epoch [10/20], Loss: 0.0631
Epoch [10/20], Loss: 0.0629
Epoch [10/20], Loss: 0.0628
Epoch [10/20], Loss: 0.0626
Epoch [10/20], Valid Accuracy: 97.6100, Valid Loss: 0.0878
Epoch [11/20], Loss: 0.0624
Epoch [11/20], Loss: 0.062

Epoch [3/20], Loss: 0.1453
Epoch [3/20], Loss: 0.1445
Epoch [3/20], Loss: 0.1433
Epoch [3/20], Loss: 0.1421
Epoch [3/20], Loss: 0.1411
Epoch [3/20], Loss: 0.1400
Epoch [3/20], Loss: 0.1388
Epoch [3/20], Loss: 0.1378
Epoch [3/20], Loss: 0.1369
Epoch [3/20], Loss: 0.1362
Epoch [3/20], Loss: 0.1358
Epoch [3/20], Valid Accuracy: 96.3000, Valid Loss: 0.1214
Epoch [4/20], Loss: 0.1347
Epoch [4/20], Loss: 0.1336
Epoch [4/20], Loss: 0.1328
Epoch [4/20], Loss: 0.1320
Epoch [4/20], Loss: 0.1314
Epoch [4/20], Loss: 0.1307
Epoch [4/20], Loss: 0.1301
Epoch [4/20], Loss: 0.1295
Epoch [4/20], Loss: 0.1289
Epoch [4/20], Loss: 0.1280
Epoch [4/20], Loss: 0.1274
Epoch [4/20], Loss: 0.1268
Epoch [4/20], Loss: 0.1262
Epoch [4/20], Loss: 0.1256
Epoch [4/20], Loss: 0.1253
Epoch [4/20], Loss: 0.1247
Epoch [4/20], Loss: 0.1243
Epoch [4/20], Loss: 0.1239
Epoch [4/20], Loss: 0.1235
Epoch [4/20], Valid Accuracy: 97.1000, Valid Loss: 0.0935
Epoch [5/20], Loss: 0.1229
Epoch [5/20], Loss: 0.1222
Epoch [5/20], Loss: 

Epoch [17/20], Loss: 0.0817
Epoch [17/20], Loss: 0.0816
Epoch [17/20], Loss: 0.0816
Epoch [17/20], Loss: 0.0815
Epoch [17/20], Loss: 0.0815
Epoch [17/20], Loss: 0.0815
Epoch [17/20], Loss: 0.0814
Epoch [17/20], Loss: 0.0814
Epoch [17/20], Loss: 0.0814
Epoch [17/20], Valid Accuracy: 97.0500, Valid Loss: 0.0949
Epoch [18/20], Loss: 0.0813
Epoch [18/20], Loss: 0.0812
Epoch [18/20], Loss: 0.0812
Epoch [18/20], Loss: 0.0811
Epoch [18/20], Loss: 0.0810
Epoch [18/20], Loss: 0.0810
Epoch [18/20], Loss: 0.0809
Epoch [18/20], Loss: 0.0808
Epoch [18/20], Loss: 0.0808
Epoch [18/20], Loss: 0.0808
Epoch [18/20], Loss: 0.0807
Epoch [18/20], Loss: 0.0806
Epoch [18/20], Loss: 0.0806
Epoch [18/20], Loss: 0.0806
Epoch [18/20], Loss: 0.0805
Epoch [18/20], Loss: 0.0805
Epoch [18/20], Loss: 0.0804
Epoch [18/20], Loss: 0.0804
Epoch [18/20], Loss: 0.0803
Epoch [18/20], Valid Accuracy: 96.8600, Valid Loss: 0.0934
Epoch [19/20], Loss: 0.0803
Epoch [19/20], Loss: 0.0802
Epoch [19/20], Loss: 0.0802
Epoch [19/20],

Epoch [11/20], Loss: 0.1860
Epoch [11/20], Loss: 0.1859
Epoch [11/20], Loss: 0.1857
Epoch [11/20], Loss: 0.1857
Epoch [11/20], Loss: 0.1856
Epoch [11/20], Valid Accuracy: 95.4500, Valid Loss: 0.1669
Epoch [12/20], Loss: 0.1855
Epoch [12/20], Loss: 0.1854
Epoch [12/20], Loss: 0.1852
Epoch [12/20], Loss: 0.1851
Epoch [12/20], Loss: 0.1850
Epoch [12/20], Loss: 0.1849
Epoch [12/20], Loss: 0.1849
Epoch [12/20], Loss: 0.1849
Epoch [12/20], Loss: 0.1847
Epoch [12/20], Loss: 0.1847
Epoch [12/20], Loss: 0.1846
Epoch [12/20], Loss: 0.1844
Epoch [12/20], Loss: 0.1844
Epoch [12/20], Loss: 0.1844
Epoch [12/20], Loss: 0.1843
Epoch [12/20], Loss: 0.1843
Epoch [12/20], Loss: 0.1842
Epoch [12/20], Loss: 0.1842
Epoch [12/20], Loss: 0.1841
Epoch [12/20], Valid Accuracy: 95.7000, Valid Loss: 0.1622
Epoch [13/20], Loss: 0.1840
Epoch [13/20], Loss: 0.1839
Epoch [13/20], Loss: 0.1839
Epoch [13/20], Loss: 0.1838
Epoch [13/20], Loss: 0.1838
Epoch [13/20], Loss: 0.1837
Epoch [13/20], Loss: 0.1836
Epoch [13/20],

Epoch [5/20], Loss: 0.4947
Epoch [5/20], Loss: 0.4945
Epoch [5/20], Loss: 0.4944
Epoch [5/20], Loss: 0.4942
Epoch [5/20], Valid Accuracy: 89.4000, Valid Loss: 0.4594
Epoch [6/20], Loss: 0.4939
Epoch [6/20], Loss: 0.4937
Epoch [6/20], Loss: 0.4935
Epoch [6/20], Loss: 0.4933
Epoch [6/20], Loss: 0.4929
Epoch [6/20], Loss: 0.4927
Epoch [6/20], Loss: 0.4922
Epoch [6/20], Loss: 0.4920
Epoch [6/20], Loss: 0.4917
Epoch [6/20], Loss: 0.4916
Epoch [6/20], Loss: 0.4916
Epoch [6/20], Loss: 0.4915
Epoch [6/20], Loss: 0.4913
Epoch [6/20], Loss: 0.4913
Epoch [6/20], Loss: 0.4912
Epoch [6/20], Loss: 0.4912
Epoch [6/20], Loss: 0.4909
Epoch [6/20], Loss: 0.4907
Epoch [6/20], Loss: 0.4905
Epoch [6/20], Valid Accuracy: 89.3600, Valid Loss: 0.4548
Epoch [7/20], Loss: 0.4906
Epoch [7/20], Loss: 0.4906
Epoch [7/20], Loss: 0.4904
Epoch [7/20], Loss: 0.4904
Epoch [7/20], Loss: 0.4902
Epoch [7/20], Loss: 0.4900
Epoch [7/20], Loss: 0.4898
Epoch [7/20], Loss: 0.4894
Epoch [7/20], Loss: 0.4894
Epoch [7/20], Loss: 

Epoch [19/20], Loss: 0.4745
Epoch [19/20], Loss: 0.4745
Epoch [19/20], Loss: 0.4746
Epoch [19/20], Valid Accuracy: 89.5300, Valid Loss: 0.4435
Epoch [20/20], Loss: 0.4746
Epoch [20/20], Loss: 0.4745
Epoch [20/20], Loss: 0.4745
Epoch [20/20], Loss: 0.4745
Epoch [20/20], Loss: 0.4744
Epoch [20/20], Loss: 0.4744
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4744
Epoch [20/20], Loss: 0.4744
Epoch [20/20], Loss: 0.4744
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4742
Epoch [20/20], Loss: 0.4743
Epoch [20/20], Loss: 0.4742
Epoch [20/20], Loss: 0.4742
Epoch [20/20], Loss: 0.4742
Epoch [20/20], Valid Accuracy: 89.6900, Valid Loss: 0.4466
Epoch [1/20], Loss: 1.0161
Epoch [1/20], Loss: 0.9489
Epoch [1/20], Loss: 0.9186
Epoch [1/20], Loss: 0.9087
Epoch [1/20], Loss: 0.9052
Epoch [1/20], Loss: 0.8966
Epoch [1/20], Loss: 0.8920
Epoch [1/20], Loss: 0.8915
Epoch [1/20], Loss: 0.8887
Epoch [1/20], Loss: 0.8

Epoch [14/20], Loss: 0.8249
Epoch [14/20], Loss: 0.8248
Epoch [14/20], Loss: 0.8248
Epoch [14/20], Loss: 0.8247
Epoch [14/20], Loss: 0.8246
Epoch [14/20], Loss: 0.8246
Epoch [14/20], Loss: 0.8245
Epoch [14/20], Loss: 0.8244
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Loss: 0.8242
Epoch [14/20], Loss: 0.8242
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Loss: 0.8242
Epoch [14/20], Loss: 0.8242
Epoch [14/20], Loss: 0.8242
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Loss: 0.8243
Epoch [14/20], Valid Accuracy: 84.7100, Valid Loss: 0.8037
Epoch [15/20], Loss: 0.8243
Epoch [15/20], Loss: 0.8242
Epoch [15/20], Loss: 0.8243
Epoch [15/20], Loss: 0.8242
Epoch [15/20], Loss: 0.8242
Epoch [15/20], Loss: 0.8242
Epoch [15/20], Loss: 0.8241
Epoch [15/20], Loss: 0.8242
Epoch [15/20], Loss: 0.8241
Epoch [15/20], Loss: 0.8241
Epoch [15/20], Loss: 0.8241
Epoch [15/20], Loss: 0.8240
Epoch [15/20], Loss: 0.8241
Epoch [15/20], Loss: 0.8240
Epoch [15/20], Lo

In [43]:
weights = [0,0.0001,0.001,0.01,0.1,0.3]
res3 = pd.DataFrame(results_3)
res3.columns = ["Valid_Acc", "Valid_Loss", "Train_Loss"]
res3.insert(0, "Weight Decay", weights)
res3

Unnamed: 0,Weight Decay,Valid_Acc,Valid_Loss,Train_Loss
0,0.0,98.15,0.134466,0.036126
1,0.0001,97.85,0.08554,0.044181
2,0.001,97.58,0.075122,0.078494
3,0.01,95.87,0.15255,0.176879
4,0.1,89.69,0.446578,0.474168
5,0.3,85.32,0.787871,0.821641


The weight decay actually didn't improve our 2-layer neural network model in terms of accuracy, but it lowered the validation loss so I assume it's helpful.

## Models with Dropout

In [37]:
def get_model_v2(M = 300, p=0):
    modules = []
    modules.append(nn.Linear(28*28, M))
    modules.append(nn.ReLU())
    if p > 0:
        modules.append(nn.Dropout(p))
    modules.append(nn.Linear(M, 10))
    return nn.Sequential(*modules) #.cuda()

In [38]:
results_4 = []
for drop in [0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]:
    net = get_model_v2(M = 300, p=drop)
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    model_accuracy_loss(net, test_loader)
    res = train_model(train_loader, test_loader, num_epochs=20, model=net, optimizer=optimizer)
    results_4.append(res)

Epoch [1/20], Loss: 0.7555
Epoch [1/20], Loss: 0.5714
Epoch [1/20], Loss: 0.5229
Epoch [1/20], Loss: 0.4722
Epoch [1/20], Loss: 0.4468
Epoch [1/20], Loss: 0.4294
Epoch [1/20], Loss: 0.4151
Epoch [1/20], Loss: 0.4022
Epoch [1/20], Loss: 0.3968
Epoch [1/20], Loss: 0.3874
Epoch [1/20], Loss: 0.3746
Epoch [1/20], Loss: 0.3685
Epoch [1/20], Loss: 0.3618
Epoch [1/20], Loss: 0.3553
Epoch [1/20], Loss: 0.3509
Epoch [1/20], Loss: 0.3481
Epoch [1/20], Loss: 0.3458
Epoch [1/20], Loss: 0.3433
Epoch [1/20], Loss: 0.3421
Epoch [1/20], Valid Accuracy: 93.6300, Valid Loss: 0.2513
Epoch [2/20], Loss: 0.3354
Epoch [2/20], Loss: 0.3315
Epoch [2/20], Loss: 0.3291
Epoch [2/20], Loss: 0.3255
Epoch [2/20], Loss: 0.3227
Epoch [2/20], Loss: 0.3193
Epoch [2/20], Loss: 0.3162
Epoch [2/20], Loss: 0.3131
Epoch [2/20], Loss: 0.3105
Epoch [2/20], Loss: 0.3091
Epoch [2/20], Loss: 0.3076
Epoch [2/20], Loss: 0.3066
Epoch [2/20], Loss: 0.3035
Epoch [2/20], Loss: 0.3012
Epoch [2/20], Loss: 0.2998
Epoch [2/20], Loss: 0.29

Epoch [15/20], Loss: 0.2273
Epoch [15/20], Loss: 0.2271
Epoch [15/20], Loss: 0.2268
Epoch [15/20], Loss: 0.2268
Epoch [15/20], Loss: 0.2266
Epoch [15/20], Loss: 0.2265
Epoch [15/20], Loss: 0.2264
Epoch [15/20], Loss: 0.2263
Epoch [15/20], Loss: 0.2263
Epoch [15/20], Loss: 0.2261
Epoch [15/20], Loss: 0.2261
Epoch [15/20], Loss: 0.2260
Epoch [15/20], Loss: 0.2260
Epoch [15/20], Loss: 0.2260
Epoch [15/20], Loss: 0.2260
Epoch [15/20], Valid Accuracy: 93.6800, Valid Loss: 0.3784
Epoch [16/20], Loss: 0.2258
Epoch [16/20], Loss: 0.2257
Epoch [16/20], Loss: 0.2256
Epoch [16/20], Loss: 0.2255
Epoch [16/20], Loss: 0.2253
Epoch [16/20], Loss: 0.2252
Epoch [16/20], Loss: 0.2251
Epoch [16/20], Loss: 0.2250
Epoch [16/20], Loss: 0.2249
Epoch [16/20], Loss: 0.2248
Epoch [16/20], Loss: 0.2247
Epoch [16/20], Loss: 0.2246
Epoch [16/20], Loss: 0.2246
Epoch [16/20], Loss: 0.2244
Epoch [16/20], Loss: 0.2244
Epoch [16/20], Loss: 0.2244
Epoch [16/20], Loss: 0.2243
Epoch [16/20], Loss: 0.2242
Epoch [16/20], Lo

Epoch [9/20], Loss: 0.2597
Epoch [9/20], Loss: 0.2593
Epoch [9/20], Loss: 0.2591
Epoch [9/20], Loss: 0.2589
Epoch [9/20], Loss: 0.2587
Epoch [9/20], Loss: 0.2584
Epoch [9/20], Loss: 0.2583
Epoch [9/20], Loss: 0.2581
Epoch [9/20], Loss: 0.2582
Epoch [9/20], Loss: 0.2582
Epoch [9/20], Loss: 0.2579
Epoch [9/20], Valid Accuracy: 93.9200, Valid Loss: 0.3076
Epoch [10/20], Loss: 0.2575
Epoch [10/20], Loss: 0.2572
Epoch [10/20], Loss: 0.2570
Epoch [10/20], Loss: 0.2567
Epoch [10/20], Loss: 0.2565
Epoch [10/20], Loss: 0.2562
Epoch [10/20], Loss: 0.2561
Epoch [10/20], Loss: 0.2558
Epoch [10/20], Loss: 0.2555
Epoch [10/20], Loss: 0.2553
Epoch [10/20], Loss: 0.2551
Epoch [10/20], Loss: 0.2548
Epoch [10/20], Loss: 0.2547
Epoch [10/20], Loss: 0.2546
Epoch [10/20], Loss: 0.2545
Epoch [10/20], Loss: 0.2543
Epoch [10/20], Loss: 0.2541
Epoch [10/20], Loss: 0.2537
Epoch [10/20], Loss: 0.2535
Epoch [10/20], Valid Accuracy: 93.9300, Valid Loss: 0.3215
Epoch [11/20], Loss: 0.2533
Epoch [11/20], Loss: 0.253

Epoch [3/20], Loss: 0.3538
Epoch [3/20], Loss: 0.3515
Epoch [3/20], Loss: 0.3499
Epoch [3/20], Loss: 0.3484
Epoch [3/20], Loss: 0.3476
Epoch [3/20], Loss: 0.3465
Epoch [3/20], Loss: 0.3453
Epoch [3/20], Loss: 0.3443
Epoch [3/20], Loss: 0.3441
Epoch [3/20], Loss: 0.3427
Epoch [3/20], Loss: 0.3417
Epoch [3/20], Valid Accuracy: 93.7100, Valid Loss: 0.2644
Epoch [4/20], Loss: 0.3403
Epoch [4/20], Loss: 0.3388
Epoch [4/20], Loss: 0.3365
Epoch [4/20], Loss: 0.3349
Epoch [4/20], Loss: 0.3339
Epoch [4/20], Loss: 0.3322
Epoch [4/20], Loss: 0.3312
Epoch [4/20], Loss: 0.3297
Epoch [4/20], Loss: 0.3282
Epoch [4/20], Loss: 0.3274
Epoch [4/20], Loss: 0.3264
Epoch [4/20], Loss: 0.3254
Epoch [4/20], Loss: 0.3244
Epoch [4/20], Loss: 0.3235
Epoch [4/20], Loss: 0.3222
Epoch [4/20], Loss: 0.3208
Epoch [4/20], Loss: 0.3201
Epoch [4/20], Loss: 0.3190
Epoch [4/20], Loss: 0.3185
Epoch [4/20], Valid Accuracy: 93.6000, Valid Loss: 0.2912
Epoch [5/20], Loss: 0.3174
Epoch [5/20], Loss: 0.3157
Epoch [5/20], Loss: 

Epoch [17/20], Loss: 0.2455
Epoch [17/20], Loss: 0.2454
Epoch [17/20], Loss: 0.2453
Epoch [17/20], Loss: 0.2452
Epoch [17/20], Loss: 0.2452
Epoch [17/20], Loss: 0.2451
Epoch [17/20], Loss: 0.2452
Epoch [17/20], Loss: 0.2451
Epoch [17/20], Loss: 0.2450
Epoch [17/20], Valid Accuracy: 94.4400, Valid Loss: 0.3694
Epoch [18/20], Loss: 0.2447
Epoch [18/20], Loss: 0.2445
Epoch [18/20], Loss: 0.2444
Epoch [18/20], Loss: 0.2442
Epoch [18/20], Loss: 0.2441
Epoch [18/20], Loss: 0.2439
Epoch [18/20], Loss: 0.2438
Epoch [18/20], Loss: 0.2436
Epoch [18/20], Loss: 0.2435
Epoch [18/20], Loss: 0.2435
Epoch [18/20], Loss: 0.2435
Epoch [18/20], Loss: 0.2434
Epoch [18/20], Loss: 0.2433
Epoch [18/20], Loss: 0.2433
Epoch [18/20], Loss: 0.2431
Epoch [18/20], Loss: 0.2429
Epoch [18/20], Loss: 0.2428
Epoch [18/20], Loss: 0.2427
Epoch [18/20], Loss: 0.2426
Epoch [18/20], Valid Accuracy: 94.4500, Valid Loss: 0.3883
Epoch [19/20], Loss: 0.2425
Epoch [19/20], Loss: 0.2424
Epoch [19/20], Loss: 0.2424
Epoch [19/20],

Epoch [11/20], Loss: 0.2685
Epoch [11/20], Loss: 0.2684
Epoch [11/20], Loss: 0.2682
Epoch [11/20], Loss: 0.2680
Epoch [11/20], Loss: 0.2677
Epoch [11/20], Valid Accuracy: 94.2500, Valid Loss: 0.3454
Epoch [12/20], Loss: 0.2674
Epoch [12/20], Loss: 0.2671
Epoch [12/20], Loss: 0.2666
Epoch [12/20], Loss: 0.2663
Epoch [12/20], Loss: 0.2659
Epoch [12/20], Loss: 0.2656
Epoch [12/20], Loss: 0.2653
Epoch [12/20], Loss: 0.2652
Epoch [12/20], Loss: 0.2653
Epoch [12/20], Loss: 0.2652
Epoch [12/20], Loss: 0.2649
Epoch [12/20], Loss: 0.2647
Epoch [12/20], Loss: 0.2645
Epoch [12/20], Loss: 0.2641
Epoch [12/20], Loss: 0.2639
Epoch [12/20], Loss: 0.2637
Epoch [12/20], Loss: 0.2634
Epoch [12/20], Loss: 0.2634
Epoch [12/20], Loss: 0.2634
Epoch [12/20], Valid Accuracy: 94.6800, Valid Loss: 0.3277
Epoch [13/20], Loss: 0.2629
Epoch [13/20], Loss: 0.2625
Epoch [13/20], Loss: 0.2621
Epoch [13/20], Loss: 0.2620
Epoch [13/20], Loss: 0.2617
Epoch [13/20], Loss: 0.2614
Epoch [13/20], Loss: 0.2613
Epoch [13/20],

Epoch [5/20], Loss: 0.3674
Epoch [5/20], Loss: 0.3661
Epoch [5/20], Loss: 0.3651
Epoch [5/20], Loss: 0.3644
Epoch [5/20], Valid Accuracy: 93.9000, Valid Loss: 0.2834
Epoch [6/20], Loss: 0.3633
Epoch [6/20], Loss: 0.3620
Epoch [6/20], Loss: 0.3608
Epoch [6/20], Loss: 0.3597
Epoch [6/20], Loss: 0.3583
Epoch [6/20], Loss: 0.3569
Epoch [6/20], Loss: 0.3556
Epoch [6/20], Loss: 0.3542
Epoch [6/20], Loss: 0.3527
Epoch [6/20], Loss: 0.3514
Epoch [6/20], Loss: 0.3498
Epoch [6/20], Loss: 0.3489
Epoch [6/20], Loss: 0.3482
Epoch [6/20], Loss: 0.3473
Epoch [6/20], Loss: 0.3461
Epoch [6/20], Loss: 0.3454
Epoch [6/20], Loss: 0.3445
Epoch [6/20], Loss: 0.3437
Epoch [6/20], Loss: 0.3426
Epoch [6/20], Valid Accuracy: 94.5100, Valid Loss: 0.2907
Epoch [7/20], Loss: 0.3416
Epoch [7/20], Loss: 0.3404
Epoch [7/20], Loss: 0.3390
Epoch [7/20], Loss: 0.3379
Epoch [7/20], Loss: 0.3368
Epoch [7/20], Loss: 0.3359
Epoch [7/20], Loss: 0.3349
Epoch [7/20], Loss: 0.3341
Epoch [7/20], Loss: 0.3333
Epoch [7/20], Loss: 

Epoch [19/20], Loss: 0.2505
Epoch [19/20], Loss: 0.2503
Epoch [19/20], Loss: 0.2502
Epoch [19/20], Valid Accuracy: 94.5100, Valid Loss: 0.3693
Epoch [20/20], Loss: 0.2499
Epoch [20/20], Loss: 0.2497
Epoch [20/20], Loss: 0.2495
Epoch [20/20], Loss: 0.2494
Epoch [20/20], Loss: 0.2493
Epoch [20/20], Loss: 0.2492
Epoch [20/20], Loss: 0.2490
Epoch [20/20], Loss: 0.2488
Epoch [20/20], Loss: 0.2487
Epoch [20/20], Loss: 0.2486
Epoch [20/20], Loss: 0.2485
Epoch [20/20], Loss: 0.2484
Epoch [20/20], Loss: 0.2482
Epoch [20/20], Loss: 0.2481
Epoch [20/20], Loss: 0.2479
Epoch [20/20], Loss: 0.2478
Epoch [20/20], Loss: 0.2477
Epoch [20/20], Loss: 0.2478
Epoch [20/20], Loss: 0.2476
Epoch [20/20], Valid Accuracy: 94.7500, Valid Loss: 0.3340
Epoch [1/20], Loss: 1.4562
Epoch [1/20], Loss: 1.3227
Epoch [1/20], Loss: 1.2657
Epoch [1/20], Loss: 1.2448
Epoch [1/20], Loss: 1.2360
Epoch [1/20], Loss: 1.2321
Epoch [1/20], Loss: 1.2331
Epoch [1/20], Loss: 1.2338
Epoch [1/20], Loss: 1.2376
Epoch [1/20], Loss: 1.2

Epoch [14/20], Loss: 0.3092
Epoch [14/20], Loss: 0.3088
Epoch [14/20], Loss: 0.3084
Epoch [14/20], Loss: 0.3080
Epoch [14/20], Loss: 0.3076
Epoch [14/20], Loss: 0.3071
Epoch [14/20], Loss: 0.3065
Epoch [14/20], Loss: 0.3061
Epoch [14/20], Loss: 0.3059
Epoch [14/20], Loss: 0.3056
Epoch [14/20], Loss: 0.3054
Epoch [14/20], Loss: 0.3050
Epoch [14/20], Loss: 0.3047
Epoch [14/20], Loss: 0.3045
Epoch [14/20], Loss: 0.3041
Epoch [14/20], Loss: 0.3038
Epoch [14/20], Loss: 0.3036
Epoch [14/20], Loss: 0.3031
Epoch [14/20], Loss: 0.3029
Epoch [14/20], Valid Accuracy: 94.5300, Valid Loss: 0.3635
Epoch [15/20], Loss: 0.3025
Epoch [15/20], Loss: 0.3020
Epoch [15/20], Loss: 0.3015
Epoch [15/20], Loss: 0.3011
Epoch [15/20], Loss: 0.3008
Epoch [15/20], Loss: 0.3002
Epoch [15/20], Loss: 0.2999
Epoch [15/20], Loss: 0.2994
Epoch [15/20], Loss: 0.2992
Epoch [15/20], Loss: 0.2989
Epoch [15/20], Loss: 0.2986
Epoch [15/20], Loss: 0.2982
Epoch [15/20], Loss: 0.2978
Epoch [15/20], Loss: 0.2974
Epoch [15/20], Lo

Epoch [8/20], Loss: 0.6903
Epoch [8/20], Loss: 0.6876
Epoch [8/20], Loss: 0.6850
Epoch [8/20], Loss: 0.6827
Epoch [8/20], Loss: 0.6801
Epoch [8/20], Loss: 0.6775
Epoch [8/20], Loss: 0.6751
Epoch [8/20], Loss: 0.6728
Epoch [8/20], Loss: 0.6705
Epoch [8/20], Loss: 0.6682
Epoch [8/20], Loss: 0.6659
Epoch [8/20], Loss: 0.6640
Epoch [8/20], Loss: 0.6618
Epoch [8/20], Loss: 0.6597
Epoch [8/20], Loss: 0.6576
Epoch [8/20], Loss: 0.6561
Epoch [8/20], Valid Accuracy: 90.2900, Valid Loss: 0.3657
Epoch [9/20], Loss: 0.6540
Epoch [9/20], Loss: 0.6518
Epoch [9/20], Loss: 0.6499
Epoch [9/20], Loss: 0.6477
Epoch [9/20], Loss: 0.6456
Epoch [9/20], Loss: 0.6436
Epoch [9/20], Loss: 0.6418
Epoch [9/20], Loss: 0.6401
Epoch [9/20], Loss: 0.6383
Epoch [9/20], Loss: 0.6364
Epoch [9/20], Loss: 0.6344
Epoch [9/20], Loss: 0.6325
Epoch [9/20], Loss: 0.6309
Epoch [9/20], Loss: 0.6292
Epoch [9/20], Loss: 0.6276
Epoch [9/20], Loss: 0.6258
Epoch [9/20], Loss: 0.6242
Epoch [9/20], Loss: 0.6223
Epoch [9/20], Loss: 0.62

In [44]:
drops = [0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]
res4 = pd.DataFrame(results_4)
res4.columns = ["Valid_Acc", "Valid_Loss", "Train_Loss"]
res4.insert(0, "Dropout Rate", drops)
res4

Unnamed: 0,Dropout Rate,Valid_Acc,Valid_Loss,Train_Loss
0,0.0,93.33,0.489303,0.21852
1,0.1,93.8,0.455044,0.230082
2,0.2,94.11,0.435098,0.238483
3,0.3,93.25,0.484186,0.239066
4,0.5,94.75,0.334009,0.247557
5,0.7,94.06,0.489243,0.272667
6,0.9,88.6,0.437291,0.46097


The dropout level of 0.5 gives the best validation accuracy. 

This is because: 
>1. Zeroing out some neurons in training process can prevent the model from overfitting to the training set, which improves the generalization ability of the model.
>2. Keeping 50% neurons can still keep many predictive neurons, which will not increase the bias too much.

Compared with the L2 regularization, the dropout method performs similar to a weight decay of 0.01. From my observation, these two are both reasonable methods to generalize the model. 

In [45]:
res3

Unnamed: 0,Weight Decay,Valid_Acc,Valid_Loss,Train_Loss
0,0.0,98.15,0.134466,0.036126
1,0.0001,97.85,0.08554,0.044181
2,0.001,97.58,0.075122,0.078494
3,0.01,95.87,0.15255,0.176879
4,0.1,89.69,0.446578,0.474168
5,0.3,85.32,0.787871,0.821641
