In [8]:
import torch
from torch import nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch import optim
from sklearn.metrics import accuracy_score
import torch.nn.functional as F
%matplotlib inline

In [2]:
bsz = 10

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=bsz, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
batch_size=bsz, shuffle=True)

In [54]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

class MLPNet(nn.Module):
    def __init__(self):
        super(MLPNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 32)
        self.fc2 = nn.Linear(32, 10)
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.log_softmax(x, dim=1)

def validate(model, batches_val):
    model.eval()
    y_pred = []
    y_true = []
    for batch in batches_val:
        features, targets = batch
        y_true += targets.tolist()
        y_pred += model(Variable(features)).topk(1)[1].squeeze().data.tolist()
    model.train()
    return accuracy_score(y_true, y_pred)

def dist_loss(out, labels, teacher):
    f = nn.NLLLoss()
    return f(out, labels) + f(out, teacher)

In [43]:
model = MLPNet()

In [44]:
validate(model, test_loader)

0.0986

In [22]:
validate(model, test_loader)

0.102

In [11]:
dataiter = iter(train_loader)
images, labels = dataiter.next()

In [16]:

criterion = nn.NLLLoss()
n_epochs = 5
print_every = 1000

<h1>Train CNN

In [29]:
train_loss = []
val_loss = []
model = Net()
optimizer = optim.Adam(model.parameters())
for epoch in range(n_epochs):
    print('EPOCH: {}'.format(epoch))
    val_loss += [validate(model, test_loader)]
    print(val_loss[-1])
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        features, targets = batch
        features = Variable(features)
        targets = Variable(targets)
        out = model(features)
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        train_loss += [loss.data[0]]
        if (i + 1) % 1000 == 0:
            print(np.max(train_loss), np.mean(train_loss))
            train_loss = []

EPOCH: 0
0.0804
2.5781917572021484 0.8341341545116157
1.7495803833007812 0.3859355239938013
2.3904292583465576 0.31571823291108014
2.082207441329956 0.2902385855235625
2.3022232055664062 0.26127666417288126
1.8474044799804688 0.24793207397690276
EPOCH: 1
0.9765
2.2189457416534424 0.2333276081267395
2.178832769393921 0.23090576609777053
1.6065044403076172 0.21492557313443103
1.5838687419891357 0.2060381634734222
1.9768308401107788 0.20097076870608727
2.2970266342163086 0.20756648269458675
EPOCH: 2
0.9847
2.664994239807129 0.19181009163678392
3.1582534313201904 0.19489831896327087
2.1870269775390625 0.18709432537964313
1.8702701330184937 0.18550107920240771
1.3937405347824097 0.18125683574864707
2.4489450454711914 0.17984646524955314
EPOCH: 3
0.9848
3.065427780151367 0.17222743564764095
1.9703648090362549 0.19061334291644744
1.6735700368881226 0.1792666082298383
1.8166437149047852 0.1744234424589522
1.915728211402893 0.17359374361951632
2.1651902198791504 0.16571110398028396
EPOCH: 4
0.9

<h1>Train MLP

In [55]:
train_loss = []
val_loss = []
model_mlp = MLPNet()
optimizer = optim.Adam(model_mlp.parameters())
for epoch in range(n_epochs):
    print('EPOCH: {}'.format(epoch))
    val_loss += [validate(model_mlp, test_loader)]
    print(val_loss[-1])
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        features, targets = batch
        features = Variable(features)
        targets = Variable(targets)
        out = model_mlp(features)
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        train_loss += [loss.data[0]]
        if (i + 1) % 1000 == 0:
            print(np.max(train_loss), np.mean(train_loss))
            train_loss = []

EPOCH: 0
0.0868
2.4339194297790527 1.281710759960115
2.5909934043884277 1.154912054359913
2.312778949737549 1.1212260313797742
2.7982451915740967 1.1218074560388922
3.1012041568756104 1.0922972073554993
2.335113763809204 1.0780489094108343
EPOCH: 1
0.6513
2.244574785232544 1.0718755698390305
2.707176685333252 1.041054245964624
2.437919855117798 1.0873546012192965
3.0367701053619385 1.0623042701147496
2.3025853633880615 1.0523403420951218
2.5725345611572266 1.0680838023247197
EPOCH: 2
0.6588
2.3025853633880615 1.0417463421588764
2.4176878929138184 1.0324794520940632
2.245634078979492 1.0593762391423807
2.5138306617736816 1.032323133953847
3.0049571990966797 1.0340886754356324
2.774027109146118 1.0594505604356528
EPOCH: 3
0.6507
2.2482972145080566 0.9655954087497229
2.1898789405822754 0.828860950259259
2.588641405105591 0.8287117438032292
2.163239002227783 0.8238641455876641
2.395068407058716 0.816756155544892
2.046461820602417 0.8145547002084204
EPOCH: 4
0.7492
2.129333734512329 0.82455