In [8]:
import torch
from torch import nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from torch import optim
from sklearn.metrics import accuracy_score
import torch.nn.functional as F
%matplotlib inline

In [2]:
bsz = 10

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=bsz, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
batch_size=bsz, shuffle=True)

In [159]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x, T=1):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x/T, dim=1)

    
class MLPNet(nn.Module):
    def __init__(self):
        super(MLPNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 32)
        self.fc2 = nn.Linear(32, 10)
    def forward(self, x, T=1):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return F.log_softmax(x/T, dim=1)

    
def validate(model, batches_val):
    model.eval()
    y_pred = []
    y_true = []
    for batch in batches_val:
        features, targets = batch
        y_true += targets.tolist()
        y_pred += model(Variable(features)).topk(1)[1].squeeze().data.tolist()
    model.train()
    return accuracy_score(y_true, y_pred)


def dist_loss(out, labels, teacher, T, alpha=1):
    # KLD instead of cross-entropy
    f = nn.NLLLoss()
    return f(out, labels) + nn.KLDivLoss()(out, teacher)*alpha*T*T

In [43]:
model = MLPNet()

In [44]:
validate(model, test_loader)

0.0986

In [22]:
validate(model, test_loader)

0.102

In [58]:
dataiter = iter(train_loader)
images, labels = dataiter.next()

In [85]:
labels


 3
 4
 8
 5
 9
 5
 2
 6
 0
 5
[torch.LongTensor of size 10]

In [158]:
nn.NLLLoss()(out, Variable(labels))

Variable containing:
 0.1028
[torch.FloatTensor of size 1]

In [97]:
nn.NLLLoss()(model(Variable(images), T=1), Variable(labels))

Variable containing:
 0.1548
[torch.FloatTensor of size 1]

In [157]:
out = model(Variable(images), T=1)

In [122]:
tmp = torch.randn((10, 10))

In [124]:
tmp


 0.1539  0.0262 -1.1954  0.1588  0.7278  2.2301 -0.5055 -0.5056  0.1926  0.2100
 1.1154  0.0551  0.7055  0.3275  1.2062 -0.0442 -0.2309 -0.2658  0.5998 -0.8250
 1.2547  0.4871  0.5015 -1.1518  0.9102 -1.9922 -0.9561  0.9763  0.5878  0.4499
 1.0981 -0.4271 -0.3846 -0.0518 -0.2813  0.2450  0.1398 -1.0106 -0.3075 -0.2331
 0.2396 -1.1178 -0.8445 -0.1227 -0.0193 -1.4706 -0.7373  0.0439  0.2850  1.2408
-0.3649  1.8554  0.0346  1.3589 -2.3692 -1.1487  0.2179 -0.9799 -0.8932 -0.1461
 0.5386  1.1653 -1.5315 -1.1691 -0.0695 -0.4274  0.6384 -2.2958 -0.5693 -0.1638
 0.8861  2.8362 -0.7377  0.3162  0.0122  0.2033  0.3851  0.5844 -0.3663 -1.4459
 1.6428  0.1251 -1.1547 -1.7958 -0.5284  0.0292 -1.0785  0.1804  0.6491 -0.7858
-0.2267  0.5243  0.7828 -0.3112  0.0115  0.1925 -0.7195 -0.9484  0.1149 -1.3008
[torch.FloatTensor of size 10x10]

In [125]:
out

Variable containing:
-4.1462 -2.8970 -1.7497 -1.0912 -3.6110 -3.0325 -4.7618 -1.7315 -2.3107 -2.8245
-2.9200 -2.7526 -2.1662 -2.6158 -1.4263 -2.5391 -1.9825 -2.4884 -2.4612 -2.6684
-4.0901 -3.0911 -2.9071 -2.0496 -3.1610 -2.1145 -2.3699 -3.8722 -0.8469 -3.0309
-4.0136 -3.9463 -4.2655 -2.0009 -3.4587 -0.8159 -3.3900 -2.5985 -2.7108 -1.8015
-4.7107 -4.5514 -3.5822 -2.8478 -2.2337 -3.7733 -7.2636 -1.8880 -2.6121 -0.6179
-2.8180 -3.8386 -3.4167 -2.1056 -2.7148 -1.1374 -2.7607 -2.6737 -3.0516 -1.6207
-3.3092 -3.5563 -0.6738 -1.7274 -4.7652 -4.0080 -4.2577 -2.7040 -2.1067 -4.0201
-2.1608 -3.8429 -2.7477 -3.5961 -2.2231 -2.7197 -0.7823 -3.8699 -2.4188 -3.4923
-1.5345 -2.5138 -2.2990 -2.9358 -2.4746 -2.6045 -1.9151 -2.5914 -2.4913 -2.4437
-5.1462 -7.8088 -6.5441 -2.5508 -5.7172 -0.2723 -3.0963 -6.2423 -3.4774 -2.6397
[torch.FloatTensor of size 10x10]

In [131]:
nn.KLDivLoss()(model(Variable(images), T=1), Variable(out.data))

Variable containing:
 0
[torch.FloatTensor of size 1]

In [145]:
dist_loss(model(Variable(images), T=1) , Variable(labels), Variable(out.data), 0)

Variable containing:
 0.2022
[torch.FloatTensor of size 1]

In [89]:
Variable(targets)

RuntimeError: Variable data has to be a tensor, but got Variable

In [16]:

criterion = nn.NLLLoss()
n_epochs = 5
print_every = 1000

<h1>Train CNN

In [57]:
train_loss = []
val_loss = []
model = Net()
optimizer = optim.Adam(model.parameters())
for epoch in range(n_epochs):
    print('EPOCH: {}'.format(epoch))
    val_loss += [validate(model, test_loader)]
    print(val_loss[-1])
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        features, targets = batch
        features = Variable(features)
        targets = Variable(targets)
        out = model(features)
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        train_loss += [loss.data[0]]
        if (i + 1) % 1000 == 0:
            print(np.max(train_loss), np.mean(train_loss))
            train_loss = []

EPOCH: 0
0.0786
2.519871234893799 0.8577690431810916
1.9651288986206055 0.4190021920343861
2.404747247695923 0.33846910459490026
1.8743788003921509 0.32285841274261473
2.3775455951690674 0.2929125738083385
3.9713432788848877 0.26570564077064046
EPOCH: 1
0.9756
2.476496696472168 0.24641338495412493
2.035698652267456 0.2383810167213669
3.515939235687256 0.2246983324928442
2.2007484436035156 0.219863897053001
2.840733051300049 0.22937279218275217
2.179814577102661 0.21133869342284742
EPOCH: 2
0.9806
1.862501859664917 0.20715391832842578
2.0298733711242676 0.20375876118706945
1.8114639520645142 0.20473041690396349
2.1009151935577393 0.20983489174311398
1.7443807125091553 0.18402129105722997
2.7196121215820312 0.20471289938790868
EPOCH: 3
0.983
2.0051004886627197 0.19047316831730132
2.773045301437378 0.1960992682632641
3.0417144298553467 0.19267630136001845
1.84195876121521 0.18145983694877213
2.0361733436584473 0.17788843801156326
2.326615810394287 0.18020267077093013
EPOCH: 4
0.9852
2.596

<h1>Train MLP

In [55]:
train_loss = []
val_loss = []
model_mlp = MLPNet()
optimizer = optim.Adam(model_mlp.parameters())
for epoch in range(n_epochs):
    print('EPOCH: {}'.format(epoch))
    val_loss += [validate(model_mlp, test_loader)]
    print(val_loss[-1])
    for i, batch in enumerate(train_loader):
        optimizer.zero_grad()
        features, targets = batch
        features = Variable(features)
        targets = Variable(targets)
        out = model_mlp(features)
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        train_loss += [loss.data[0]]
        if (i + 1) % 1000 == 0:
            print(np.max(train_loss), np.mean(train_loss))
            train_loss = []

EPOCH: 0
0.0868
2.4339194297790527 1.281710759960115
2.5909934043884277 1.154912054359913
2.312778949737549 1.1212260313797742
2.7982451915740967 1.1218074560388922
3.1012041568756104 1.0922972073554993
2.335113763809204 1.0780489094108343
EPOCH: 1
0.6513
2.244574785232544 1.0718755698390305
2.707176685333252 1.041054245964624
2.437919855117798 1.0873546012192965
3.0367701053619385 1.0623042701147496
2.3025853633880615 1.0523403420951218
2.5725345611572266 1.0680838023247197
EPOCH: 2
0.6588
2.3025853633880615 1.0417463421588764
2.4176878929138184 1.0324794520940632
2.245634078979492 1.0593762391423807
2.5138306617736816 1.032323133953847
3.0049571990966797 1.0340886754356324
2.774027109146118 1.0594505604356528
EPOCH: 3
0.6507
2.2482972145080566 0.9655954087497229
2.1898789405822754 0.828860950259259
2.588641405105591 0.8287117438032292
2.163239002227783 0.8238641455876641
2.395068407058716 0.816756155544892
2.046461820602417 0.8145547002084204
EPOCH: 4
0.7492
2.129333734512329 0.82455