In [28]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from collections import defaultdict
from torchviz import make_dot

In [29]:
def getLoaders(batch_size=256, num_workers=4):
    transform = transforms.Compose([
                    transforms.ToTensor(),
                ])
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, 
                                      transform=transform, download=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False,
                                    transform=transform, download=True)
    
    trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, 
                                              num_workers=num_workers, shuffle=True)
    testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size,
                                             num_workers=num_workers)
    return trainloader, testloader

In [34]:
class mlp(nn.Module):
    def __init__(self, num_hidden=1):
        super(mlp, self).__init__()
        self.hidden1 = nn.Sequential(nn.Linear(train_x.shape[1],300),
                                     nn.BatchNorm1d(300),
                                     nn.ReLU())
        if num_hidden==2:
            self.hidden2 = nn.Sequential(nn.Linear(300, 300),
                                         nn.BatchNorm1d(300),
                                         nn.ReLU())
        self.output = nn.Sequential(nn.Linear(300, 10),
                                    nn.Softmax(dim=1))
        self.num_hidden = num_hidden
        
        # Optimizer
        if opt=='adam':
            self.optimizer = optim.Adam(self.parameters())
        elif opt=='adad':
            self.optimizer = optim.Adadelta(self.parameters())
        elif opt=='admx':
            self.optimizer = optim.Adamax(self.parameters())
        elif opt=='sgdm':
            self.optimizer = optim.SGD(self.parameters(), lr=0.01, momentum=0.7)
            
        self.criterion = nn.CrossEntropyLoss()
    
    def forward(self, x):
        out = self.hidden1(x)
        if self.num_hidden==2:
            out = self.hidden2(x)
        out = self.output(out)
        return out
    
    def optimize(self, inputs, labels):
        self.optimizer.zero_grad()
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        loss.backward()
        self.optimizer.step()
        return loss.data[0]
    
    def getLoss(self, inputs, labels):
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        return loss.data[0]
    
    def getCorrect(self, inputs, labels):
        outputs = self(inputs)
        _, predicted = torch.max(outputs.data, 1)
        return (predicted == labels.data).sum()

In [39]:
epochs = 20
trainloader, testloader = getLoaders()

def getVariable(data):
    inputs, labels = data
    return Variable(inputs).cuda(), Variable(labels).cuda()

mlp2_losses = defaultdict(lambda : defaultdict(list))
mlp2_err = defaultdict(lambda : defaultdict(list))

for opt in ['sgdm','adam','adad','admx']:
    print('Optimizer : ',opt)
    mlp2 = mlp()
    mlp2.cuda()
    for epoch in range(epochs):
        #print('Starting epoch : ',epoch+1)
        train_loss = 0.0
        train_correct = 0
        n_train = 0
        for i, data in enumerate(trainloader):
            inputs, labels = getVariable(data)
            train_loss += mlp2.optimize(inputs, labels)
            train_correct += mlp2.getCorrect(inputs, labels)
            n_train += len(labels)

        test_loss = 0.0
        test_correct = 0
        n_test = 0
        for i, data in enumerate(testloader):
            inputs, labels = getVariable(data)
            test_loss += mlp2.getLoss(inputs, labels)
            test_correct += mlp2.getCorrect(inputs, labels)
            n_test += len(labels)

        #print('Train Loss : ',train_loss/n_train)
        #print('Train Accuracy : ',1.0*train_correct/n_train)
        #print('Test Loss : ',test_loss/n_test)
        #print('Test Accuracy : ',1.0*test_correct/n_test)
        #print('#############################################')
        mlp2_losses['train'][opt].append(train_loss/n_train)
        mlp2_losses['test'][opt].append(test_loss/n_test)
        mlp2_err['train'][opt].append(1.0-1.0*train_correct/n_train)
        mlp2_err['test'][opt].append(1.0-1.0*test_correct/n_test)


('Optimizer : ', 'sgdm')


AssertionError: Torch not compiled with CUDA enabled

In [19]:
print(mlp_3layer)

mlp(
  (hidden1): Sequential(
    (0): Linear(in_features=784, out_features=300, bias=True)
    (1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU()
  )
  (hidden2): Sequential(
    (0): Linear(in_features=300, out_features=300, bias=True)
    (1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU()
  )
  (output): Linear(in_features=300, out_features=10, bias=True)
)
