In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time

from torchvision import datasets, transforms
from tensorboardX import SummaryWriter

# Global run configuration: compute device and mini-batch size.
# Flip use_cuda to True by hand to run on the GPU instead of the CPU.
use_cuda = False
device = torch.device("cuda") if use_cuda else torch.device("cpu")
batch_size = 64

In [3]:
# Seed NumPy's and PyTorch's global random number generators with the same
# fixed value so that runs from a fresh kernel draw the same random numbers.
np.random.seed(42)
# Kept as the last expression of the cell: its return value (the torch
# Generator object) is what the cell displays as output.
torch.manual_seed(42)

<torch._C.Generator at 0x7f7af9b10730>

In [4]:
class Net(nn.Module):
    """Fully connected classifier with one hidden layer (784 -> 200 -> 10).

    The forward pass reshapes its input to rows of 28*28 values, applies the
    hidden layer followed by ReLU, and returns the raw output-layer scores
    (no softmax is applied here).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Hidden layer first, output layer second (creation order is kept so
        # seeded initialisation is unchanged).
        self.fc = nn.Linear(784, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        """Return a (rows, 10) tensor of class scores for input ``x``."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc(flat))
        return self.fc2(hidden)
    
class ConvNet(nn.Module):
    """Six-convolution classifier followed by two fully connected layers.

    Every convolution is followed by ReLU and then batch normalisation.
    Max-pooling (window 2) plus 2-D dropout is applied after the second and
    after the fourth convolution. The feature map left after the sixth
    convolution is flattened and passed through ``fc1`` (ReLU) and ``fc2``;
    the raw scores of ``fc2`` are returned.

    ``fc1`` expects 128 flattened features, which is what a 1x28x28 input
    produces after the layers above.
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        # NOTE: modules are created in the same order as before so that
        # seeded parameter initialisation and state_dict ordering are stable.
        # Stage 1: two 5x5 convolutions with 32 output channels each.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5, 5))
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(5, 5))
        self.bn2 = nn.BatchNorm2d(32)
        # One dropout module, applied after both pooling steps in forward().
        self.conv2_drop = nn.Dropout2d(p=0.2)
        # Classifier head.
        self.fc1 = nn.Linear(128, 100)
        self.fc2 = nn.Linear(100, 10)
        # Stage 2: two 3x3 convolutions with 64 output channels each.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3, 3))
        self.bn4 = nn.BatchNorm2d(64)
        # Stage 3: a 3x3 convolution then a 1x1 convolution, 128 channels.
        self.conv5 = nn.Conv2d(64, 128, kernel_size=(3, 3))
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=(1, 1))
        self.bn6 = nn.BatchNorm2d(128)

    def forward(self, x):
        """Return a (rows, 10) tensor of class scores for image batch ``x``."""
        # Stage 1: conv -> ReLU -> BN twice, then pool and dropout.
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(x)))
        x = self.conv2_drop(F.max_pool2d(x, 2))

        # Stage 2: same pattern with the 3x3 convolutions.
        x = self.bn3(F.relu(self.conv3(x)))
        x = self.bn4(F.relu(self.conv4(x)))
        x = self.conv2_drop(F.max_pool2d(x, 2))

        # Stage 3: no pooling or dropout.
        x = self.bn5(F.relu(self.conv5(x)))
        x = self.bn6(F.relu(self.conv6(x)))

        # Flatten channels x height x width into one feature vector per row.
        _, channels, height, width = x.size()
        features = x.view(-1, channels * height * width)

        return self.fc2(F.relu(self.fc1(features)))

In [5]:
# MNIST training and test splits, stored under (and downloaded into)
# 'mnist_data/'. Both splits get the same preprocessing: conversion to a
# tensor, then normalisation with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics -- TODO confirm).
train_dataset = datasets.MNIST(
    root='mnist_data/',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
test_dataset = datasets.MNIST(
    root='mnist_data/',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [6]:
# Mini-batch iterators over the two splits. Only the training data is
# reshuffled; the test data is read in its stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Build the fully connected network and move its parameters to the
# configured device.
model = Net()
model = model.to(device)
# Switch to training mode; as the last expression of the cell this also
# displays the module summary.
model.train()

Net(
  (fc): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=10, bias=True)
)

In [8]:
learning_rate = 0.0001
num_epochs = 20

# SGD vs Adam: swap the commented-out line in to compare the two optimizers.
#opt = optim.SGD(params=model.parameters(), lr=learning_rate)
opt = optim.Adam(params=model.parameters(), lr=learning_rate)

ce_loss = torch.nn.CrossEntropyLoss()

# TensorBoard logger. It is left open so later cells can keep logging to the
# same run; call writer.close() once logging is finished.
writer = SummaryWriter()
tot_steps = 0  # global optimisation-step counter, used as the TensorBoard x-axis

for epoch in range(1, num_epochs + 1):
    t1 = time.time()

    # ---- Training pass over the whole training set ----
    for batch_idx, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        tot_steps += 1
        opt.zero_grad()
        out = model(x_batch)
        batch_loss = ce_loss(out, y_batch)

        # Log the training accuracy/loss of every 100th batch.
        if batch_idx % 100 == 0:
            pred = torch.max(out, dim=1)[1]
            # Divide by the real number of samples in this batch: the last
            # batch of an epoch can be shorter than batch_size.
            acc = pred.eq(y_batch).sum().item() / float(y_batch.size(0))

            writer.add_scalar('data/accuracy', acc, tot_steps)
            writer.add_scalar('data/loss', batch_loss.item(), tot_steps)

        batch_loss.backward()
        # opt.step() applies the optimizer's update rule; for plain SGD this
        # is equivalent to `param.data -= learning_rate * param.grad.data`
        # applied to every parameter.
        opt.step()

    # ---- Evaluation pass over the whole test set ----
    # Evaluate in eval mode (matters for models with dropout / batch norm)
    # and without building the autograd graph, which is not needed here.
    model.eval()
    tot_test, tot_acc = 0.0, 0.0
    with torch.no_grad():
        for batch_idx, (x_batch, y_batch) in enumerate(test_loader):
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            out = model(x_batch)
            pred = torch.max(out, dim=1)[1]
            acc = pred.eq(y_batch).sum().item()
            tot_acc += acc
            tot_test += x_batch.size()[0]
    # Restore training mode for the next epoch (and for any later cell).
    model.train()
    t2 = time.time()

    # The reported time covers both the training and the evaluation pass.
    print('Epoch %d: Accuracy %.5lf [%.2lf seconds]' % (epoch, tot_acc/tot_test, t2-t1))

Epoch 1: Accuracy 0.91770 [10.37 seconds]
Epoch 2: Accuracy 0.93580 [10.41 seconds]
Epoch 3: Accuracy 0.94730 [9.58 seconds]
Epoch 4: Accuracy 0.95460 [9.44 seconds]
Epoch 5: Accuracy 0.95860 [9.48 seconds]
Epoch 6: Accuracy 0.96420 [9.23 seconds]
Epoch 7: Accuracy 0.96820 [9.76 seconds]
Epoch 8: Accuracy 0.96860 [11.28 seconds]
Epoch 9: Accuracy 0.97150 [10.33 seconds]
Epoch 10: Accuracy 0.97330 [9.82 seconds]
Epoch 11: Accuracy 0.97450 [10.75 seconds]
Epoch 12: Accuracy 0.97660 [10.58 seconds]
Epoch 13: Accuracy 0.97560 [10.65 seconds]
Epoch 14: Accuracy 0.97700 [10.47 seconds]
Epoch 15: Accuracy 0.97820 [11.11 seconds]
Epoch 16: Accuracy 0.97890 [12.58 seconds]
Epoch 17: Accuracy 0.97820 [11.16 seconds]
Epoch 18: Accuracy 0.97820 [12.21 seconds]
Epoch 19: Accuracy 0.97950 [11.59 seconds]
Epoch 20: Accuracy 0.97920 [10.83 seconds]
