In [1]:
SEED = 11

In [2]:
import torch
import matplotlib.pyplot as plt
from IPython.display import display, Math

In [3]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

In [212]:
def conv_2_block(in_dim, out_dim):
    model = nn.Sequential(
        nn.Conv2d(in_dim, out_dim, kernel_size = 3, padding = 1),
        nn.ReLU(),
        nn.Conv2d(out_dim, out_dim, kernel_size = 3, padding = 1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
    )
    return(model)

def conv_3_block(in_dim, out_dim):
    model = nn.Sequential(
        nn.Conv2d(in_dim, out_dim, kernel_size = 3, padding = 1),
        nn.ReLU(),
        nn.Conv2d(out_dim, out_dim, kernel_size = 3, padding = 1),
        nn.ReLU(),
        nn.Conv2d(out_dim, out_dim, kernel_size = 3, padding = 1),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
    )
    return(model)

class myVGG(nn.Module):
    def __init__(self, base_dim, num_classes = 10):
        super(myVGG, self).__init__()
        self.feature = nn.Sequential(
             conv_2_block(1, base_dim),
             conv_2_block(base_dim, 2*base_dim),
             conv_3_block(2*base_dim, 4*base_dim),
             conv_3_block(4*base_dim, 8*base_dim),
#              conv_3_block(8*base_dim, 8*base_dim),
        )
        
        self.fc_layer = nn.Sequential(
             nn.Linear(8*base_dim, 100),
             nn.ReLU(True),
             nn.Linear(100, 20),
             nn.ReLU(True),
             nn.Linear(20, num_classes)
        )
    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layer(x)
        return(x)

In [213]:
# load mnist dataset 
mnist_train = datasets.MNIST("./", train=True,
                            transform = transforms.ToTensor(),
                            target_transform=None,
                            download=True)

In [214]:
mnist_test = datasets.MNIST("./", train=False,
                            transform = transforms.ToTensor(),
                            target_transform=None,
                            download=True)

In [215]:
device = torch.device("cuda:0")
model = myVGG(base_dim = 16, num_classes=len(mnist_train.classes)).to(device)


In [221]:
batch_size = 128
learning_rate = 0.0002
num_epoch = 10

In [222]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [223]:
train_loader = DataLoader(mnist_train, batch_size = batch_size,
                         shuffle = True, num_workers = 2,
                         drop_last = True)

In [224]:
test_loader = DataLoader(mnist_test, batch_size = batch_size,
                         shuffle = False, num_workers = 2,
                         drop_last = True)

In [225]:
arr_loss = []
for ii in range(num_epoch):
    for jj, [image, label] in enumerate(train_loader):
        x = image.to(device)
        y_ = label.to(device)
        
        optimizer.zero_grad() # initialize 0 for each data
        output = model.forward(x)
        loss = loss_func(output, y_)
        loss.backward() # calculate back prop (gradient)
        optimizer.step() # update weight
        if jj % 1000 == 0:
            print(loss)
            arr_loss.append(loss.cpu().detach().numpy())
        

tensor(0.0234, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0043, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0002, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0008, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0007, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0001, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0373, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0004, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0003, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0003, device='cuda:0', grad_fn=<NllLossBackward>)


In [226]:
correct = 0
total = 0

with torch.no_grad():
    for image, label in test_loader:
        x = image.to(device)
        y_ = label.to(device)
        output = model.forward(x)
        _, output_index = torch.max(output, 1)
        total += label.size(0)
        correct += (output_index == y_).sum().float()
    print('Test acc: {}'.format(correct/total))

Test acc: 0.9940905570983887
