In [2]:
import torch
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt


In [4]:

# Select the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Directory the MNIST files are downloaded to / read from (shared by both splits).
MNIST_ROOT = './mnist_dataset/mnist'

# Convert images to float tensors in [0, 1], then map them to [-1, 1] via
# (x - 0.5) / 0.5. MNIST is single-channel, so mean and std are one-element
# tuples. Note the trailing comma: `(0.5)` is just the float 0.5, not a sequence.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

# Training and test splits use the same preprocessing; files are fetched on first use.
trainset = mnist.MNIST(MNIST_ROOT, train=True, transform=transform, download=True)
testset = mnist.MNIST(MNIST_ROOT, train=False, transform=transform, download=True)

# Only the training batches are shuffled; evaluation order is kept fixed.
train_data = DataLoader(trainset, batch_size=64, shuffle=True)
test_data = DataLoader(testset, batch_size=128, shuffle=False)


Using cuda device
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./mnist_dataset/mnist\MNIST\raw\train-images-idx3-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw


100.0%


Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw\train-labels-idx1-ubyte.gz





Extracting ./mnist_dataset/mnist\MNIST\raw\train-labels-idx1-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./mnist_dataset/mnist\MNIST\raw\t10k-images-idx3-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./mnist_dataset/mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./mnist_dataset/mnist\MNIST\raw






In [4]:


class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages and a linear readout.

    The shape comments assume a single-channel 28x28 input; the readout
    width of 32 * 4 * 4 only matches that spatial size.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: unpadded 5x5 convolution followed by 2x2 max pooling.
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0)  # -> (16, 24, 24)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)  # -> (16, 12, 12)

        # Stage 2: same layout, widening from 16 to 32 channels.
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0)  # -> (32, 8, 8)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)  # -> (32, 4, 4)

        # Readout: flattened feature maps -> 10 class scores.
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for each sample in `x`."""
        stage1 = self.maxpool1(self.relu1(self.cnn1(x)))
        stage2 = self.maxpool2(self.relu2(self.cnn2(stage1)))
        # Collapse (channels, height, width) into one feature axis per sample.
        flat = stage2.view(stage2.size(0), -1)
        return self.fc1(flat)


In [5]:

# Hyperparameters for this run.
LEARNING_RATE = 0.1
NUM_EPOCHS = 5


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return `(mean_loss, accuracy)`.

    Args:
        model: network mapping an image batch to per-class scores.
        loader: iterable yielding `(images, labels)` batches.
        criterion: loss returning the *mean* loss over a batch.
        device: device the batches are moved to before the forward pass.
        optimizer: when given, the model is put in training mode and updated
            after every batch; when None, the model is put in eval mode and
            the pass runs with autograd disabled.

    Returns:
        Tuple of per-sample mean loss and fraction of correctly classified
        samples. Both are weighted by batch size, so a shorter final batch
        does not count as much as a full one.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    # No graph is needed for evaluation; disabling autograd saves memory and time.
    with torch.set_grad_enabled(training):
        for im, label in loader:
            im = im.to(device)
            label = label.to(device)

            logits = model(im)
            loss = criterion(logits, label)

            if training:
                # zero the parameter gradients
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = label.size(0)
            # criterion returns a batch mean; scale back to a batch sum.
            loss_sum += loss.item() * batch_size
            _, predicted = logits.max(1)
            correct += (predicted == label).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / seen, correct / seen


# build model
model = CNN().to(device)

# define cost (default reduction is the mean over the batch)
criterion = nn.CrossEntropyLoss()

# optimization
optimizer = torch.optim.SGD(model.parameters(), LEARNING_RATE)

# Per-epoch history: training loss/accuracy and evaluation loss/accuracy.
losses = []
acces = []
eval_losses = []
eval_acces = []

for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = _run_epoch(model, train_data, criterion, device, optimizer)
    eval_loss, eval_acc = _run_epoch(model, test_data, criterion, device)

    losses.append(train_loss)
    acces.append(train_acc)
    eval_losses.append(eval_loss)
    eval_acces.append(eval_acc)

    print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
          .format(epoch, train_loss, train_acc, eval_loss, eval_acc))

epoch: 0, Train Loss: 0.184962, Train Acc: 0.942697, Eval Loss: 0.054687, Eval Acc: 0.983287
epoch: 1, Train Loss: 0.054788, Train Acc: 0.983242, Eval Loss: 0.048368, Eval Acc: 0.983979
epoch: 2, Train Loss: 0.042007, Train Acc: 0.986924, Eval Loss: 0.039997, Eval Acc: 0.987243
epoch: 3, Train Loss: 0.033907, Train Acc: 0.989672, Eval Loss: 0.032425, Eval Acc: 0.989122


KeyboardInterrupt: 