In [1]:
import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda:0")

### Extract, Transform, Load

In [3]:
# Extract and Transform
train_set = torchvision.datasets.MNIST(
    root=r'./Dataset',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

test_set = torchvision.datasets.MNIST(
    root=r'./Dataset',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

# Load
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size = 100,
    shuffle = True  # to have the data reshuffled at every epoch
)

test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size = 100,
    shuffle = False
)

In [4]:
batch = next(iter(train_loader))
images, labels = batch
print(images.shape,'\t',labels.shape)

torch.Size([100, 1, 28, 28]) 	 torch.Size([100])


### Create Network

In [5]:
class ConvNetwork(nn.Module):
    def __init__(self):
        super(ConvNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=3, stride=1, padding=1)

        self.fc1 = nn.Linear(in_features=12*6*6, out_features=120)
        # self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=120, out_features=10)
        
    def forward(self, d):
        # conv1 layer
        d = self.conv1(d)
        d = nn.functional.relu(d)
        d = nn.functional.max_pool2d(d, kernel_size=2, stride=2)
    
        # conv2 layer
        d = self.conv2(d)
        d = nn.functional.relu(d)
        d = nn.functional.max_pool2d(d, kernel_size=2, stride=2)

        # fully connected Layer1
        # d = d.reshape(-1, 12*4*4)
        d = d.flatten(start_dim=1)
        d = self.fc1(d)
        d = nn.functional.relu(d)

        # fully connected Layer2
        # d = self.fc2(d)
        # d = nn.functional.relu(d)

        # output layer
        d = self.out(d)
        # d = nn.functional.softmax(d)

        return d

In [6]:
network = ConvNetwork()

### Train

In [8]:
import torch.optim as optim

from torch.utils.tensorboard import SummaryWriter

In [9]:
def test_model(model, loss_fn, test_loader):
    model.eval()

    loss = 0.
    num_correct = 0.
    
    for batch_idx, (x, y) in enumerate(test_loader):
        y_pred = model(x)
        loss += loss_fn(y_pred, y).item()
        _, predicted = torch.max(y_pred.data, 1)
        num_correct += (predicted == y).sum().item()

    model.train()
    
    loss /= len(test_loader.dataset)
    num_correct /= len(test_loader.dataset)
    
    return loss, num_correct

In [10]:
def get_num_correct(preds, labels):
    '''
    Calculate the number of correct predictions
    '''
    return preds.argmax(dim=1).eq(labels).sum().item()

In [11]:
# Learning Rate
lr = 0.008

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(network.parameters(), lr=lr) 

# Training times
for epoch in range(20):
    train_loss = 0
    train_correct = 0
    
    for batch in train_loader:
        images, labels = batch
        
        # forward
        preds = network(images)
        
        # Compute the loss value.
        loss = loss_function(preds, labels)
        train_loss += loss.item()
        train_correct += get_num_correct(preds, labels)
        
        # Update the weights
        optimizer.zero_grad()  # Initialize to zero.
        loss.backward()
        optimizer.step()       # Updating the weights
        
        
    # Compute the average loss and accuracy.
    train_loss /= len(train_set)
    train_correct /= float(len(train_set))
    
    # Evaluate the model on the test set.
    test_loss, test_correct = test_model(network, loss_function, test_loader)
    
    # Report progress
    print(f'#{epoch}: loss_test={test_loss:.4f}, loss_train={train_loss:.4f}, acc_test={test_correct:.4f}, acc_train={train_correct:.4f}')
    

# save model
torch.save(network, './model/Model3.pkl')


#0: loss_test=0.0204, loss_train=0.0226, acc_test=0.5294, acc_train=0.2776
#1: loss_test=0.0043, loss_train=0.0083, acc_test=0.8668, acc_train=0.7732
#2: loss_test=0.0031, loss_train=0.0038, acc_test=0.9070, acc_train=0.8844
#3: loss_test=0.0028, loss_train=0.0030, acc_test=0.9174, acc_train=0.9082
#4: loss_test=0.0022, loss_train=0.0025, acc_test=0.9364, acc_train=0.9238
#5: loss_test=0.0019, loss_train=0.0021, acc_test=0.9437, acc_train=0.9347
#6: loss_test=0.0016, loss_train=0.0018, acc_test=0.9500, acc_train=0.9442
#7: loss_test=0.0016, loss_train=0.0016, acc_test=0.9479, acc_train=0.9508
#8: loss_test=0.0013, loss_train=0.0015, acc_test=0.9595, acc_train=0.9555
#9: loss_test=0.0015, loss_train=0.0013, acc_test=0.9527, acc_train=0.9601
#10: loss_test=0.0011, loss_train=0.0012, acc_test=0.9661, acc_train=0.9640
#11: loss_test=0.0010, loss_train=0.0011, acc_test=0.9693, acc_train=0.9670
#12: loss_test=0.0009, loss_train=0.0010, acc_test=0.9703, acc_train=0.9684
#13: loss_test=0.0008,