In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, datasets


In [9]:
# Load the MNIST training and test splits (downloaded into the current
# directory on first use). ToTensor converts each image to a float tensor.
BATCH_SIZE = 100
to_tensor = transforms.ToTensor()
trainset = datasets.MNIST('', train=True, download=True, transform=to_tensor)
testset = datasets.MNIST('', train=False, download=True, transform=to_tensor)

# Page-locked (pinned) host memory only matters for host-to-GPU copies, and
# requesting it on a machine without CUDA just produces a warning, so enable
# it only when a GPU is actually present.
PIN_MEMORY = torch.cuda.is_available()

# Serve the datasets in mini-batches of BATCH_SIZE samples.
# Training batches are reshuffled every epoch; the test set is read in a fixed
# order because shuffling cannot change accuracy and only makes runs harder to
# compare.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=PIN_MEMORY)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=PIN_MEMORY)

In [4]:
# Build the neural network: two convolution + max-pool stages followed by
# four fully connected layers.
class Model(nn.Module):
  """Small convolutional classifier for single-channel 28x28 images.

  Args:
    num_classes: number of output classes, i.e. the width of the final
      layer. Defaults to 10 (the ten MNIST digits). The previous hard-coded
      width of 64 let the network assign probability to labels that do not
      exist in the dataset.

  forward() returns log-probabilities of shape (batch, num_classes), suitable
  for F.nll_loss.
  """

  def __init__(self, num_classes=10):
    super().__init__()
    # convolution layers
    self.conv1 = nn.Conv2d(1, 64, 5) # input channel=1, output channel=64, kernel size=5
    self.pool = nn.MaxPool2d(2, 2)   # 2x2 window, stride 2: halves height and width
    self.conv2 = nn.Conv2d(64, 32, 5)
    # fully connected layers
    self.fc1 = nn.Linear(32*4*4, 128) # 32 channels, 4 * 4 size
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 64)
    self.fc4 = nn.Linear(64, num_classes) # one logit per class

  def forward(self, x):
    """Map a batch of images (batch, 1, 28, 28) to per-class log-probabilities."""
    # state size: 28*28 (input image size)
    x = self.pool(F.relu(self.conv1(x)))
    # state size: 12*12 (5x5 conv: 28 -> 24, pool: 24 -> 12)
    x = self.pool(F.relu(self.conv2(x)))
    # state size: 4*4 (5x5 conv: 12 -> 8, pool: 8 -> 4)
    # flatten each sample's 32 feature maps into one vector for the linear layers
    x = x.view(-1, 32*4*4)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    # no activation on the last layer: log_softmax below normalizes the raw logits
    x = self.fc4(x)

    return F.log_softmax(x, dim=1)



In [5]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialization (and the loader's default shuffling, which draws from the
# same generator) is repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Initialize model
model = Model()
# Create an Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.train()
epochs = 5
LOG_EVERY = 100  # report the average loss once per this many mini-batches
for epoch in range(epochs):
  running_loss = 0.0
  for i, (X, y) in enumerate(trainloader):
    # Training process
    # reset gradients so they do not accumulate across parameter updates
    optimizer.zero_grad()
    # forward pass: per-class log-probabilities for the mini-batch
    predicted = model(X)
    # negative log-likelihood of the ground-truth labels
    loss = F.nll_loss(predicted, y)
    # compute the gradient
    loss.backward()
    # optimize the network
    optimizer.step()
    running_loss += loss.item()
    # print the mean loss of the last LOG_EVERY mini-batches, then restart the sum
    if (i + 1) % LOG_EVERY == 0:
      print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss/LOG_EVERY))
      running_loss = 0.0


[1,   100] loss: 1.960
[1,   200] loss: 0.457
[1,   300] loss: 0.223
[1,   400] loss: 0.157
[1,   500] loss: 0.134
[1,   600] loss: 0.108
[2,   100] loss: 0.095
[2,   200] loss: 0.088
[2,   300] loss: 0.079
[2,   400] loss: 0.068
[2,   500] loss: 0.072
[2,   600] loss: 0.081
[3,   100] loss: 0.055
[3,   200] loss: 0.051
[3,   300] loss: 0.063
[3,   400] loss: 0.056
[3,   500] loss: 0.053
[3,   600] loss: 0.050
[4,   100] loss: 0.043
[4,   200] loss: 0.046
[4,   300] loss: 0.040
[4,   400] loss: 0.043
[4,   500] loss: 0.049
[4,   600] loss: 0.042
[5,   100] loss: 0.030
[5,   200] loss: 0.033
[5,   300] loss: 0.044
[5,   400] loss: 0.035
[5,   500] loss: 0.034
[5,   600] loss: 0.037


In [11]:
# Evaluate the trained network on the training set and the test set
def count_correct(net, loader):
  """Run `net` over every batch of `loader` and tally its predictions.

  Gradients are disabled for the pass. The predicted class of each sample is
  the argmax over dim 1 of the network output.

  Returns:
    (correct, total): number of samples whose predicted class equals the
    label, and number of samples seen.
  """
  correct = 0
  total = 0
  with torch.no_grad():
    for X, y in loader:
      output = net(X)
      correct += (torch.argmax(output, dim=1) == y).sum().item()
      total += y.size(0)
  return correct, total


# switch to inference mode before measuring accuracy
model.eval()

correct, total = count_correct(model, trainloader)
print(f'Training data Accuracy: {correct}/{total} = {round(correct/total, 3)}')

correct, total = count_correct(model, testloader)
print(f'Testing data Accuracy: {correct}/{total} = {round(correct/total, 3)}')

Training data Accuracy: 59250/60000 = 0.988
Testing data Accuracy: 9860/10000 = 0.986
