In [1]:
import torch, tqdm 
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('GPU State:', device)

GPU State: cuda:0


In [3]:
# Transform
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,)),]
)

In [4]:
# Data
trainSet = datasets.MNIST(root='MNIST', download=True, train=True, transform=transform)
testSet = datasets.MNIST(root='MNIST', download=True, train=False, transform=transform)
trainLoader = torch.utils.data.DataLoader(trainSet, batch_size=64, shuffle=True)
testLoader = torch.utils.data.DataLoader(testSet, batch_size=64, shuffle=False)

In [6]:
# Model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.main = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=10, kernel_size=2),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=10, out_channels=100, kernel_size=2),
            torch.nn.Flatten(),
            torch.nn.ReLU(),
            torch.nn.Linear(in_features=100*676, out_features=10),
            torch.nn.Softmax(dim=1)
        )

    def forward(self, input):
        return self.main(input)


net = Net().to(device)
print(net)

Net(
  (main): Sequential(
    (0): Conv2d(1, 10, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 100, kernel_size=(2, 2), stride=(1, 1))
    (3): Flatten()
    (4): ReLU()
    (5): Linear(in_features=67600, out_features=10, bias=True)
    (6): Softmax(dim=1)
  )
)


In [7]:
# Parameters
epochs = 100
lr = 0.002
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.002, momentum=0.9)

In [7]:
# Train
for epoch in range(epochs):
    running_loss = 0.0

    for times, data in enumerate(trainLoader):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.view(inputs.shape[0], -1)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Foward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if times % 100 == 99 or times+1 == len(trainLoader):
            print('[%d/%d, %d/%d] loss: %.3f' % (epoch+1, epochs, times+1, len(trainLoader), running_loss/2000))

print('Training Finished.')

[1/3, 100/938] loss: 0.108
[1/3, 200/938] loss: 0.183
[1/3, 300/938] loss: 0.226
[1/3, 400/938] loss: 0.256
[1/3, 500/938] loss: 0.281
[1/3, 600/938] loss: 0.304
[1/3, 700/938] loss: 0.324
[1/3, 800/938] loss: 0.344
[1/3, 900/938] loss: 0.362
[1/3, 938/938] loss: 0.369
[2/3, 100/938] loss: 0.018
[2/3, 200/938] loss: 0.036
[2/3, 300/938] loss: 0.052
[2/3, 400/938] loss: 0.069
[2/3, 500/938] loss: 0.085
[2/3, 600/938] loss: 0.100
[2/3, 700/938] loss: 0.116
[2/3, 800/938] loss: 0.130
[2/3, 900/938] loss: 0.144
[2/3, 938/938] loss: 0.150
[3/3, 100/938] loss: 0.015
[3/3, 200/938] loss: 0.029
[3/3, 300/938] loss: 0.043
[3/3, 400/938] loss: 0.057
[3/3, 500/938] loss: 0.070
[3/3, 600/938] loss: 0.083
[3/3, 700/938] loss: 0.096
[3/3, 800/938] loss: 0.108
[3/3, 900/938] loss: 0.120
[3/3, 938/938] loss: 0.124
Training Finished.


In [8]:
correct = 0
total = 0

with torch.no_grad():
    for data in testLoader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = inputs.view(inputs.shape[0], -1)

        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100*correct / total))

class_correct = [0 for i in range(10)]
class_total = [0 for i in range(10)]

with torch.no_grad():
    for data in testLoader:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.view(inputs.shape[0], -1)

        outputs = net(inputs)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(10):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
            print(class_correct)
            print(class_total)

for i in range(10):
    print('Accuracy of %d: %3f' % (i, (class_correct[i]/class_total[i])))

Accuracy of the network on the 10000 test images: 92 %
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
[0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
[0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
[0, 1, 1, 0, 0, 0, 0, 1, 0, 0]
[0, 1, 1, 0, 0, 0, 0, 1, 0, 0]
[1, 1, 1, 0, 0, 0, 0, 1, 0, 0]
[1, 1, 1, 0, 0, 0, 0, 1, 0, 0]
[1, 1, 1, 0, 1, 0, 0, 1, 0, 0]
[1, 1, 1, 0, 1, 0, 0, 1, 0, 0]
[1, 2, 1, 0, 1, 0, 0, 1, 0, 0]
[1, 2, 1, 0, 1, 0, 0, 1, 0, 0]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 0]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 0]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 1]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 1]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 1]
[1, 2, 1, 0, 2, 1, 0, 1, 0, 1]
[1, 2, 1, 0, 2, 0, 0, 1, 0, 2]
[1, 2, 1, 0, 2, 1, 0, 1, 0, 2]
[1, 2, 1, 0, 2, 0, 0, 2, 0, 2]
[1, 2, 1, 0, 2, 1, 0, 2, 0, 2]
[1, 2, 1, 0, 3, 0, 0, 2, 0, 2]
[1, 2, 1, 0, 3, 1, 0, 2, 0, 2]
[1, 2, 1, 0, 3, 0, 0, 2, 0, 2]
[1, 2, 1, 0, 3, 1, 1, 2, 0, 2]
[1, 2, 1, 0, 4, 0, 0, 2, 0, 2]
[1, 2, 1, 0, 4, 1, 1, 2, 0, 2]
[1, 2, 1, 1, 4, 0, 0, 2, 0, 2]
[1, 2, 1, 1, 4, 1, 1, 2, 0, 2]
[2, 2, 1, 1, 4,