In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt



In [2]:
# --- hyperparameters ---
inp_size = 28*28 # image size: 28 by 28 pixels, flattened into one vector
hidden_size = 100 # width of the network's hidden layer
op_size = 10 # number of output classes
num_epochs = 2
batch_size = 100
learning_rate = 0.01

# Seed PyTorch's global RNG so that weight initialisation and the shuffled
# batch order are repeatable when the cells are run in order from a fresh
# kernel. Without this every run yields different losses/accuracy.
random_seed = 42
torch.manual_seed(random_seed)

# load data
# `data` is the training split (train=True, downloaded if missing).
# NOTE: despite its name, `target` is the held-out test split (train=False),
# not a tensor of labels.
data = torchvision.datasets.MNIST(root='./MNIST_data', train=True, transform = transforms.ToTensor(), download=True)
target = torchvision.datasets.MNIST(root='./MNIST_data', train=False , transform = transforms.ToTensor())

In [3]:
# Wrap both splits in mini-batch iterators of `batch_size` items each.
# Only the training split is shuffled; the test split keeps its stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=target,
    batch_size=batch_size,
    shuffle=False,
)

In [4]:
# Peek at one mini-batch to sanity-check tensor shapes before training.
examples = iter(train_loader)
first_batch = next(examples)
samples, labels = first_batch
# 100 samples and 100 labels -> each sample has exactly one label
print(samples.shape, labels.shape)

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [5]:
class NeuralNet(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Data flow: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). The final layer's output is
    returned as-is; no softmax is applied inside the module.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the layers.

        Args:
            input_size: number of features in each flattened input row.
            hidden_size: number of units in the hidden layer.
            num_classes: number of output scores per input row.
        """
        super().__init__()
        # Layers are created in l1 -> l2 order so seeded initialisation
        # consumes the RNG in the same sequence every time.
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map a batch of flattened inputs to per-class raw scores.

        Args:
            x: tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``.
        """
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

# Instantiate the network with the sizes chosen in the configuration cell.
model = NeuralNet(input_size=inp_size, hidden_size=hidden_size, num_classes=op_size)

In [6]:
# Loss: cross-entropy between the model's raw output scores and the batch labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: Adam over every parameter of `model`, using the configured step size.
optimiser = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [7]:
# training loop
# Put the model in training mode explicitly, so re-running this cell after an
# evaluation cell does not silently train in eval mode.
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # curr images.shape -> (batch, 1, 28, 28)
        # flatten every image into one row -> (batch, 784).
        # The batch size is read from the tensor and the feature count is
        # inferred, so no image size is hard-coded here.
        images = images.reshape(images.shape[0], -1)

        # clear old gradients BEFORE computing new ones, so nothing left over
        # from a previous step (or an earlier ad-hoc backward() in this
        # kernel) is accumulated into this update
        optimiser.zero_grad()

        # forward pass
        output = model(images)

        # loss
        loss = criterion(output, labels)

        # backward pass
        loss.backward()

        # update weights
        optimiser.step()

        # progress log every 100 steps; `epoch` is printed zero-based
        # (0 .. num_epochs-1) and .item() yields the loss as a plain float
        if (i+1)%100 == 0: print(f'epoch {epoch}/{num_epochs} step {i+1}/{len(train_loader)} loss: {loss.item()}')

epoch 0/2 step 100/600 loss: 0.3692323565483093
epoch 0/2 step 200/600 loss: 0.2848740220069885
epoch 0/2 step 300/600 loss: 0.22716006636619568
epoch 0/2 step 400/600 loss: 0.24613647162914276
epoch 0/2 step 500/600 loss: 0.16568170487880707
epoch 0/2 step 600/600 loss: 0.09673020243644714
epoch 1/2 step 100/600 loss: 0.08737045526504517
epoch 1/2 step 200/600 loss: 0.042530473321676254
epoch 1/2 step 300/600 loss: 0.1409296840429306
epoch 1/2 step 400/600 loss: 0.11258244514465332
epoch 1/2 step 500/600 loss: 0.1596161276102066
epoch 1/2 step 600/600 loss: 0.09192123264074326


In [10]:
# test
# Measure classification accuracy on `test_loader`.
# The model is switched to eval mode for the duration and its previous mode is
# restored afterwards, so this cell does not change how later cells behave.
was_training = model.training
model.eval()

n_correct = 0
n_samples = 0
# no gradients are needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        # flatten every image into one row; batch size comes from the tensor
        images = images.reshape(images.shape[0], -1)
        outputs = model(images)

        # predicted class = index of the largest score in each row
        # (no `.data` access and no assignment to `_`, which IPython uses
        # for the last cell output)
        predictions = outputs.argmax(dim=1)
        n_samples += labels.shape[0]
        # .item() keeps the running count a plain Python int, not a tensor
        n_correct += (predictions == labels).sum().item()

model.train(was_training)

acc = 100.0*n_correct/n_samples
print(f'accuracy: {acc:.2f}%')

accuracy: 96.28%
