In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms



In [2]:
# Hyperparameters shared by the cells below.
num_epochs = 10
batch_size = 100
learning_rate = 0.001

# MNIST datasets, both stored under ./MNIST_data and converted with ToTensor().
# `data` is the training split (train=True; download=True fetches the files if
# they are missing); `target` is the test split (train=False).
data = torchvision.datasets.MNIST(
    root='./MNIST_data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
target = torchvision.datasets.MNIST(
    root='./MNIST_data',
    train=False,
    transform=transforms.ToTensor(),
)

In [3]:
# Mini-batch iterators over the two splits: the training loader shuffles its
# samples, the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(target, batch_size=batch_size, shuffle=False)

In [4]:
# Pull one mini-batch from the training loader to sanity-check tensor shapes.
examples = iter(train_loader)
samples, labels = next(examples)
# One label per sample: with batch_size = 100 this prints 100 samples and 100 labels.
print(samples.size(), labels.size())

torch.Size([100, 1, 28, 28]) torch.Size([100])


In [5]:
class ConvNet(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Architecture: two ``Conv2d(kernel 5) -> ReLU -> MaxPool2d(2, 2)`` stages
    (1 -> 6 -> 16 channels) followed by a single linear layer that maps the
    flattened 16x4x4 feature map to ``num_classes`` raw scores (logits).

    Args:
        num_classes: number of output scores per sample (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Spatial size for a 28x28 input:
        # 28 -> conv5 -> 24 -> pool -> 12 -> conv5 -> 8 -> pool -> 4
        self.fc = nn.Linear(16 * 4 * 4, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        Args:
            x: image tensor of shape ``(batch, 1, 28, 28)``; a single unbatched
               ``(1, 28, 28)`` image is treated as a batch of one.

        Raises:
            RuntimeError: if the flattened feature map does not have
                16*4*4 values per sample (e.g. wrong input resolution).
        """
        if x.dim() == 3:
            # Unbatched image: add the batch axis so the output is (1, num_classes).
            x = x.unsqueeze(0)
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten per sample. Unlike view(-1, 256), this can never silently
        # regroup elements across samples when the feature map has an
        # unexpected size; the linear layer raises on the mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return x


model = ConvNet()

In [6]:
# Adam updates every parameter of `model` at the configured learning rate;
# the training objective is cross-entropy between the model's raw scores and the labels.
optimiser = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [7]:
# Training loop: one optimiser step per mini-batch, progress logged every 100 steps.
model.train()  # set training mode explicitly so re-running this cell never depends on earlier mode changes
steps_per_epoch = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Clear gradients before the backward pass: .backward() accumulates
        # into .grad, so gradients left behind by an interrupted earlier run
        # of this cell would otherwise leak into the first update.
        optimiser.zero_grad()

        # forward pass (images are fed to the conv net as-is, no flattening)
        output = model(images)

        # loss
        loss = criterion(output, labels)

        # backward pass
        loss.backward()

        # update weights
        optimiser.step()

        if (i + 1) % 100 == 0:
            # .item() extracts the Python float from the 0-dim loss tensor
            print(f'epoch {epoch+1}/{num_epochs} step {i+1}/{steps_per_epoch} loss: {loss.item()}')

epoch 1/10 step 100/600 loss: 0.58547043800354
epoch 1/10 step 200/600 loss: 0.32082921266555786
epoch 1/10 step 300/600 loss: 0.14249181747436523
epoch 1/10 step 400/600 loss: 0.1799234002828598
epoch 1/10 step 500/600 loss: 0.10537133365869522
epoch 1/10 step 600/600 loss: 0.1073470413684845
epoch 2/10 step 100/600 loss: 0.12023258209228516
epoch 2/10 step 200/600 loss: 0.08195315301418304
epoch 2/10 step 300/600 loss: 0.06333418935537338
epoch 2/10 step 400/600 loss: 0.14010734856128693
epoch 2/10 step 500/600 loss: 0.0589279904961586
epoch 2/10 step 600/600 loss: 0.11757534742355347
epoch 3/10 step 100/600 loss: 0.09426600486040115
epoch 3/10 step 200/600 loss: 0.14024010300636292
epoch 3/10 step 300/600 loss: 0.07367437332868576
epoch 3/10 step 400/600 loss: 0.14022165536880493
epoch 3/10 step 500/600 loss: 0.07072710245847702
epoch 3/10 step 600/600 loss: 0.03702051565051079
epoch 4/10 step 100/600 loss: 0.039491429924964905
epoch 4/10 step 200/600 loss: 0.027049539610743523
epoc

In [8]:
# Evaluate classification accuracy on the test loader.
model.eval()  # inference mode; a no-op for the current layers but keeps the cell correct if dropout/batch-norm is added
n_correct = 0
n_samples = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)

        # predicted class = index of the largest score along dim 1
        predictions = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        # .item() keeps the running count a plain Python int rather than a tensor
        n_correct += (predictions == labels).sum().item()

acc = 100.0 * n_correct / n_samples
print(f'accuracy: {acc:.2f}%')

accuracy: 98.74%
