In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

In [2]:
#load data
#this might take a while as it will download the dataset from internet
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
data_train = torchvision.datasets.MNIST('./', download=True, train=True, transform = transform)
data_test = torchvision.datasets.MNIST('./', download=True, train=False, transform = transform)

In [3]:
# DATA LOADER

from torch.utils.data import DataLoader

data_train_loader = DataLoader(data_train, batch_size=64, shuffle=True)
data_test_loader = DataLoader(data_test, batch_size=64, shuffle=False)


In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 128) 
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10) 
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.fc3(x), dim=1)
        return x
    

        

In [5]:
criterion = nn.NLLLoss() # Negative Log Likelyhood Loss


In [6]:
network = Net()

optimizer = optim.SGD(network.parameters(), lr=0.005, momentum=0.9) # implements stochastic gradient descent
    
for epoch in range(5):
    loss_train = 0
    # network.train()

    for images, labels in data_train_loader:
        optimizer.zero_grad() # zeros out the gradients (i.e. resets the gradient)
        images = images.view(images.shape[0], -1) # setting up images to be the correct shape
        output = network(images)

        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        loss_train += loss.item()
    print(f"{epoch} Loss Train: {loss_train / len(data_train_loader)}")
    
    # network.eval()
    with torch.no_grad():
        loss_test = 0
        for images, labels in data_test_loader:
            optimizer.zero_grad()
            images = images.view(images.shape[0], -1)
            output = network(images)
            loss = criterion(output, labels)
            # loss.backward()
            # optimizer.step()
            loss_test += loss.item()
    
        print(f"{epoch} Loss Test: {loss_test / len(data_test_loader)}")
        
        

0 Loss Train: 0.5229609786892242
0 Loss Test: 0.26666817385228764
1 Loss Train: 0.2284987339460011
1 Loss Test: 0.18311687318050557
2 Loss Train: 0.1654558765656277
2 Loss Test: 0.1423623344125404
3 Loss Train: 0.12782925801244435
3 Loss Test: 0.11966240063167302
4 Loss Train: 0.10710291980581084
4 Loss Test: 0.1000118432989178
