In [1]:
import torch 
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [2]:
model = nn.Sequential(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10)
)

In [3]:
# Defining the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [4]:
# Defining the loss
loss = nn.CrossEntropyLoss()

In [5]:
train_data = datasets.MNIST('data', 
                            train=True,
                            download=True, 
                            transform=transforms.ToTensor())
train, val = random_split(train_data, [55000, 5000])
train_loader = DataLoader(train, batch_size=32)
val_loader = DataLoader(val, batch_size=32)

 ### Training and validation step

In [None]:
# Training Loop
epochs = 5
for epoch in range (epochs):
    losses = list()
    for batch in train_loader:
        x, y = batch
        
        # x: b * 1 * 28 * 28
        
        b = x.size(0)
        x= x.view(b, -1)
        
        # 1. Forward
        l = model(x)
        
        # 2. Objective function 
        j = loss(l, y)
        
        # 3. Cleaning the gradients
        model.zero_grad()
        
        # 4. Partial derivatives of J 
        j.backward()
        
        # 5. Step the optimizer
        optimizer.step()
        
        
        losses.append(j.item())
        
    print(f' Epoch {epoch + 1}, train loss: {torch.tensor(losses).mean() :.2f}')
    
    for batch in train_loader:
        x, y = batch
        
        # x: b * 1 * 28 * 28
        
        b = x.size(0)
        x= x.view(b, -1)
        
        # 1. Forward
        with torch.no_grad():
            l = model(x)  # the l is a logit
        
        # 2. Objective function 
        j = loss(l, y)
        
        losses.append(j.item())
        
    print(f' Epoch {epoch + 1}, validation loss: {torch.tensor(losses).mean() :.2f}')

 Epoch 1, train loss: 1.32
 Epoch 1, validation loss: 0.91
 Epoch 2, train loss: 0.41
 Epoch 2, validation loss: 0.38


### Improving the Model using Residual Connection

In [None]:
# Defining a more flexible model
class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28 * 28, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, 10)
        self.do = nn.Dropout(0,1)
        
    def forward(self, x):
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        do = self.do(h2 + h1)
        logits = self.l3(do)
        return logits
        
Model = ResNet()

In [None]:
# Defining the optimizer 
Params = Model.parameters()
Optimizer = optim.SGD(Params, lr = 0.01)


In [None]:
# Define the loss
Loss = nn.CrossEntropyLoss()

In [None]:
# My training and validation loops
epochs = 5
for epoch in range (epochs):
    Losses = list()
    for batch in train_loader:
        x, y = batch
        
        # x: b * 1 * 28 * 28
        
        b = x.size(0)
        x= x.view(b, -1)
        
        # 1. Forward
        l = Model(x)
        
        # 2. Objective function 
        j = Loss(l, y)
        
        # 3. Cleaning the gradients
        Model.zero_grad()
        
        # 4. Partial derivatives of J 
        j.backward()
        
        # 5. Step the optimizer
        Optimizer.step()
        
        
        Losses.append(j.item())
        
    print(f' Epoch {epoch + 1}, train loss: {torch.tensor(Losses).mean() :.2f}')
    
    for batch in train_loader:
        x, y = batch
        
        # x: b * 1 * 28 * 28
        
        b = x.size(0)
        x= x.view(b, -1)
        
        # 1. Forward
        with torch.no_grad():
            l = Model(x)  # the l is a logit
        
        # 2. Objective function 
        j = Loss(l, y)
        
        Losses.append(j.item())
        
    print(f' Epoch {epoch + 1}, validation loss: {torch.tensor(Losses).mean() :.2f}')