In [92]:


import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np

In [93]:
from torchvision.transforms import v2

In [94]:
transforms = transforms.Compose([
                                 transforms.ToTensor()
])

In [95]:
train = datasets.MNIST("", train=True, transform=transforms, download=True)
train, valid = random_split(train, [50000, 10000])


In [96]:
trainloader = DataLoader(dataset=train, batch_size=16)
validloader = DataLoader(dataset=valid, batch_size=16)

In [116]:
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.view(x.shape[0], -1)  # Flatten the images
        x = F.leaky_relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.leaky_relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.leaky_relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

model = Network()

if torch.cuda.is_available():
    model = model.cuda()

model

Network(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (bn1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc4): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [117]:
if torch.cuda.is_available():
    model.cuda()

In [118]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

In [119]:
from tqdm import tqdm
epochs = 5

for e in range(epochs):
    train_loss = 0.0
    for data, labels in tqdm(trainloader):
        # Transfer Data to GPU if available
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        
        # Clear the gradients
        optimizer.zero_grad()
        # Forward Pass
        target = model(data)
        # Find the Loss
        loss = criterion(target,labels)
        # Calculate gradients 
        loss.backward()
        # Update Weights
        optimizer.step()
        # Calculate Loss
        train_loss += loss.item()
    
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainloader)}')

    valid_loss = 0.0
    total_correct = 0
    total_samples = 0
    model.eval()
    for data, labels in tqdm(validloader):
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        
        target = model(data)

        loss = criterion(target, labels)
        valid_loss += loss.item()

        _, predicted = torch.max(target, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    
    accuracy = 100 * total_correct / total_samples

    print(f'Epoch {e+1} \t\t Valid Loss: {valid_loss / len(validloader)}')
    print(f'Epoch {e+1} \t\t Valid accuracy: {accuracy:.2f}%')


100%|██████████| 3125/3125 [00:30<00:00, 101.50it/s]


Epoch 1 		 Training Loss: 0.5319087144267559


100%|██████████| 625/625 [00:02<00:00, 282.72it/s]


Epoch 1 		 Valid Loss: 0.163544378516078
Epoch 1 		 Valid accuracy: 95.12%


100%|██████████| 3125/3125 [00:31<00:00, 98.70it/s] 


Epoch 2 		 Training Loss: 0.12712911989283748


100%|██████████| 625/625 [00:01<00:00, 404.77it/s]


Epoch 2 		 Valid Loss: 0.13132612868305296
Epoch 2 		 Valid accuracy: 95.84%


100%|██████████| 3125/3125 [00:31<00:00, 100.59it/s]


Epoch 3 		 Training Loss: 0.08536323610623134


100%|██████████| 625/625 [00:02<00:00, 300.08it/s]


Epoch 3 		 Valid Loss: 0.10315553907565772
Epoch 3 		 Valid accuracy: 96.67%


100%|██████████| 3125/3125 [00:29<00:00, 105.55it/s]


Epoch 4 		 Training Loss: 0.061468556621604366


100%|██████████| 625/625 [00:01<00:00, 388.75it/s]


Epoch 4 		 Valid Loss: 0.11359863578426885
Epoch 4 		 Valid accuracy: 96.87%


100%|██████████| 3125/3125 [00:32<00:00, 96.90it/s] 


Epoch 5 		 Training Loss: 0.04740870903202711


100%|██████████| 625/625 [00:01<00:00, 412.78it/s]

Epoch 5 		 Valid Loss: 0.10214469307471882
Epoch 5 		 Valid accuracy: 97.03%



