# Lab_01


## 0) Setup & Imports


In [None]:
import sys, os, math, random, datetime
from pathlib import Path
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Root folder for everything this notebook writes: the current working directory.
base = Path.cwd()
# Make sure <base>/reports/images exists before any figure is saved into it.
(base / 'reports' / 'images').mkdir(exist_ok=True, parents=True)

def savefig(name):
    """Write the current matplotlib figure to ``reports/images/<name>`` and close it.

    Args:
        name: File name (including extension) of the image inside the
            ``reports/images`` folder under ``base``.
    """
    target = base / 'reports' / 'images' / name
    plt.savefig(target, bbox_inches='tight')
    # Close the figure so later plots start from a clean state.
    plt.close()

# Seed both the PyTorch and the Python RNGs with the same fixed value.
torch.manual_seed(42)
random.seed(42)

# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device  # bare expression: shown as the notebook cell's output

## 1) Tensors & Arrays
Explain the (N, C, H, W) axis order used for image batches: N (batch), C (channels), H/W (height/width). Show a quick shape check.

In [None]:
# Two random batches: 32 single-channel 28x28 images and 16 three-channel 64x64 images.
gray_shape = (32, 1, 28, 28)
rgb_shape = (16, 3, 64, 64)
x = torch.randn(gray_shape)
y = torch.randn(rgb_shape)
print('grayscale batch:', x.shape, '| RGB batch:', y.shape)

## 2–3) FFNN + Training Loop & Backprop
Train ≥3 epochs on MNIST. Plot loss, report test accuracy.

In [None]:
class FFNN(nn.Module):
    """Fully connected classifier: flatten -> Linear(784, 128) -> ReLU -> Linear(128, 10)."""

    def __init__(self):
        super().__init__()
        layers = [
            nn.Flatten(),              # collapse everything after the batch axis
            nn.Linear(28 * 28, 128),   # 784 input features -> 128 hidden units
            nn.ReLU(),
            nn.Linear(128, 10),        # 10 output scores
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its output."""
        out = self.net(x)
        return out

# Convert each MNIST image to a tensor; no further preprocessing is applied.
tfm = transforms.ToTensor()

# Train and test splits share the same root folder, download flag and transform.
mnist_kwargs = dict(root='./data', download=True, transform=tfm)
train_ds = datasets.MNIST(train=True, **mnist_kwargs)
test_ds = datasets.MNIST(train=False, **mnist_kwargs)

# Shuffle only the training batches; the test loader keeps dataset order.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=64)
test_loader = DataLoader(test_ds, shuffle=False, batch_size=256)

def accuracy(model, loader, device=None):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly.

    The model is switched to eval mode (and left there) and evaluated without
    gradient tracking. The predicted class is the argmax over axis 1 of the
    model output.

    Args:
        model: Module called as ``model(X)`` on each input batch.
        loader: Iterable of ``(X, y)`` batches, where ``y`` holds the class
            index of each sample.
        device: Device the batches are moved to. When ``None`` it is taken
            from the model's first parameter (CPU for a parameter-free model).

    Returns:
        Correct predictions divided by total samples, as a float in [0, 1];
        ``0.0`` when the loader yields no samples.
    """
    if device is None:
        # Follow the model rather than relying on a module-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct = total = 0
    with torch.no_grad():
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            preds = model(X).argmax(1)
            correct += (preds == y).sum().item()
            total += y.size(0)
    # An empty loader would otherwise raise ZeroDivisionError.
    return correct / total if total else 0.0

# Model, optimizer and loss for the fully connected baseline.
ffnn = FFNN().to(device)
opt = torch.optim.SGD(ffnn.parameters(), lr=0.01, momentum=0.9)
crit = nn.CrossEntropyLoss()

losses = []  # mean per-batch training loss, one entry per epoch
for ep in range(3):
    ffnn.train()
    running = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        opt.zero_grad()
        loss = crit(ffnn(inputs), targets)
        loss.backward()
        opt.step()
        running += loss.item()
    losses.append(running / len(train_loader))
    print(f'[FFNN] epoch {ep+1} loss={losses[-1]:.4f}')

import matplotlib.pyplot as plt

# Plot the loss curve and save it under reports/images.
plt.figure()
plt.plot(losses)
plt.title('FFNN Training Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
savefig('mlp_loss.png')

# Report held-out accuracy as a percentage rounded to two decimals.
test_acc = accuracy(ffnn, test_loader)
print('[FFNN] test accuracy:', round(test_acc * 100, 2), '%')

## 3b) Gradient Check (finite differences)
Verify autograd vs. numerical gradient on a tiny MLP.

In [None]:
torch.manual_seed(0)
# Run the check in float64. In float32 the round-off in (Lp - Lm), once divided
# by 2*eps with eps=1e-4, is roughly 1e-3, which is not small next to the
# gradients being checked. Values are generated in float32 and then cast, so
# the random draws are the same as before.
tiny = nn.Sequential(nn.Flatten(), nn.Linear(28*28,5), nn.ReLU(), nn.Linear(5,3)).to(device).double()
xb = torch.randn(4,1,28,28, device=device).double(); yb = torch.tensor([0,1,2,1], device=device)
gc_loss = nn.CrossEntropyLoss()

# Analytical gradient of the loss w.r.t. the first Linear layer's weights.
tiny.zero_grad(); L = gc_loss(tiny(xb), yb); L.backward()
anal = tiny[1].weight.grad.detach().view(-1).cpu()

eps=1e-4; num = torch.zeros_like(anal)
# Flat view sharing storage with the weight, so writes to w[i] perturb the layer.
w = tiny[1].weight.detach().view(-1)
n_check = min(50, w.numel())
# Central differences on the first n_check weights; no autograd graph is needed
# for the probing forward passes.
with torch.no_grad():
    for i in range(n_check):
        old = w[i].item()
        w[i] = old + eps; Lp = gc_loss(tiny(xb), yb).item()
        w[i] = old - eps; Lm = gc_loss(tiny(xb), yb).item()
        w[i] = old; num[i] = (Lp-Lm)/(2*eps)  # restore the weight before moving on
print(f'Gradient check — mean |analytical−numerical| over {n_check} params:', float((anal[:n_check]-num[:n_check]).abs().mean()))

## 4) CNN baseline vs FFNN + filter visualization

In [None]:
class SmallCNN(nn.Module):
    """Two 3x3 conv stages (1 -> 8 -> 16 channels) followed by a linear layer with 10 outputs.

    Each conv stage is conv -> ReLU -> 2x2 max-pool.
    """

    def __init__(self):
        super().__init__()
        self.c1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.c2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        # 16*7*7 features corresponds to 28x28 inputs: padding=1 keeps H/W
        # through each conv, and each pool halves them (28 -> 14 -> 7).
        self.fc = nn.Linear(16 * 7 * 7, 10)

    def forward(self, x):
        """Apply both conv stages, flatten from axis 1, and return the linear layer's output."""
        for conv in (self.c1, self.c2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        flat = torch.flatten(x, 1)
        return self.fc(flat)

# Model, optimizer and loss for the CNN baseline.
cnn = SmallCNN().to(device)
opt = torch.optim.SGD(cnn.parameters(), lr=0.05, momentum=0.9)
crit = nn.CrossEntropyLoss()  # built once rather than once per batch

losses=[]  # mean per-batch training loss, one entry per epoch
for ep in range(3):
    cnn.train(); run=0.0
    for X,y in train_loader:
        X,y = X.to(device), y.to(device)
        opt.zero_grad(); loss = crit(cnn(X), y); loss.backward(); opt.step()
        run += loss.item()
    losses.append(run/len(train_loader))
    print(f'[CNN] epoch {ep+1} loss={losses[-1]:.4f}')
plt.figure(); plt.plot(losses); plt.title('CNN Training Loss'); plt.xlabel('epoch'); plt.ylabel('loss'); savefig('cnn_loss.png')

# Evaluate with the accuracy() helper used for the FFNN: it runs under
# eval()/no_grad(), so no autograd graph is built for the test batches and
# the test loader is traversed only once.
print('[CNN] test accuracy:', round(accuracy(cnn, test_loader)*100,2),'%')

# Filter visualization: one grayscale panel per first-layer kernel.
w = cnn.c1.weight.detach().cpu()
n_filters = w.shape[0]
fig, axs = plt.subplots(1, n_filters, figsize=(n_filters * 2, 2))
for kernel, ax in zip(w, axs):
    # Each kernel has a single input channel; show that channel as an image.
    ax.imshow(kernel[0], cmap='gray')
    ax.axis('off')
fig.suptitle('Conv1 filters')
savefig('conv1_filters.png')

## 5) Hyperparameter sweep (learning rate)
Compare LR ∈ {0.005, 0.05, 0.5} for 2 quick epochs.

In [None]:
def train_cnn_lr(lr, epochs=2):
    """Train a fresh ``SmallCNN`` with SGD (momentum 0.9) and return its loss history.

    Args:
        lr: Learning rate passed to the optimizer.
        epochs: Number of full passes over ``train_loader``. Defaults to 2,
            the length used by the learning-rate sweep.

    Returns:
        A list with one entry per epoch: the mean of the per-batch training
        losses for that epoch.
    """
    model = SmallCNN().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()  # built once rather than once per step
    history = []
    for _ in range(epochs):
        model.train()
        running = 0.0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(X), y)
            loss.backward()
            optimizer.step()
            running += loss.item()
        history.append(running / len(train_loader))
    return history
# Train one model per learning rate and keep each loss history keyed by lr.
hist = {}
for lr in (0.005, 0.05, 0.5):
    hist[lr] = train_cnn_lr(lr)

# Overlay the loss curves, one line per learning rate.
plt.figure()
for lr, curve in hist.items():
    plt.plot(curve, label=f'lr={lr}')
plt.legend()
plt.title('LR Sweep (loss)')
plt.xlabel('epoch')
plt.ylabel('loss')
savefig('lr_loss.png')

## 6) Convolution arithmetic (formula + sanity)
Formula:  
\(H' = \lfloor (H+2p-k)/s \rfloor + 1,\ W' = \lfloor (W+2p-k)/s \rfloor + 1\)

In [None]:
def conv2d_out(H, W, k=3, s=1, p=0, d=1):
    """Return the ``(H', W')`` output size of a 2-D convolution.

    Uses ``floor((size + 2*p - d*(k - 1) - 1) / s) + 1`` on each axis, which
    reduces to ``floor((size + 2*p - k) / s) + 1`` for the default ``d=1``.

    Args:
        H: Input height.
        W: Input width.
        k: Kernel size (same on both axes).
        s: Stride.
        p: Zero padding added to each side.
        d: Dilation (spacing between kernel taps); 1 means a dense kernel.

    Returns:
        Tuple ``(H_out, W_out)`` of ints.
    """
    span = d * (k - 1) + 1  # input extent covered by one kernel placement
    return (H + 2*p - span)//s + 1, (W + 2*p - span)//s + 1
# Formula result for a size-preserving 3x3 convolution on a 28x28 input.
print('28x28, k=3,s=1,p=1 ->', conv2d_out(H=28, W=28, k=3, s=1, p=1))

# Spatial size PyTorch produces for a stride-2, padding-1 3x3 conv on a 64x64 input.
x = torch.zeros(1, 1, 64, 64)
strided_conv = nn.Conv2d(1, 1, kernel_size=3, stride=2, padding=1)
y = strided_conv(x)
print('Torch says:', tuple(y.shape[-2:]))