In [5]:
"""
Lab 5 — Introduction to PyTorch
"""

import sys
from pathlib import Path
import torch
import numpy as np
from torch import nn

# Reproducibility: seed the PyTorch and NumPy global generators with one
# shared value so repeated runs draw the same random numbers.
_SEED = 0
torch.manual_seed(_SEED)
np.random.seed(_SEED)

def part1_tensors():
    """Part 1: build small tensors and exercise basic tensor operations.

    Covers construction from a nested list and from a NumPy array,
    ``*_like`` factories, a matrix product with the transpose, and a
    reshape of a 4x4 tensor into a 16x1 column.

    Returns:
        dict with the keys ``"x_data"``, ``"x_np"``, ``"ones"``, ``"rand"``,
        ``"matmul"`` (tensors) and ``"reshaped_shape"`` (a ``torch.Size``).
    """
    nested = [[1, 2], [3, 4]]

    # Two construction routes for the same 2x2 integer matrix.
    from_list = torch.tensor(nested)
    from_numpy = torch.from_numpy(np.array(nested))

    results = {"x_data": from_list, "x_np": from_numpy}
    results["ones"] = torch.ones_like(from_list)
    # The template tensor is integer-valued, so a float dtype is requested
    # explicitly for the random copy.
    results["rand"] = torch.rand_like(from_list, dtype=torch.float)
    results["matmul"] = torch.matmul(from_list, from_list.T)

    # 4x4 random matrix viewed as a single column of 16 entries.
    square = torch.rand(4, 4)
    results["reshaped_shape"] = square.view(16, 1).shape
    return results

def part2_autograd():
    """Part 2: autograd demonstration and safe patterns for repeated backward calls.

    Uses z = 3 * (x + 2)^2 evaluated at x = 1, whose derivative is
    dz/dx = 6 * (x + 2) = 18.

    Three patterns are shown:
      1. a single forward/backward pass;
      2. zeroing the gradient and rebuilding the graph before a second
         backward pass (the recommended pattern);
      3. calling ``backward`` twice on the same graph by retaining it after
         the first call, which accumulates the gradient (18 -> 36).

    Returns:
        dict of detached gradient snapshots (tensors of shape ``(1,)``) under
        the keys ``"grad_after_first"``, ``"grad_after_recompute"``,
        ``"grad_after_retain_once"`` and ``"grad_after_retain_twice"``.
    """

    def forward(inp):
        # z = 3 * (x + 2)^2; every call builds a fresh autograd graph.
        return 3 * ((inp + 2) ** 2)

    x = torch.ones(1, requires_grad=True)

    # Standard use: x is a freshly created leaf, so its .grad is still None
    # and there is nothing to reset before the first backward pass.
    forward(x).backward()
    grad_after_first = x.grad.clone()

    # Safe pattern: zero grad and recompute forward before backward again.
    x.grad.zero_()
    forward(x).backward()
    grad_after_recompute = x.grad.clone()

    # Debug pattern only: retain the graph so backward can run on it again.
    x.grad.zero_()
    z = forward(x)
    z.backward(retain_graph=True)
    grad_after_retain_once = x.grad.clone()
    # Last use of this graph: no retain_graph, so autograd can free it.
    # The gradient is NOT zeroed in between, hence it accumulates.
    z.backward()
    grad_after_retain_twice = x.grad.clone()

    return {
        "grad_after_first": grad_after_first,
        "grad_after_recompute": grad_after_recompute,
        "grad_after_retain_once": grad_after_retain_once,
        "grad_after_retain_twice": grad_after_retain_twice,
    }

class SimpleModel(nn.Module):
    """Simple model with embedding + mean pooling + two linear layers (small example).

    Pipeline: token indices -> embedding -> mean over the sequence axis ->
    Linear -> ReLU -> Linear.

    Args:
        vocab_size: number of rows in the embedding table.
        emb_dim: embedding vector size.
        hid_dim: width of the hidden linear layer.
        out_dim: size of the output (e.g. number of classes).
    """

    def __init__(self, vocab_size: int, emb_dim: int, hid_dim: int, out_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.lin = nn.Linear(emb_dim, hid_dim)
        self.act = nn.ReLU()
        self.out = nn.Linear(hid_dim, out_dim)

    def forward(self, x):
        """Map integer token indices to output scores.

        Args:
            x: integer index tensor of shape ``(..., seq_len)``; typically
                ``(batch_size, seq_len)``, or ``(seq_len,)`` for a single
                unbatched sequence.

        Returns:
            Tensor of shape ``(..., out_dim)``: one output vector per sequence.
        """
        e = self.embedding(x)  # (..., seq_len, emb_dim)
        # The sequence axis is always second-to-last after the embedding
        # lookup, so pooling over dim=-2 covers unbatched (seq_len,) and
        # batched (batch, seq_len) inputs alike, and never averages across
        # a batch axis when extra leading dimensions are present.
        v = e.mean(dim=-2)  # (..., emb_dim)
        h = self.act(self.lin(v))
        return self.out(h)

def part3_nn_examples():
    """Part 3: run one forward pass through Linear, Embedding and SimpleModel.

    Returns:
        dict with the output shapes of the three modules
        (``"linear_out_shape"``, ``"emb_out_shape"``, ``"model_out_shape"``)
        and the detached SimpleModel output under ``"model_out"``.
    """
    # Fully connected layer: 3 samples with 5 features -> 2 features each.
    dense = nn.Linear(5, 2)
    dense_out = dense(torch.randn(3, 5))

    # Embedding table with 10 rows of size 3, queried with four indices.
    lookup = nn.Embedding(10, 3)
    looked_up = lookup(torch.LongTensor([1, 5, 0, 8]))

    # Full model on a single sequence of four token ids.
    net = SimpleModel(vocab_size=100, emb_dim=16, hid_dim=8, out_dim=2)
    scores = net(torch.LongTensor([[1, 2, 5, 9]]))

    return dict(
        linear_out_shape=dense_out.shape,
        emb_out_shape=looked_up.shape,
        model_out_shape=scores.shape,
        model_out=scores.detach(),
    )

def training_demo_synthetic(num_samples=20, seq_len=6, vocab_size=50):
    """Run one pass of mini-batch SGD over random data and report the losses.

    Random token sequences and random binary labels are generated, a
    SimpleModel is trained on them for a single epoch in batches of 4, and
    the loss of every batch is recorded.

    Args:
        num_samples: number of synthetic sequences.
        seq_len: length of each sequence.
        vocab_size: exclusive upper bound for token ids, also the model's
            vocabulary size.

    Returns:
        list of per-batch loss values (Python floats), in batch order.
    """
    batch_size = 4

    # Synthetic inputs and binary targets.
    tokens = torch.randint(0, vocab_size, (num_samples, seq_len), dtype=torch.long)
    labels = torch.randint(0, 2, (num_samples,), dtype=torch.long)

    net = SimpleModel(vocab_size=vocab_size, emb_dim=16, hid_dim=8, out_dim=2)
    loss_fn = nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(net.parameters(), lr=0.1)

    net.train()
    history = []
    for start in range(0, num_samples, batch_size):
        window = slice(start, start + batch_size)
        sgd.zero_grad()
        batch_loss = loss_fn(net(tokens[window]), labels[window])
        batch_loss.backward()
        sgd.step()
        history.append(batch_loss.item())
    return history

def main():
    """Run every lab part in order and print a short summary of the results."""
    # The call order matters: all parts draw from the same global RNG.
    tensors = part1_tensors()
    grads = part2_autograd()
    shapes = part3_nn_examples()
    batch_losses = training_demo_synthetic()

    # Minimal, formal output: a header followed by (label, value) rows.
    print("Lab 5 — PyTorch: Summary results")
    report = [
        ("Part 1: tensor matmul:\n", tensors["matmul"]),
        ("Part 1: reshaped shape:", tensors["reshaped_shape"]),
        ("Part 2: grad after first backward:", grads["grad_after_first"].item()),
        ("Part 2: grad after recompute+backward:", grads["grad_after_recompute"].item()),
        ("Part 2: grad after retain_graph once:", grads["grad_after_retain_once"].item()),
        (
            "Part 2: grad after retain_graph twice (accumulated):",
            grads["grad_after_retain_twice"].item(),
        ),
        ("Part 3: linear_out_shape:", shapes["linear_out_shape"]),
        ("Part 3: emb_out_shape:", shapes["emb_out_shape"]),
        ("Part 3: model_out_shape:", shapes["model_out_shape"]),
        ("Training demo losses (per batch):", batch_losses),
    ]
    for label, value in report:
        print(label, value)

# Script entry point: run the lab summary only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()


Lab 5 — PyTorch: Summary results
Part 1: tensor matmul:
 tensor([[ 5, 11],
        [11, 25]])
Part 1: reshaped shape: torch.Size([16, 1])
Part 2: grad after first backward: 18.0
Part 2: grad after recompute+backward: 18.0
Part 2: grad after retain_graph once: 18.0
Part 2: grad after retain_graph twice (accumulated): 36.0
Part 3: linear_out_shape: torch.Size([3, 2])
Part 3: emb_out_shape: torch.Size([4, 3])
Part 3: model_out_shape: torch.Size([1, 2])
Training demo losses (per batch): [0.6951454877853394, 0.707666277885437, 0.6541890501976013, 0.6947219371795654, 0.6914669275283813]
