![PyTorch](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c6/PyTorch_logo_black.svg/640px-PyTorch_logo_black.svg.png)

In [None]:
import torch

# Show which PyTorch version this notebook is running against
print(f"Using torch v{torch.__version__}")

## Tensor Initialization

In [None]:
# Dense tensor filled with zeros, followed by its basic metadata
dims = (4, 5, 3)
t = torch.zeros(dims)
print(
  f"Shape:   {t.shape}",
  f"Type:    {t.dtype}",
  f"Layout:  {t.layout}",
  sep="\n",
)

In [None]:
# Same all-zero tensor, this time stored in the sparse COO layout
t = torch.zeros(dims, layout=torch.sparse_coo)
print(
  f"Shape:   {t.shape}",
  f"Type:    {t.dtype}",
  f"Layout:  {t.layout}",
  sep="\n",
)

In [None]:
# Random tensors drawn from three different distributions
# (created in this order so a seeded RNG yields the same values)
t1 = torch.randn(dims)                   # standard normal
t2 = torch.rand(dims)                    # continuous uniform on [0, 1)
t3 = torch.randint(-10, 10, size=dims)   # integers drawn uniformly from [-10, 10)

print(
  f"dtype t1: {t1.dtype}",
  f"dtype t2: {t2.dtype}",
  f"dtype t3: {t3.dtype}",
  sep="\n",
)

In [None]:
# Operators promote their operands to a common dtype
scaled = t2 * t3   # floating-point tensor times integer tensor
t4 = t1 + scaled
print(f"dtype t4: {t4.dtype}")

In [None]:
# ...or cast explicitly to a target dtype
torch.random.manual_seed(42)
dims = (3, 2)
t5 = torch.randint(0, 2, dims)   # integer tensor holding only 0s and 1s
t6 = t5.to(torch.bool)
t7 = t5.to(torch.float32)
t8 = t5.to(torch.int64)
t9 = t5.to(torch.float64)
print(
  f"t5 ({t5.dtype}) =\n {t5}",
  f"t6 ({t6.dtype}) =\n {t6}",
  f"t7 ({t7.dtype}) =\n {t7}",
  f"t8 ({t8.dtype}) =\n {t8}",
  f"t9 ({t9.dtype}) =\n {t9}",
  sep="\n",
)

## Tensors on GPU

In [None]:
# Pick the device used by the following cells: the first CUDA GPU when one is
# usable, otherwise the CPU.
# NOTE: torch.cuda.is_available is a function and has to be *called*; the bare
# function object is always truthy, which made the "CUDA is not available"
# branch unreachable.
if torch.cuda.is_available():
  devices = list(range(torch.cuda.device_count()))
  if devices:
    device = torch.device(f"cuda:{devices[0]}")
    print(f"Using GPU {device}")
  else:
    # CUDA runtime reports as available but enumerates no device
    device = torch.device("cpu")
    print("CUDA available, but no GPU available")
else:
  print("CUDA is not available")
  device = torch.device("cpu")

# Last expression of the cell: the notebook displays the chosen device
device

In [None]:
# Copy the CPU tensors to the GPU; the target can be given either as a
# torch.device object or as a device string
if device != torch.device("cpu"):
  t1_gpu, t2_gpu, t3_gpu = t1.to(device), t2.to("cuda"), t3.to("cuda:0")

  print(t1_gpu.device, t2_gpu.device, t3_gpu.device)

In [None]:
# Same arithmetic as t4, computed on the GPU copies of the operands
if device != torch.device("cpu"):
  gpu_product = t2_gpu * t3_gpu
  t4_gpu = t1_gpu + gpu_product
  print(t4_gpu.device)

In [None]:
# Adding a GPU tensor to a CPU tensor is expected to raise a RuntimeError;
# the message is printed and t4_mixed must still be None afterwards
if device != torch.device("cpu"):
  t4_mixed = None
  try:
    t4_mixed = t4_gpu + t4
  except RuntimeError as err:
    print(err)

  assert t4_mixed is None

## Tensor Indexing

In [None]:
# Integers 0..23 laid out as a tensor of shape (3, 4, 2)
torch.manual_seed(42)
arr = torch.arange(3 * 4 * 2).reshape((3, 4, 2))

In [None]:
# First two entries along dim 0 -> shape (2, 4, 2)
arr[:2]

In [None]:
# Last two entries along dim 0 (dim 0 has size 3, so from index 1 onward)
arr[1:]

In [None]:
# Last two entries along dim 0 again, written with a negative start index
arr[-2:]

In [None]:
# Slice two dims at once: indices 1.. of dim 0 and 1..2 of dim 1;
# the unmentioned last dim is kept whole -> shape (2, 2, 2)
arr[1:, 1:3]

In [None]:
# Same kind of slice with a negative start on dim 0 and an explicit
# full slice (`:`) on the last dim -> shape (2, 2, 2)
arr[-2:, 1:3, :]

In [None]:
# Build a boolean mask of the even entries, then overwrite them in place
even_entries = arr.remainder(2).eq(0)
print(even_entries)
arr.masked_fill_(even_entries, -1)
arr

## Tensor Reshaping

In [None]:
# Random tensor of shape (3, 1, 5); note the size-1 middle dimension
t = torch.randn(3, 1, 5)
t

In [None]:
# squeeze() drops every size-1 dimension: (3, 1, 5) -> (3, 5)
t.squeeze().shape

In [None]:
# unsqueeze(0) inserts a size-1 dimension in front: (3, 1, 5) -> (1, 3, 1, 5)
t.unsqueeze(0).shape

In [None]:
# unsqueeze(1) inserts a size-1 dimension at position 1: (3, 1, 5) -> (3, 1, 1, 5)
t.unsqueeze(1).shape

In [None]:
# unsqueeze(3) appends a size-1 dimension at the end: (3, 1, 5) -> (3, 1, 5, 1)
t.unsqueeze(3).shape

In [None]:
# transpose(1, 2) swaps dims 1 and 2: (3, 1, 5) -> (3, 5, 1)
t.transpose(1, 2).shape

In [None]:
# permute(1, 0, 2) reorders all dims at once: (3, 1, 5) -> (1, 3, 5)
t.permute(1, 0, 2).shape

## Tensor Reduction

In [None]:
# Random tensor of shape (5, 2, 3) used as input for the reductions
t = torch.randn(5, 2, 3)

In [None]:
# Sum over dim 0; the reduced dim disappears: (5, 2, 3) -> (2, 3)
red_t = torch.sum(t, dim=0)
print(f"Shape: {red_t.shape}")
red_t

In [None]:
# Sum over dim 0, keeping it as a size-1 dim: (5, 2, 3) -> (1, 2, 3)
red_t = torch.sum(t, dim=0, keepdim=True)
print(f"Shape: {red_t.shape}")
red_t

In [None]:
# Mean over dim 0: (5, 2, 3) -> (2, 3)
red_t = torch.mean(t, dim=0)
print(f"Shape: {red_t.shape}")
red_t

In [None]:
# Mean over dims 0 and 2 together, kept as size-1 dims: (5, 2, 3) -> (1, 2, 1)
red_t = torch.mean(t, dim=(0, 2), keepdim=True)
print(f"Shape: {red_t.shape}")
red_t

## Tensor Initialization: Your Turn!

In [None]:
def relu_kaiming_init(in_size: int, out_size: int) -> torch.Tensor:
  """Exercise stub: create a weight matrix initialised for a ReLU layer.

  The notebook's check cell expects `relu_kaiming_init(5, 10)` to have shape
  `(10, 5)`, i.e. `(out_size, in_size)`.

  Left unimplemented on purpose; as written it returns None.
  """
  pass

def relu_kaiming_init_(weights: torch.Tensor) -> None:
  """Exercise stub: re-initialise `weights` in place for a ReLU layer.

  The notebook's check cell passes an all-zero tensor and asserts that its
  contents have changed after the call.

  Left unimplemented on purpose; as written it leaves `weights` untouched.
  """
  pass

In [None]:
# Sanity checks for relu_kaiming_init and relu_kaiming_init_
torch.manual_seed(42)

# The returned matrix must be laid out as (out_size, in_size)
t1 = relu_kaiming_init(5, 10)
assert t1.shape == (10, 5)

# The in-place variant must overwrite the tensor it is given,
# while the clone taken beforehand must still be all zeros
t2 = torch.zeros(30, 25)
t2_old = t2.clone()
relu_kaiming_init_(t2)
assert not torch.allclose(t2_old, t2)
assert torch.allclose(t2_old, torch.zeros_like(t2_old))

# Empirical statistics of the re-initialised weights
print(f"Mean: {t2.mean()}", f"Std: {t2.std()}", sep="\n")

## AutoGrad

In [None]:
# A tensor created without requires_grad only carries data:
# its .grad and .grad_fn are both None
t = torch.randn(2, 4, 5)
t.data, t.grad, t.grad_fn

In [None]:
import torch

torch.manual_seed(42)
batch_size, in_shape, hh_shape = 32, 5, 10

# Current-step input and previous-step hidden state (no grad requested)
x = torch.randn(batch_size, in_shape)
prev_h = torch.randn(batch_size, hh_shape)

# Trainable weights; drawn in this order so the seeded values are unchanged
W_x = torch.randn(hh_shape, in_shape, requires_grad=True)
W_h = torch.randn(hh_shape, hh_shape, requires_grad=True)

# next_h = tanh(x W_x^T + prev_h W_h^T)
i2h = x @ W_x.t()
h2h = prev_h @ W_h.t()
next_h = torch.tanh(i2h + h2h)

print(next_h.shape)

# Back-propagate a gradient of ones. backward() returns None (so this line
# prints "None"); the gradients are stored on the leaves that require grad.
upstream = torch.ones(batch_size, hh_shape)
print(next_h.backward(upstream))

# x and prev_h do not require grad, so their .grad stays None
print(f"x.grad: {x.grad}")
print(f"prev_h.grad: {prev_h.grad}")
print(f"W_h.grad ({W_h.grad.shape})\n{W_h.grad}")
print(f"W_x.grad ({W_x.grad.shape})\n{W_x.grad}")


## Torch Module

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class ISPR_LinearLayer(nn.Module):
  """Fully connected layer computing `in_tensor @ weight.T + bias`.

  Args:
    in_shape: number of input features.
    out_shape: number of output features.
    kaiming_custom: when True the weight is re-initialised with the
      notebook's own `relu_kaiming_init_`; otherwise with PyTorch's
      `nn.init.kaiming_uniform_` (fan-in mode, ReLU nonlinearity).

  The weight has shape `(out_shape, in_shape)`; the bias has shape
  `(out_shape,)` and always starts at zero.
  """

  def __init__(self, in_shape, out_shape, kaiming_custom=False):
    super().__init__()
    # Both parameters start as standard-normal draws and are overwritten below
    self.weight = nn.Parameter(torch.randn(out_shape, in_shape))
    self.bias = nn.Parameter(torch.randn(out_shape))

    if not kaiming_custom:
      # PyTorch's built-in initialiser
      nn.init.kaiming_uniform_(self.weight, mode="fan_in", nonlinearity="relu")
    else:
      # Question for the reader: why would this fail without torch.no_grad()?
      with torch.no_grad():
        relu_kaiming_init_(self.weight)

    nn.init.zeros_(self.bias)

  def forward(self, in_tensor):
    """Apply the affine map; the last dim of `in_tensor` must be `in_shape`."""
    projected = torch.matmul(in_tensor, self.weight.t())
    return projected + self.bias

In [None]:
class ISPR_MultilayerPerceptron(nn.Module):
  """Multi-layer perceptron built from `ISPR_LinearLayer` blocks.

  Args:
    layer_size: widths of consecutive layers; every adjacent pair
      `(layer_size[i], layer_size[i + 1])` becomes one linear layer.
    wrong_list: when True the layers are stored in a plain Python list
      instead of an `nn.ModuleList`.
  """

  def __init__(self,
               layer_size: list[int],
               wrong_list: bool = False):
    super().__init__()  # must run before any sub-module is assigned

    fully_connected = [
        ISPR_LinearLayer(n_in, n_out)
        for n_in, n_out in zip(layer_size, layer_size[1:])]

    # A plain list is not registered as a container of sub-modules,
    # an nn.ModuleList is
    self.fc = fully_connected if wrong_list else nn.ModuleList(fully_connected)

  def forward(self, x):
    """Pass `x` through every layer, applying ReLU after each one (the last included)."""
    for layer in self.fc:
      x = F.relu(layer(x))
    return x

# Build the same architecture twice: net_1 keeps its layers in an
# nn.ModuleList, net_2 keeps them in a plain Python list (wrong_list=True)
layers = [5, 10, 20, 1]
net_1 = ISPR_MultilayerPerceptron(layers)
net_2 = ISPR_MultilayerPerceptron(layers, wrong_list=True)

# Number of parameter tensors each module reports through .parameters()
print(f"Net1 parameters: {len(list(net_1.parameters()))}")
print(f"Net2 parameters: {len(list(net_2.parameters()))}")

## Dataset

In [None]:
# Seed the RNG so the simulated dataset is reproducible
torch.manual_seed(42)

# Inputs are standard-normal; targets are net_1's outputs plus small noise
n_samples = 1000
x_data = torch.randn(n_samples, 5)
noise = 1e-2 * torch.randn(n_samples, 1)

# Important: detach() cuts the targets off from net_1's autograd graph
y_data = (net_1(x_data) + noise).detach()

print(f"x_data shape: {x_data.shape}")
print(f"y_data shape: {y_data.shape}")

# Pair every input row with its target
dset = torch.utils.data.TensorDataset(x_data, y_data)

# Train / validation / test sizes (0.7, 0.2, 0.1); test takes the remainder
n_train = int(0.7 * n_samples)
n_val = int(0.2 * n_samples)
n_test = n_samples - (n_train + n_val)
split_sizes = [n_train, n_val, n_test]

# Random, non-overlapping split
train_dset, val_dset, test_dset = torch.utils.data.random_split(dset, split_sizes)

# Mini-batch loaders; only the training loader shuffles
batch_size = 32
tr_loader = torch.utils.data.DataLoader(
  train_dset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dset, batch_size=batch_size)

# A loader's length is its number of batches, not its number of samples
print(f"Train loader length: {len(tr_loader)}")
print(f"Val loader length: {len(val_loader)}")
print(f"Test loader length: {len(test_loader)}")

## Training Loop

In [None]:
# Model to be fitted to the simulated data
net_hat = ISPR_MultilayerPerceptron([5, 5, 5, 1])

# Objective (mean squared error) and optimiser (stochastic gradient descent)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(net_hat.parameters(), lr=1e-2)

n_epochs = 500

# Per-epoch history of the averaged losses
tr_loss, vl_loss = [], []

for epoch in range(n_epochs):
  # Training pass: one optimiser step per mini-batch
  net_hat.train()
  batch_losses = []
  for x, y in tr_loader:
    y_hat = net_hat(x)
    loss = loss_fn(y_hat, y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    batch_losses.append(loss.item())
  # Epoch loss is the mean of the per-batch losses
  epoch_tr_loss = torch.tensor(batch_losses).mean()

  # Validation pass: gradient tracking disabled, no parameter updates
  net_hat.eval()
  batch_losses = []
  with torch.no_grad():
    for x, y in val_loader:
      y_hat = net_hat(x)
      loss = loss_fn(y_hat, y)
      batch_losses.append(loss.item())
  epoch_val_loss = torch.tensor(batch_losses).mean()

  # Report every tenth epoch
  if not epoch % 10:
    print(f"Epoch. {epoch} | Train Loss: {epoch_tr_loss} | Val Loss: {epoch_val_loss}")

  tr_loss.append(epoch_tr_loss.item())
  vl_loss.append(epoch_val_loss.item())

In [None]:
# Evaluate the trained model on the held-out test split
net_hat.eval()
batch_losses = []
with torch.no_grad():
  for x, y in test_loader:
    y_hat = net_hat(x)
    loss = loss_fn(y_hat, y)
    batch_losses.append(loss.item())

# Mean of the per-batch losses, converted to a plain Python float
test_loss = torch.tensor(batch_losses).mean().item()
print(f"Test Loss: {test_loss}")

In [None]:
# Seaborn plot training loss and validation loss
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme()

# One curve per recorded loss history, indexed by epoch
epochs = range(n_epochs)
for history, label in ((tr_loss, "Train Loss"), (vl_loss, "Val Loss")):
  sns.lineplot(x=epochs, y=history, label=label)

# The single test-set loss is drawn as a dashed horizontal reference line
plt.axhline(y=test_loss, color='r', linestyle='--', label='Test Loss')
plt.legend()
plt.show()

## Your Turn!

1. Download the MNIST dataset using [torchvision](https://pytorch.org/vision/main/generated/torchvision.datasets.MNIST.html).
2. Instantiate a Convolutional Neural Network (CNN)
3. Train the CNN to classify the digits.

In [None]:
import torchvision
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F

# Seed
torch.manual_seed(42)

# Convert each image to a tensor, then normalise it with a fixed mean/std
# (presumably the MNIST training-set statistics; verify before reuse)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Download (when missing) and load the train and test splits under 'data/'
mnist_train, mnist_test = (
    torchvision.datasets.MNIST(
        'data/', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)


# TODO: Use GPU?

# TODO: Split MNIST Train and Validation

# TODO: Create data loaders

# TODO: Define class CNN

# TODO: Instantiate CNN

# TODO: Train the CNN

# TODO: Test Set Performance

# TODO: Plot Training Loss and Validation Loss (W&B, TensorBoard, or plain Matplotlib)