<a href="https://colab.research.google.com/github/grasht/grashaw_GAN_research_project/blob/main/Torch_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

A tutorial exercise to familiarize oneself with PyTorch. We will load a dataset, define a neural network and train this model.

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np

In [2]:
# Report the installed PyTorch build, then whether a CUDA GPU can be used.
print(torch.__version__)
gpu_available = torch.cuda.is_available()
print("CUDA available:", gpu_available)

2.9.0+cu126
CUDA available: True


In [3]:
# FashionMNIST training split. Files are stored under ./data (download=True
# fetches them on demand) and transform=ToTensor() is applied to each image.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

# The held-out test split of the same dataset, loaded the same way.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

# Number of samples delivered per mini-batch by both loaders.
batch_size = 64

# Wrap each dataset in a DataLoader so it can be iterated batch by batch.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Peek at the first test batch only, to confirm tensor shapes and label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

100%|██████████| 26.4M/26.4M [00:00<00:00, 111MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 3.52MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 59.6MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 16.4MB/s]


Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


To define a neural network in PyTorch, we create a class that inherits from `nn.Module`. Layers are defined in `__init__`, and `forward` specifies how data flows through them. Operations are moved to the accelerator if one is available; otherwise we use the CPU.

Notice we use the `.to` method to explicitly move tensors to the accelerator.

In [4]:
# Select the compute device as a string ("cuda", "mps", "cpu", ...).
# Prefer the generic torch.accelerator API; PyTorch builds that do not provide
# it fall back to a plain CUDA check, and the CPU is the last resort.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device.")

#Create a class that inherits from nn.Module
#Define Model
class NeuralNetwork(nn.Module):
  """Fully connected classifier: 784 inputs -> 512 -> 512 -> 10 logits.

  Each sample is flattened (all dimensions except the first) before it is fed
  to the linear layers, so a sample must contain 28*28 = 784 values, e.g. a
  1x28x28 image.
  """

  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten()
    hidden_units = 512
    # Layers are created in input-to-output order and then handed to
    # nn.Sequential, which applies them in that same order.
    layers = [
        nn.Linear(28 * 28, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 10),
    ]
    self.linear_relu_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Return the raw class scores (logits) for the batch `x`."""
    return self.linear_relu_stack(self.flatten(x))

# Build the network, move its parameters to the selected device, and show
# the layer structure.
model = NeuralNetwork()
model = model.to(device)
print(model)


Using cuda device.
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Define the loss function, the optimizer, a train function, and a test function that computes accuracy and average loss.

In [5]:
# Cross-entropy loss: takes the model's logits and integer class labels.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
  """Run one pass over `dataloader`, updating `model` after every batch.

  Each batch is moved to the module-level `device`, the loss is computed with
  `loss_fn`, gradients are back-propagated, and `optimizer` takes one step.
  The loss of every 100th batch (starting with the first) is printed together
  with a running sample count. Returns None.
  """
  size = len(dataloader.dataset)
  model.train()
  for batch_idx, (inputs, targets) in enumerate(dataloader):
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Forward pass and prediction error.
    batch_loss = loss_fn(model(inputs), targets)

    # Backpropagation, parameter update, then clear gradients for the next batch.
    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Only every 100th batch is reported.
    if batch_idx % 100 != 0:
      continue
    loss = batch_loss.item()
    current = (batch_idx + 1) * len(inputs)
    print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Training is done in iterations called epochs. In each epoch the model learns parameters that should, ideally, produce better predictions. We print the accuracy and loss after each epoch; we want to see the accuracy rise and the loss fall.

In [6]:
# Number of complete passes over the training data.
epochs = 5

# Every epoch trains on all training batches, then reports accuracy and
# average loss on the test set.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test(dataloader=test_dataloader, model=model, loss_fn=loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.301360 [   64/60000]
loss: 2.289417 [ 6464/60000]
loss: 2.267507 [12864/60000]
loss: 2.268000 [19264/60000]
loss: 2.251094 [25664/60000]
loss: 2.215970 [32064/60000]
loss: 2.227084 [38464/60000]
loss: 2.193202 [44864/60000]
loss: 2.184705 [51264/60000]
loss: 2.160102 [57664/60000]
Test Error: 
 Accuracy: 46.0%, Avg loss: 2.151273 

Epoch 2
-------------------------------
loss: 2.161162 [   64/60000]
loss: 2.151790 [ 6464/60000]
loss: 2.092659 [12864/60000]
loss: 2.115367 [19264/60000]
loss: 2.062226 [25664/60000]
loss: 1.994939 [32064/60000]
loss: 2.027157 [38464/60000]
loss: 1.946974 [44864/60000]
loss: 1.942590 [51264/60000]
loss: 1.876181 [57664/60000]
Test Error: 
 Accuracy: 60.4%, Avg loss: 1.876970 

Epoch 3
-------------------------------
loss: 1.912251 [   64/60000]
loss: 1.880088 [ 6464/60000]
loss: 1.764233 [12864/60000]
loss: 1.807981 [19264/60000]
loss: 1.695525 [25664/60000]
loss: 1.644672 [32064/60000]
loss: 1.659299 [38464/

# Saving and loading models

In [7]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

# Rebuild the architecture and restore the saved parameters into it.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime still loads on a CPU-only one.
# weights_only=True restricts unpickling to tensors and plain containers.
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth", map_location=device, weights_only=True))

# Human-readable label names; the list position is the integer class index
# used to look up both the prediction and the true label.
classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

# Classify the first test sample with the reloaded model.
model.eval()
x, y = test_data[0]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    # pred[0] holds the scores for this one sample; its arg-max is the class index.
    predicted = classes[pred[0].argmax(0)]
    actual = classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Saved PyTorch Model State to model.pth
Predicted: "Ankle boot", Actual: "Ankle boot"


# Tensors
"Tensors are a specialized data structure that are very similar to arrays and matrices. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters." - Getting Started with PyTorch (https://docs.pytorch.org/tutorials/beginner/basics/tensorqs_tutorial.html).

Tensors are like NumPy ndarrays, except that they can also run on GPUs and other hardware accelerators. They are also optimized for automatic differentiation.

In [8]:
# A tensor can be built directly from nested Python data; the dtype is inferred.
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)

# ...or from a NumPy array.
np_array = np.asarray(data)
x_np = torch.from_numpy(np_array)

# ...or modelled on an existing tensor.
x_ones = torch.ones_like(x_data)  # keeps the shape and dtype of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float32)  # keeps the shape, overrides the dtype
print(f"Random Tensor: \n {x_rand} \n")



Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.3993, 0.0536],
        [0.9519, 0.1332]]) 



In [9]:
# Inspect a freshly created tensor: its shape, its element type, and the
# device that holds its data.
tensor = torch.rand((3, 4))

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


There are over 1200 tensor operations (https://docs.pytorch.org/docs/stable/torch.html). They can all be run on the CPU and Accelerators (CUDA, MPS, MTIA, XPU).

To use an accelerator on Colab:
Runtime > Change runtime type > GPU

Recall that we use `.to` to move tensors to a device. We can first check that an accelerator is available:

```python
if torch.accelerator.is_available():
    tensor = tensor.to(torch.accelerator.current_accelerator())
```



In [11]:
# Indexing and slicing follow the NumPy conventions.
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:, 1] = 0  # zero out the second column in place
print(tensor)

# Join three copies side by side (along dim=1).
t1 = torch.cat((tensor, tensor, tensor), dim=1)
print(t1)

# Matrix multiplication, three equivalent spellings: y1, y2 and y3 end up equal.
# ``tensor.T`` is the transpose.
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

# The out= form writes the product into a pre-allocated tensor.
y3 = torch.rand_like(y1)
torch.matmul(tensor, tensor.T, out=y3)

# Element-wise product, three equivalent spellings: z1, z2 and z3 end up equal.
z1 = tensor * tensor
z2 = tensor.mul(tensor)

z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

# Reduce to a one-element tensor, then pull out the plain Python number.
agg = tensor.sum()
agg_item = agg.item()
print(agg_item, type(agg_item))

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])
12.0 <class 'float'>
