In [1]:
"""
Notes on tutorial: https://docs.pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
"""

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
"""
PyTorch includes many domain specific libraries (TorchText, TorchVision, TorchAudio...) that include datasets. Here, we use a TorchVision dataset.
Each Dataset includes two arguments to modify the samples and the labels respectively (transform and target_transform).
"""

# Build both FashionMNIST splits from the open dataset (root="data",
# download=True). The training split is constructed first, then the test
# split; each one gets its own ToTensor() transform instance.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

In [3]:
"""
By wrapping the PyTorch DataLoader around the Dataset, we create an iterable. This iterable (DataLoader) supports automatic batching, sampling, shuffling, and multiprocess data loading, and thus is very powerful.
"""

batch_size = 64

# One loader per split, both batching with the same batch size.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Look at the first test batch only, to see the batch shape and label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

""" 
Questions: How do I get data that I upload into a DataLoader?
"""

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


' \nQuestions: How do I get data that I upload into a DataLoader?\n'

In [4]:
"""
In PyTorch, we build neural networks by creating a class that inherits from nn.Module. The layers of the network
are defined in the __init__() function, and we define how data flows through the network in the forward() function. An
accelerator, such as CUDA, MPS, MTIA, or XPU, can be used to speed up various aspects of training. If none is
available, PyTorch will default to the CPU.
"""

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
"""
To train a model, we must define two things: A loss function and an optimizer.

Then we must define the training loop. For each iteration, the model predicts on the training
set (split into batches) and backpropagates the error to update the model parameters.

Each iteration, we can also check the test loss and accuracy to assess our model's learning process.
"""

# Loss: cross-entropy between the model's outputs and the class labels.
loss_fn = nn.CrossEntropyLoss()
# Optimizer: SGD over all of the model's parameters, learning rate 1e-3.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches. ``dataloader.dataset`` must
            support ``len()``; the size is used only in the progress read-out.
        model: The ``nn.Module`` to train. It is switched to training mode.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. The loss of every 100th batch is printed together with the
        number of samples processed so far.
    """
    size = len(dataloader.dataset)

    # Send batches to wherever the model's parameters live instead of reading a
    # notebook-level ``device`` variable that another cell may have rebound.
    # A model without parameters leaves the batches on their current device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # Running sample count. Unlike ``(batch + 1) * len(X)`` this stays correct
    # when the batch being logged is a short final batch.
    seen = 0

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients so they do not carry into the next batch

        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Train the model: every epoch runs train() once over the training loader and
# then test() once over the test loader, so progress can be read epoch by epoch.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")  # t is zero-based; shown one-based
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

"""
What are the details of the training loop?
"""

Epoch 1
-------------------------------
loss: 2.304651  [   64/60000]
loss: 2.292750  [ 6464/60000]
loss: 2.261925  [12864/60000]
loss: 2.267838  [19264/60000]
loss: 2.254951  [25664/60000]
loss: 2.209020  [32064/60000]
loss: 2.237845  [38464/60000]
loss: 2.190547  [44864/60000]
loss: 2.184268  [51264/60000]
loss: 2.157164  [57664/60000]
Test Error: 
 Accuracy: 32.6%, Avg loss: 2.149420 

Epoch 2
-------------------------------
loss: 2.164298  [   64/60000]
loss: 2.152345  [ 6464/60000]
loss: 2.084271  [12864/60000]
loss: 2.112251  [19264/60000]
loss: 2.066629  [25664/60000]
loss: 1.994694  [32064/60000]
loss: 2.036831  [38464/60000]
loss: 1.948733  [44864/60000]
loss: 1.949462  [51264/60000]
loss: 1.883520  [57664/60000]
Test Error: 
 Accuracy: 56.8%, Avg loss: 1.883447 

Epoch 3
-------------------------------
loss: 1.915007  [   64/60000]
loss: 1.888554  [ 6464/60000]
loss: 1.762947  [12864/60000]
loss: 1.817472  [19264/60000]
loss: 1.713506  [25664/60000]
loss: 1.654388  [32064/600

'\nWhat are the details of the training loop?\n'

In [6]:
"""
To save a model, we typically save the parameters in a dictionary.
When loading a model, we make an instance of it and then load the parameters from the dictionary.
"""

# Save only the model's parameter dictionary (state_dict) to model.pth.
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

# Rebind `model` to a fresh NeuralNetwork instance and restore the saved
# parameters into it; torch.load is called with weights_only=True.
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth", weights_only=True))

Saved PyTorch Model State to model.pth


<All keys matched successfully>

In [7]:
"""
Now that we loaded the trained model, we can make predictions with it!
"""

# Human-readable class names; a label's integer value is its index in this list.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
# Fetch the first test sample once. Indexing test_data[0] twice would load and
# transform the same sample two times.
x, y = test_data[0]
with torch.no_grad():  # inference only, no gradients needed
    x = x.to(device)
    pred = model(x)
    # pred[0] holds the outputs for the single sample; argmax picks the class index.
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"


In [9]:
"""
Intro to tensors
Following tutorial: https://docs.pytorch.org/tutorials/beginner/basics/tensorqs_tutorial.html

Tensors are very similar to ndarrays, but they are optimized to run on GPUs and to support automatic differentiation. The ndarray and Tensor APIs are very similar, 
but here is a quick review anyway.
"""

import torch
import numpy as np

In [12]:
"""
Initializing a tensor
"""

# Directly from data (a nested Python list)
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)

# From a NumPy array built from the same nested list
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

# From another tensor
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

# From random or constant values: `shape` is a tuple of dimension sizes
# that is passed to each factory function below.
shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.9309, 0.3243],
        [0.7573, 0.7000]]) 

Random Tensor: 
 tensor([[0.0969, 0.0452, 0.4686],
        [0.8907, 0.4099, 0.3323]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [None]:
"""
Attributes of a Tensor
"""

# A 3x4 tensor of random values to inspect.
tensor = torch.rand(3,4)

# shape, dtype and device are read directly as attributes of the tensor.
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")


Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [18]:
"""
Operations on a Tensor

There are over 1200 tensor operations supported by PyTorch both on the CPU and on accelerators. Tensors are by default created 
on the CPU and must be moved to other devices if desired.
"""

# We move our tensor to the current accelerator if available
if torch.accelerator.is_available():
    tensor = tensor.to(torch.accelerator.current_accelerator())

# Some operations
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:,1] = 0
print(tensor)

# Joining tensors
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

# Arithmetic operations
# This computes the matrix multiplication between two tensors. y1, y2, y3 will have the same value
# ``tensor.T`` returns the transpose of a tensor
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

y3 = torch.rand_like(y1)
torch.matmul(tensor, tensor.T, out=y3)


# This computes the element-wise product. z1, z2, z3 will have the same value
z1 = tensor * tensor
z2 = tensor.mul(tensor)

z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

# Single element tensors. If you want a numeric, use .item()
agg = tensor.sum()
agg_item = agg.item()
print(agg_item, type(agg_item))

# In-place operations are denoted by a _ suffix to the function name.
print(f"{tensor} \n")
tensor.add_(5)
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])
12.0 <class 'float'>
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


In [None]:
"""
Bridges to NumPy

Tensors on the CPU and NumPy arrays can share the same memory locations.
"""

# Create a tensor of five ones and convert it to a NumPy array
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n: {n}")

# Add one to the tensor in place and print both again: the NumPy array shows
# the change too, because the CPU tensor and the array share memory.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]
t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [31]:
"""
Building the Neural Network

Following Tutorial: https://docs.pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
"""

import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
"""
Neural networks are composed of layers and modules that operate on data. The torch.nn namespace
has everything necessary to build neural networks in PyTorch. Every module in PyTorch subclasses
nn.Module. A network is simply a module that contains other modules, creating an easy-to-work-with
hierarchical structure.
"""

# Access accelerator if available.
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")


# Our neural network subclassses the nn.Module.
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [41]:
# Push one random 28x28 input (a batch of one) through the network.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Softmax over dimension 1 (the class dimension), then take the index of the
# largest probability for each row.
pred_probab = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6])
