In [2]:
import torch
import numpy as np

# Sanity-check the environment: the installed PyTorch version and whether the MPS backend is available.
print(torch.__version__)
print(torch.backends.mps.is_available())

2.4.0
True


# Dataset & DataLoader

In [3]:
# Toy training set: five examples with two features each.
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

# Integer class label (0 or 1) for each row of X_train, in the same order.
y_train = torch.tensor([0, 0, 0, 1, 1])

In [4]:
# Toy test set: two examples with the same two-feature layout as X_train.
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])

# Integer class label (0 or 1) for each row of X_test, in the same order.
y_test = torch.tensor([0, 1])

In PyTorch, the three main components of a custom Dataset class are the
__init__ constructor, the __getitem__ method, and the __len__ method (see listing
A.6). In the __init__ method, we set up attributes that we can access later in the
__getitem__ and __len__ methods. These could be file paths, file objects, database
connectors, and so on. Since we created a tensor dataset that sits in memory, we
simply assign X and y to these attributes, which are placeholders for our tensor
objects.

In [5]:
from torch.utils.data import Dataset


class ToyDataset(Dataset):
    """Dataset over a pair of in-memory, index-aligned containers.

    ``X`` holds the per-example features and ``y`` the per-example labels.
    Both are stored as given (no copy, no validation).
    """

    def __init__(self, X, y):
        self.features, self.labels = X, y

    def __len__(self):
        """Number of examples, taken from the first dimension of the labels."""
        return self.labels.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

# Wrap the in-memory training and test tensors in ToyDataset instances.
train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [6]:
# Dataset sizes, followed by the first two (features, label) training examples.
print(len(train_ds))
print(len(test_ds))
print(train_ds[0])
print(train_ds[1])

5
2
(tensor([-1.2000,  3.1000]), tensor(0))
(tensor([-0.9000,  2.9000]), tensor(0))


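Setting num_workers=4 usually leads to optimal performance on
many real-world datasets, but optimal settings depend on your hardware and the code
used for loading a training example defined in the Dataset class. For the tiny
in-memory dataset used here, we keep num_workers=0, which loads the data in the
main process.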

In [7]:
from torch.utils.data import DataLoader

# Seed PyTorch's global RNG so the shuffled batch order is reproducible.
torch.manual_seed(123)

train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,  # draw a new random example order on every pass over the loader
    num_workers=0  # load batches in the main process (no worker subprocesses)
)

In [8]:
# NOTE(review): test_ds was already created with the same arguments in the cell that
# defines ToyDataset; re-creating it here is redundant but harmless.
test_ds = ToyDataset(X_test, y_test)

test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=False,  # keep the test examples in their original order
    num_workers=0
)


If you iterate over the data loader a second time, you will see that the shuffling order
changes. This is desired to prevent deep neural networks from getting caught in repetitive
update cycles during training.

In [9]:
# One full pass over the training loader; x is a batch of features and y the matching labels.
for idx, (x, y) in enumerate(train_loader):
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])


In practice, having a substantially smaller batch as the last batch in a training epoch
can disturb the convergence during training. To prevent this, set drop_last=True,
which will drop the last batch in each epoch, as shown in the following listing.

In [10]:
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True  # discard the final batch when it has fewer than batch_size examples
)

# With 5 training examples and batch_size=2, only two full batches are produced per pass.
for idx, (x, y) in enumerate(train_loader):
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 2: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])


# Training loop

In [11]:
class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron with two ReLU hidden layers (30 and 20 units).

    Args:
        num_inputs: Size of each input feature vector.
        num_outputs: Number of values (logits) produced per example.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        # The linear layers are created in forward order so that parameter
        # initialization draws from the random number generator in the same sequence.
        hidden_1 = torch.nn.Linear(num_inputs, 30)
        hidden_2 = torch.nn.Linear(30, 20)
        output = torch.nn.Linear(20, num_outputs)

        self.layers = torch.nn.Sequential(
            hidden_1, torch.nn.ReLU(),
            hidden_2, torch.nn.ReLU(),
            output,  # no activation here: forward() returns raw logits
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the resulting logits."""
        return self.layers(x)

The training loop below also uses two settings called model.train() and model.eval(). As these
names imply, these settings are used to put the model into a training and an evaluation
mode. 

This is necessary for components that behave differently during training
and inference, such as dropout or batch normalization layers. 

Since we don’t have dropout or other components in our NeuralNetwork class 
that are affected by these settings,
using model.train() and model.eval() is redundant in the code below. 

However, it’s best practice to include them anyway to avoid unexpected behaviors when we
change the model architecture or reuse the code to train a different model.

In [12]:
import torch.nn.functional as F


# Seed the global RNG before building the model so its initial weights are reproducible.
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5) # The optimizer needs to know which parameters to optimize.

num_epochs = 3

for epoch in range(num_epochs):
    
    # Training mode for the parameter updates; switched back to eval mode after each epoch.
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):

        # Forward pass: raw, unnormalized class scores for the batch.
        logits = model(features)
        
        loss = F.cross_entropy(logits, labels) # Loss function; takes the logits directly
        
        optimizer.zero_grad() # Sets the gradients from the previous round to 0 to prevent unintended gradient accumulation
        loss.backward() # Backward pass: computes the gradient of the loss w.r.t. the model parameters
        optimizer.step() # The optimizer uses the gradients to update the model parameters.
    
        ### LOGGING
        # NOTE(review): the epoch is printed 1-based but batch_idx is printed 0-based, and the
        # value labeled "Train/Val Loss" is the training loss of the current batch only.
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
              f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
              f" | Train/Val Loss: {loss:.2f}")

    model.eval()
    # Optional model evaluation

Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65
Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44
Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13
Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03
Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00


We pass the logits directly into the cross_entropy loss function,
which will apply the softmax function internally for efficiency and numerical
stability reasons.

In [21]:
# Switch to evaluation mode and disable gradient tracking for inference.
model.eval()
with torch.no_grad():
    outputs = model(X_train)  # raw logits, one row per training example

print(outputs)

tensor([[ 2.8569, -4.1618],
        [ 2.5382, -3.7548],
        [ 2.0944, -3.1820],
        [-1.4814,  1.4816],
        [-1.7176,  1.7342]])


In [24]:
torch.set_printoptions(sci_mode=False) # This is optional, but it makes the output more readable.
# Turn each row of logits into class-membership probabilities that sum to 1.
probas = torch.softmax(outputs, dim=1)
print(probas)

# Predicted class = column index of the largest probability in each row.
predictions = torch.argmax(probas, dim=1)
print(predictions)

tensor([[    0.9991,     0.0009],
        [    0.9982,     0.0018],
        [    0.9949,     0.0051],
        [    0.0491,     0.9509],
        [    0.0307,     0.9693]])
tensor([0, 0, 0, 1, 1])


In [15]:
# Element-wise comparison: True where the predicted class equals the training label.
predictions == y_train

tensor([True, True, True, True, True])

In [16]:
# Count the matches (each True is summed as 1).
torch.sum(predictions == y_train)

tensor(5)

In [17]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

    Args:
        model: Module mapping a batch of features to per-class logits; the
            predicted class is the argmax along dimension 1.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        The accuracy as a Python float in [0, 1], or ``nan`` when ``dataloader``
        yields no examples (instead of failing with a division by zero).

    Note:
        The model is switched to evaluation mode and is left in that mode.
    """
    model = model.eval()
    correct = 0
    total_examples = 0

    # Evaluation needs no gradients, so tracking is disabled for the whole pass.
    with torch.no_grad():
        for features, labels in dataloader:
            logits = model(features)
            predictions = torch.argmax(logits, dim=1)
            compare = labels == predictions
            # Accumulate as a Python int: a float32 running sum stops being exact
            # above 2**24 and would also round the final ratio to float32.
            correct += torch.sum(compare).item()
            total_examples += len(compare)

    if total_examples == 0:
        # Accuracy is undefined without examples.
        return float("nan")

    return correct / total_examples

# NOTE: train_loader was created with shuffle=True and drop_last=True, so each call scores only
# four of the five training examples (two full batches of two).
compute_accuracy(model, train_loader)

1.0

In [18]:
# Accuracy on the two test examples, which were not used for training.
compute_accuracy(model, test_loader)

1.0

# Save & Load Model

Here’s the recommended way to save and load models in PyTorch.

The model’s state_dict is a Python dictionary object that maps each layer in the
model to its trainable parameters (weights and biases). "model.pth" is an arbitrary
filename for the model file saved to disk. We can give it any name and file ending we
like; however, .pth and .pt are the most common conventions.

In [19]:
# Persist only the parameters (the state_dict), not the whole model object.
torch.save(model.state_dict(), "model.pth")
# Build a fresh, randomly initialized instance, then overwrite its parameters with the saved ones.
# NOTE: a newly constructed module starts in training mode; call model.eval() before inference.
model = NeuralNetwork(2, 2) # needs to match the original model exactly
# weights_only=True restricts what torch.load will unpickle (see the torch.load documentation).
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>

The line model = NeuralNetwork(2, 2) is not strictly necessary if you execute this
code in the same session where you saved a model. However, I included it here to
illustrate that we need an instance of the model in memory to apply the saved
parameters. Here, the NeuralNetwork(2, 2) architecture needs to match the original
saved model exactly.