## Install Torch (compatible with book)

In [None]:
!pip install torch==2.4.1


Collecting torch==2.4.1
  Downloading torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.1)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.4.1)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.4.1)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.4.1)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.4.1)
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.4.1)
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-man

In [None]:
import torch
# Only the value of a cell's last expression is displayed, so this line shows nothing.
torch.__version__
# Reports whether the MPS backend can be used; the recorded output below is False.
torch.backends.mps.is_available()

False

## Creating Scalars, Tensors, Matrices


In [None]:
# 0-D tensor (scalar) built from a plain Python integer
tensor0d = torch.tensor(1)

# 1-D tensor (vector): one level of list nesting
tensor1d = torch.tensor([1, 2, 3])

# 2-D tensor (matrix): a list of rows
tensor2d = torch.tensor([
    [1, 2],
    [3, 4],
])

# 3-D tensor: a list of matrices
tensor3d = torch.tensor([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])

# Display all four tensors as the cell output
tensor0d, tensor1d, tensor2d, tensor3d

(tensor(1),
 tensor([1, 2, 3]),
 tensor([[1, 2],
         [3, 4]]),
 tensor([[[1, 2],
          [3, 4]],
 
         [[5, 6],
          [7, 8]]]))

#### Size, Reshape, Transpose

In [None]:
# Check the size of a tensor; .shape is the attribute form of .size()
tensor3d.shape

torch.Size([2, 2, 2])

In [None]:
# Reshaping: both calls return a tensor holding the same 8 elements in a new layout.
# Bind the results so they are not silently discarded.
tensor3d_2x4 = tensor3d.reshape(2, 4)
tensor3d_4x1x2 = tensor3d.view(4, 1, 2)

# Transposing: .T swaps the rows and columns of a 2-D tensor.
# NOTE: despite its name, this matrix is not symmetric (it differs from its transpose).
tensor_symetric = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
tensor_transposed = tensor_symetric.T

# Multiplying: matmul is a matrix product (rows times columns), not element-wise.
# As the cell's last expression, only this result is displayed.
tensor_to_multiply = torch.tensor([[4, 5, 6], [7, 8, 9], [10, 11, 12]])
tensor_symetric.matmul(tensor_to_multiply)


tensor([[ 48,  54,  60],
        [111, 126, 141],
        [174, 198, 222]])

## Seeing Models as Computation Graphs (autograd)

##### Autograd
We need to calculate gradients in order to adjust parameters. Autograd calculates gradients.
##### Single Layer Neural Network (logistic regression classifier)



#### Single Layer Network
$y$ is our 'truth' (the true label). $x_1$ is the input value, and $w_1$ and $b$ are the parameters that will be updated throughout training.

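The $z$ equation computes the net input (the weighted input plus the bias), and $a$ squashes it into a probability with the sigmoid function. The $loss$ measures how far that predicted probability is from the true label; in this case it is the binary cross-entropy, $loss = -\left[y \log a + (1 - y)\log(1 - a)\right]$.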

In [None]:
import torch.nn.functional as F

# Data: the true label and a single input feature
y = torch.tensor([1.0])
x1 = torch.tensor([1.1])

# Parameters: weight and bias unit
w1 = torch.tensor([2.2])
b = torch.tensor([0.0])

# Forward pass: net input, then sigmoid activation to get a probability
z = w1 * x1 + b
a = z.sigmoid()

# Binary cross-entropy between the predicted probability and the label
loss = F.binary_cross_entropy(input=a, target=y)
print(loss)

tensor(0.0852)


## Automatic Differentiation (Autograd)
This is what autograd does: it calculates gradients of the loss computed above. It backpropagates through the loss and activation calculations so that the parameters can be optimized.


The code below calculates $\frac{\partial loss}{\partial w_1}$ and $\frac{\partial loss}{\partial b}$ (the gradients with respect to our parameters). Notice that `requires_grad=True` is needed on the parameters because, originally, we didn't set anything up for backpropagation. Calling `grad` explicitly works, but `loss.backward()` is the more convenient way to get the gradients: it stores them in each parameter's `.grad` attribute.

[concept video](https://www.youtube.com/watch?v=Ilg3gGewQ5U)

In [None]:
import torch
import torch.nn.functional as F
from torch.autograd import grad

# The label and the input are constants; only the parameters track gradients.
y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

# Forward pass builds the computation graph
z = w1 * x1 + b
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)

# Option 1: request each gradient explicitly. grad() returns a tuple, and
# retain_graph=True keeps the graph alive for the calls that follow.
grad_L_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
grad_L_b = grad(outputs=loss, inputs=b, retain_graph=True)
print(grad_L_w1, grad_L_b)

# Option 2: backward() fills the .grad attribute of every tracked leaf tensor.
loss.backward()
print(w1.grad, b.grad)


(tensor([-0.0898]),) (tensor([-0.0817]),)
tensor([-0.0898]) tensor([-0.0817])


## Implementing a Network

All models are built on this base class (`torch.nn.Module`) with these imports. There could be an easier way, but this is a good start. Each linear layer has the form $z = Wx + b$, the multi-dimensional version of $z = x_1 w_1 + b$.

It's a bit confusing, but with arguments like `Linear(30, 20)` the layer maps 30 inputs to 20 outputs, so its weights form a matrix $W \in \mathbb{R}^{20 \times 30}$ and its bias is $b \in \mathbb{R}^{20}$.

Each layer is defined by $W$ and $b$, and its output is passed through `ReLU()`, the non-linear activation function (often written $\sigma$).


In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn

class NeuralNetwork(torch.nn.Module):
    """Fully connected classifier with two ReLU hidden layers (30 and 20 units).

    Args:
        num_inputs: Number of features in each input example.
        num_outputs: Number of values (logits) produced per example.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        # The layers are created in forward order so that parameter
        # initialisation consumes random numbers in the same sequence.
        first_hidden = [torch.nn.Linear(num_inputs, 30), torch.nn.ReLU()]
        second_hidden = [torch.nn.Linear(30, 20), torch.nn.ReLU()]
        output_layer = [torch.nn.Linear(20, num_outputs)]

        self.layers = torch.nn.Sequential(
            *first_hidden,
            *second_hidden,
            *output_layer,
        )

    def forward(self, x):
        """Run ``x`` through all layers and return the raw logits (no softmax applied)."""
        return self.layers(x)

Initialize the model, count its parameters (trainable meaning they can be backpropagated through and have gradients calculated), and look at the first layer's weight matrix as an example.


---


A cool fact to notice is that the weights change every time the model is created. That's because at the start they are a random guess. As the model learns, the weights will be tweaked, but at the first instance it's a shot in the dark.



In [None]:
model = NeuralNetwork(50, 3)

# Count the trainable parameters: only tensors with requires_grad=True are included.
num_params = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)

# Architecture summary, parameter count, and shape of the first layer's weights
first_weight = model.layers[0].weight
print(model, num_params, first_weight.shape)

# The weight values themselves
print(first_weight)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
) 2213 torch.Size([30, 50])
Parameter containing:
tensor([[-0.1246,  0.0794,  0.0908,  ...,  0.0961, -0.0873,  0.0717],
        [ 0.0697, -0.0213,  0.1372,  ..., -0.0605,  0.1081, -0.0019],
        [ 0.0448, -0.0750, -0.0271,  ...,  0.0299, -0.0588, -0.1273],
        ...,
        [ 0.0848, -0.0094,  0.1189,  ...,  0.0619,  0.1218, -0.0663],
        [-0.0727,  0.0426, -0.0797,  ...,  0.0073, -0.1397,  0.0747],
        [-0.0868,  0.1097,  0.0014,  ...,  0.0015, -0.0874,  0.0907]],
       requires_grad=True)


#### Taking a Forward Step

Here, calling `out = model(X)` performs one forward pass through the network and returns the output of the last layer (the model's raw 'guess', or logits).

Notice the last part of the output, `grad_fn=<AddmmBackward0>`. This tells us the output is tracked for backpropagation. If we only wanted predictions and were not training, we wouldn't need this tracking and could run the forward pass inside `with torch.no_grad():`.

In [None]:
# Seed the global RNG so that the random input X below is reproducible.
# NOTE: `model` was constructed in an earlier cell, before this seed was set,
# so its randomly initialised weights (and therefore `out`) can differ between runs.
torch.manual_seed(123)

# One example with 50 features, matching the model built with NeuralNetwork(50, 3)
X = torch.rand((1, 50))
# Calling the model runs one forward pass and returns the last layer's output
out = model(X)
print(out)

tensor([[-0.1575,  0.2124, -0.1047]], grad_fn=<AddmmBackward0>)


## Dataset Loaders

The code below creates two small sample datasets (train and test) in a CSV-like layout, where each row of features maps to the label at the same index. The purpose of the dataset class is just to give us a way to access individual examples.

In [None]:
import torch

# Training features: five examples, two features each
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

# Training labels: one integer class (0 or 1) per row of X_train
y_train = torch.tensor([0, 0, 0, 1, 1])
# Test features: two examples with the same two-feature layout
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])

# Test labels: one integer class per row of X_test
y_test = torch.tensor([0, 1])

In [None]:
from torch.utils.data import Dataset


class ToyDataset(Dataset):
    """Minimal map-style dataset that pairs each feature row with its label.

    Args:
        X: Indexable collection of feature rows (a 2-D tensor in this notebook).
        y: Labels aligned with ``X``, one per feature row.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of examples.
    """

    def __init__(self, X, y):
        # Fail early: a mismatch would otherwise surface later as an IndexError
        # during iteration, or silently ignore the trailing feature rows.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of examples, "
                f"got {len(X)} and {len(y)}"
            )
        self.features = X
        self.labels = y

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y

    def __len__(self):
        """Return the number of examples in the dataset."""
        return len(self.labels)

    def __repr__(self) -> str:
        return f"Dataset({self.features}, {self.labels})"

# Wrap the train and test tensors so each example can be fetched by index
train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)


### Using the DataLoader

We set a manual seed because the batches are shuffled randomly and we want the same order each time. `batch_size` sets how many examples are fed in at a time, and `num_workers` can speed up loading with parallel worker processes at the expense of computer resources (`0` loads everything in the main process).

finally at the end we iterate through the batch and take a look at each training data that is sent into the model.



In [None]:
from torch.utils.data import DataLoader

# Fix the RNG so the shuffled batch order is the same on every run
torch.manual_seed(123)

train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,      # two examples per batch
    shuffle=True,      # draw the examples in random order
    num_workers=0,     # 0 = load the data in the main process
    drop_last=True     # drop the final incomplete batch (5 examples -> 2 batches of 2)
)

# Inspect what the loader yields: one (features, labels) pair per batch
for idx, (features, labels) in enumerate(train_loader):
    print(f"Batch {idx + 1}: {features}, {labels}")


Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]), tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]), tensor([0, 0])


## Training Loops

### 3 step training example

We initialize our custom model with two inputs and two outputs because our data has two features and two classes (0 and 1), so the model produces one score (logit) per class.

Notice that we choose the optimizer's learning rate arbitrarily (I'll understand how to tune it later) and set a number of epochs (also arbitrary).

For three passes over the training data, we set the model to train mode and iterate through the batches. For each batch we run a forward pass through the neural network, calculate the loss, backpropagate, and let the optimizer update the weights for the next step.


In [None]:
import torch.nn.functional as F


# Seed before building the model so the initial weights and the batch order are reproducible
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)  # two features in, one logit per class out
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)  # plain SGD over all model parameters

num_epochs = 3  # number of full passes over train_loader

for epoch in range(num_epochs):

    model.train()  # set model in training mode
    for batch_idx, (features, labels) in enumerate(train_loader):

        logits = model(features)  # forward pass

        # cross_entropy takes raw logits and integer class labels
        loss = F.cross_entropy(logits, labels)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # backpropagate to fill each parameter's .grad
        optimizer.step()       # update the parameters from their gradients

        ### LOGGING
        # epoch and batch_idx already count from zero, so no separate counters are needed
        print(f"Epoch: {epoch+1}/{num_epochs}, Batch: {batch_idx+1}/{len(train_loader)}, Train Loss: {loss}")

    # Switch to evaluation mode at the end of every epoch
    model.eval()

Epoch: 1/3, Batch: 1/2, Train Loss: 0.7487356662750244
Epoch: 1/3, Batch: 2/2, Train Loss: 0.6450306177139282
Epoch: 2/3, Batch: 1/2, Train Loss: 0.44225579500198364
Epoch: 2/3, Batch: 2/2, Train Loss: 0.12562280893325806
Epoch: 3/3, Batch: 1/2, Train Loss: 0.026905544102191925
Epoch: 3/3, Batch: 2/2, Train Loss: 0.0043272399343550205


### Making Predictions / Evaluating Results
Here we make predictions (albeit on the same data we trained on) and map each row of probabilities to its predicted class label (0 or 1).

In [None]:
# Put the model in evaluation mode before predicting
model.eval()

# Gradients are not needed for prediction, so skip building the autograd graph
with torch.no_grad():
    outputs = model(X_train)

# Print plain decimals instead of scientific notation
torch.set_printoptions(sci_mode=False)

# Turn each row of logits into class probabilities
probas = outputs.softmax(dim=1)
print(probas)

# The predicted class is the column with the highest probability
predictions = probas.argmax(dim=1)
print(predictions)

tensor([[    0.9991,     0.0009],
        [    0.9982,     0.0018],
        [    0.9949,     0.0051],
        [    0.0491,     0.9509],
        [    0.0307,     0.9693]])
tensor([0, 0, 0, 1, 1])


### compute accuracy template function

In [None]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

    Args:
        model: Module mapping a batch of features to logits; the predicted
            class is the argmax along dimension 1.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        The accuracy as a Python float, or NaN when the dataloader yields
        no examples (the accuracy is undefined in that case).

    Note:
        The model is switched to evaluation mode and left in that mode.
    """
    model = model.eval()
    correct = 0
    total_examples = 0

    # No gradients are needed for evaluation, so the whole loop runs under no_grad.
    with torch.no_grad():
        for features, labels in dataloader:
            logits = model(features)
            predictions = torch.argmax(logits, dim=1)
            compare = labels == predictions
            # Accumulate plain integers rather than mixing a float with tensors.
            correct += int(torch.sum(compare))
            total_examples += len(compare)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total_examples == 0:
        return float("nan")
    return correct / total_examples

# Accuracy on the training batches; train_loader uses drop_last=True, so the
# incomplete final batch is not evaluated. Recorded output: 1.0 (100%).
print(compute_accuracy(model, train_loader)) #100%

1.0


## Saving / Loading

In [None]:
# Persist only the learned parameters (the state dict), not the model object itself
torch.save(model.state_dict(), "model.pth")
# Rebuild the architecture first: the layer shapes must match the saved tensors
model = NeuralNetwork(2, 2) # needs to match the original model exactly
# weights_only=True restricts what torch.load will unpickle, then the weights are copied into the model
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>