## Introduction

In [1]:
import torch
torch.__version__

'2.3.0'

In [3]:
# for windows/linux 
torch.cuda.is_available()

False

In [4]:
# for mac with GPU 
torch.backends.mps.is_available()

True

## Understanding tensors

In [6]:
torch0d = torch.tensor(10)
torch1d = torch.tensor([1,2,3,4])
torch2d = torch.tensor([[1,2,3], [4,5,6]])
torch3d = torch.tensor([[[1,2,3], [4,5,6]], 
                        [[7,8,9], [10,11,12]]])

In [7]:
torch0d

tensor(10)

In [8]:
torch1d

tensor([1, 2, 3, 4])

In [9]:
torch2d

tensor([[1, 2, 3],
        [4, 5, 6]])

In [10]:
torch3d

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])

In [12]:
torch0d.ndim, torch1d.ndim, torch2d.ndim, torch3d.ndim

(0, 1, 2, 3)

In [15]:
torch0d.dtype, torch3d.dtype

(torch.int64, torch.int64)

In [16]:
floatvec = torch.tensor([[1,2,3], [4,5,6.]])

In [18]:
floatvec

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [19]:
floatvec.dtype

torch.float32

In [20]:
floatvec = torch.tensor([[1,2,3], [4,5,6]], dtype=torch.float32)
floatvec

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [21]:
floatvec.dtype

torch.float32

In [22]:
floatvec = torch.tensor([[1,2,3], [4,5,6]], dtype=torch.float64)
floatvec

tensor([[1., 2., 3.],
        [4., 5., 6.]], dtype=torch.float64)

In [25]:
# # This will throw an error 
# # TypeError: tensor(): argument 'dtype' must be torch.dtype, not type

# import numpy as np

# floatvec = torch.tensor([[1,2,3], [4,5,6]], dtype=np.float32)
# floatvec

In [26]:
floatvec

tensor([[1., 2., 3.],
        [4., 5., 6.]], dtype=torch.float64)

In [30]:
floatvec = floatvec.to(torch.float32)

In [31]:
floatvec

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [32]:
floatvec.dtype

torch.float32

In [33]:
torch0d.shape, torch1d.shape

(torch.Size([]), torch.Size([4]))

In [38]:
torch0d.ndim, torch1d.ndim

(0, 1)

In [39]:
torch2d.ndim, torch3d.ndim

(2, 3)

In [40]:
torch2d.shape, torch3d.shape

(torch.Size([2, 3]), torch.Size([2, 2, 3]))

In [41]:
torch2d.reshape(1,6)

tensor([[1, 2, 3, 4, 5, 6]])

In [42]:
torch2d.reshape(3,2)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [43]:
torch2d.reshape(3,-1)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [46]:
torch2d.reshape(-1,3)

tensor([[1, 2, 3],
        [4, 5, 6]])

In [48]:
torch2d.view(-1,6)

tensor([[1, 2, 3, 4, 5, 6]])

In [49]:
torch2d.reshape(-1,6)

tensor([[1, 2, 3, 4, 5, 6]])

In [50]:
torch3d

tensor([[[ 1,  2,  3],
         [ 4,  5,  6]],

        [[ 7,  8,  9],
         [10, 11, 12]]])

In [51]:
torch2d

tensor([[1, 2, 3],
        [4, 5, 6]])

In [52]:
torch2d.T

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [55]:
torch1d

tensor([1, 2, 3, 4])

In [57]:
torch2d = torch.tensor([[1, 2, 3, 4],
                       [5, 6, 7, 8]])

torch2d.view(4, -1)

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

In [58]:
torch2d = torch2d.view(4, -1)

In [59]:
torch2d

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

In [60]:
torch2d.shape

torch.Size([4, 2])

In [61]:
torch1d.shape

torch.Size([4])

In [67]:
# # This will error out
# # RuntimeError: size mismatch, got input (4), mat (4x2), vec (4)

# torch2d.matmul(torch1d)

In [68]:
torch1d.matmul(torch2d)

tensor([50, 60])

In [69]:
torch1d.matmul(torch2d).shape

torch.Size([2])

In [70]:
torch1d @ torch2d

tensor([50, 60])

In [73]:
torch2d

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

In [74]:
torch2d.matmul(torch2d.T)

tensor([[  5,  11,  17,  23],
        [ 11,  25,  39,  53],
        [ 17,  39,  61,  83],
        [ 23,  53,  83, 113]])

In [76]:
torch2d @ torch2d.T

tensor([[  5,  11,  17,  23],
        [ 11,  25,  39,  53],
        [ 17,  39,  61,  83],
        [ 23,  53,  83, 113]])

## Seeing models as computation graphs

### A logistic regression forward pass

In [79]:
import torch.nn.functional as F 

y = torch.tensor([1.])
x1 = torch.tensor([1.1])

w1 = torch.tensor([2.2])
b1 = torch.tensor([0.0])

z = x1 * w1 + b1
a = torch.sigmoid(z)

loss = F.binary_cross_entropy(a, y) 

loss

tensor(0.0852)

In [85]:
import torch.nn.functional as F 

y = torch.tensor([1.])
x1 = torch.tensor([1.1])

w1 = torch.tensor([2.2])
b1 = torch.tensor([0.0])

z = x1 * w1 + b1
a = torch.sigmoid(z)

loss = F.binary_cross_entropy_with_logits(z, y) 
loss

tensor(0.0852)

In [86]:
z.requires_grad

False

In [87]:
z

tensor([2.4200])

### Computing gradients via autograd

In [97]:
import torch.nn.functional as F
from torch.autograd import grad

# Single training example: target label y and one input feature x1.
y = torch.tensor([1.])
x1 = torch.tensor([1.1])

# requires_grad=True makes autograd track operations on the weight and bias
# so that gradients with respect to them can be computed below.
w1 = torch.tensor([2.2], requires_grad=True)
b1 = torch.tensor([0.0], requires_grad=True)

# z is the logit (net input); a is the sigmoid activation.
# NOTE: `a` is not used by the loss below, which takes the logit z directly.
z = x1 * w1 + b1
a = torch.sigmoid(z)

loss = F.binary_cross_entropy_with_logits(z, y) 

# manual way 
# grad() returns a tuple with one gradient tensor per requested input.
# retain_graph=True keeps the computation graph alive after the first call
# so the second call can still backpropagate through it.
grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b1 = grad(loss, b1, retain_graph=True)


In [98]:
print(f"grad_L_w1 : {grad_L_w1}")
print(f"grad_L_b1 : {grad_L_b1}")


grad_L_w1 : (tensor([-0.0898]),)
grad_L_b1 : (tensor([-0.0817]),)


In [107]:
import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([1.])
x1 = torch.tensor([1.1])

w1 = torch.tensor([2.2], requires_grad=True)
b1 = torch.tensor([0.0], requires_grad=True)

z = x1 * w1 + b1
a = torch.sigmoid(z)

loss = F.binary_cross_entropy_with_logits(z, y) 

# pytorch way 
loss.backward()
print(f"grad_L_w1 : {w1.grad}")
print(f"grad_L_b1 : {b1.grad}")

grad_L_w1 : tensor([-0.0898])
grad_L_b1 : tensor([-0.0817])


### Implementing multilayer neural networks


![img](./img1.png)

In [114]:
import torch 

class NeuralNetwork(torch.nn.Module):
    """Fully connected feed-forward network that returns raw logits.

    The network is a stack of ``Linear -> ReLU`` pairs (one pair per entry in
    ``hidden_sizes``) followed by a final ``Linear`` output layer. No softmax
    is applied in ``forward``.

    Args:
        num_inputs: Number of input features per example.
        num_outputs: Number of output units (logits) per example.
        hidden_sizes: Width of each hidden layer, in order. The default
            ``(30, 20)`` reproduces the original two-hidden-layer network,
            including the ``layers.<idx>`` state_dict keys and the order in
            which parameters are initialised.
    """

    def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
        super().__init__()

        modules = []
        in_features = num_inputs
        # Hidden layers: each Linear is followed by a ReLU non-linearity.
        for width in hidden_sizes:
            modules.append(
                torch.nn.Linear(in_features=in_features, out_features=width)
            )
            modules.append(torch.nn.ReLU())
            in_features = width

        # Output layer: no activation, so forward() yields logits.
        modules.append(
            torch.nn.Linear(in_features=in_features, out_features=num_outputs)
        )

        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the logits."""
        logits = self.layers(x)
        return logits

In [115]:
model = NeuralNetwork(50, 3)

In [116]:
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [119]:
# Total no. of parameters 
model.parameters()

<generator object Module.parameters at 0x302eb0040>

In [125]:
for p in model.parameters():
    print(p, len(p))
    

Parameter containing:
tensor([[ 0.1372,  0.0784, -0.0019,  ...,  0.0681, -0.0281,  0.0667],
        [-0.0199, -0.0477, -0.0605,  ..., -0.0633, -0.1335, -0.0358],
        [-0.0840, -0.0862, -0.0478,  ..., -0.0727, -0.0694, -0.0108],
        ...,
        [ 0.1237, -0.0767,  0.0859,  ...,  0.1031,  0.0091,  0.0843],
        [-0.0719,  0.0441,  0.1113,  ..., -0.0203,  0.0188, -0.0364],
        [ 0.0402,  0.0081,  0.1389,  ...,  0.0798, -0.0290,  0.0611]],
       requires_grad=True) 30
Parameter containing:
tensor([ 0.0287,  0.1103,  0.1040, -0.1187, -0.0915,  0.0386, -0.0120,  0.1410,
         0.1214, -0.0157, -0.0154, -0.0846, -0.1409, -0.0690,  0.0485, -0.1367,
        -0.0757,  0.1102,  0.0872,  0.1344,  0.0535,  0.0403, -0.0702,  0.0319,
         0.0166, -0.0580,  0.0715,  0.0268, -0.1381, -0.0508],
       requires_grad=True) 30
Parameter containing:
tensor([[-1.0424e-01, -1.5308e-01, -1.4587e-01,  8.8064e-02,  4.1055e-02,
          1.7843e-01,  5.4635e-03, -1.5055e-01,  3.1943e-02,  1

In [133]:
num_params = 0
for p in model.parameters():
    if p.requires_grad:
        print(len(p), p.numel())
        num_params += p.numel()

print(f"Total no. of trainable prams : {num_params}")

30 1500
30 30
20 600
20 20
3 60
3 3
Total no. of trainable prams : 2213


In [134]:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters:", num_params)

Total number of trainable model parameters: 2213


In [137]:
model

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)

In [138]:
model.layers

Sequential(
  (0): Linear(in_features=50, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=3, bias=True)
)

In [139]:
model.layers[0]

Linear(in_features=50, out_features=30, bias=True)

In [140]:
model.layers[0].weight

Parameter containing:
tensor([[ 0.1372,  0.0784, -0.0019,  ...,  0.0681, -0.0281,  0.0667],
        [-0.0199, -0.0477, -0.0605,  ..., -0.0633, -0.1335, -0.0358],
        [-0.0840, -0.0862, -0.0478,  ..., -0.0727, -0.0694, -0.0108],
        ...,
        [ 0.1237, -0.0767,  0.0859,  ...,  0.1031,  0.0091,  0.0843],
        [-0.0719,  0.0441,  0.1113,  ..., -0.0203,  0.0188, -0.0364],
        [ 0.0402,  0.0081,  0.1389,  ...,  0.0798, -0.0290,  0.0611]],
       requires_grad=True)

In [143]:
model.layers[0]

Linear(in_features=50, out_features=30, bias=True)

In [144]:
model.layers[1]

ReLU()

In [146]:
model.layers[2]

Linear(in_features=30, out_features=20, bias=True)

In [147]:
model.layers[0].weight.shape

torch.Size([30, 50])

In [148]:
model.layers[0].bias.shape

torch.Size([30])

In [150]:
model.layers[0].weight

Parameter containing:
tensor([[ 0.1372,  0.0784, -0.0019,  ...,  0.0681, -0.0281,  0.0667],
        [-0.0199, -0.0477, -0.0605,  ..., -0.0633, -0.1335, -0.0358],
        [-0.0840, -0.0862, -0.0478,  ..., -0.0727, -0.0694, -0.0108],
        ...,
        [ 0.1237, -0.0767,  0.0859,  ...,  0.1031,  0.0091,  0.0843],
        [-0.0719,  0.0441,  0.1113,  ..., -0.0203,  0.0188, -0.0364],
        [ 0.0402,  0.0081,  0.1389,  ...,  0.0798, -0.0290,  0.0611]],
       requires_grad=True)

In [153]:
model.layers[0].weight[0]

tensor([ 0.1372,  0.0784, -0.0019,  0.0563,  0.0942, -0.1250, -0.0641,  0.0099,
         0.0156,  0.0787, -0.0928, -0.0025,  0.1321, -0.1354, -0.0797,  0.0535,
        -0.0301,  0.0944, -0.0660, -0.0618, -0.0259, -0.0612, -0.1116,  0.0289,
         0.1346,  0.0692,  0.0274, -0.1067, -0.1413, -0.0833,  0.0936, -0.0742,
        -0.0558,  0.1014,  0.0077, -0.1299,  0.0186, -0.0342, -0.1197,  0.1184,
         0.1010,  0.1387, -0.1384, -0.1107,  0.0126, -0.0520,  0.0910,  0.0681,
        -0.0281,  0.0667], grad_fn=<SelectBackward0>)

In [161]:
# Forward pass 
torch.manual_seed(123)

X = torch.rand(1, 50)
out = model(X)
print(out)

tensor([[ 0.0854, -0.1516, -0.1176]], grad_fn=<AddmmBackward0>)


In [162]:
X.shape, X.dim()

(torch.Size([1, 50]), 2)

In [165]:
# For inference 

with torch.no_grad():
    out = model(X)
print(out) 

tensor([[ 0.0854, -0.1516, -0.1176]])


In [166]:
# For inference 

with torch.no_grad():
    out = torch.softmax(model(X), dim=1)
print(out) 

tensor([[0.3838, 0.3028, 0.3133]])


### Setting up efficient data loaders

PyTorch implements a Dataset and a DataLoader class. The Dataset class is used to instantiate objects that define how each data record is loaded. The DataLoader handles how the data is shuffled and assembled into batches.


![](./img2.png)

In [170]:
# Creating a toy dataset (two features and a class label)

#Train 
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

y_train = torch.tensor([0, 0, 0, 1, 1])

# Test
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])

y_test = torch.tensor([0, 1])


In [197]:
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Dataset wrapping an in-memory feature container and its labels.

    ``X`` and ``y`` are stored as given (no copy); record ``i`` is the pair
    ``(X[i], y[i])``.
    """

    def __init__(self, X, y):
        self.features, self.labels = X, y

    def __len__(self):
        # The dataset size is taken from the first dimension of the labels.
        num_records = self.labels.shape[0]
        return num_records

    def __getitem__(self, index):
        # Return the (features, label) pair stored at the given position.
        return self.features[index], self.labels[index]


train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [198]:
train_ds.features, test_ds.features

(tensor([[-1.2000,  3.1000],
         [-0.9000,  2.9000],
         [-0.5000,  2.6000],
         [ 2.3000, -1.1000],
         [ 2.7000, -1.5000]]),
 tensor([[-0.8000,  2.8000],
         [ 2.6000, -1.6000]]))

In [199]:
train_ds.labels, test_ds.labels

(tensor([0, 0, 0, 1, 1]), tensor([0, 1]))

In [200]:
train_ds[0]

(tensor([-1.2000,  3.1000]), tensor(0))

In [201]:
len(train_ds)

5

In [244]:
from torch.utils.data import DataLoader
torch.manual_seed(123)

train_loader = DataLoader(dataset=train_ds, 
                         batch_size = 2,
                         shuffle=True,
                         num_workers=0)

test_loader = DataLoader(dataset=test_ds, 
                         batch_size = 2,
                         shuffle=False,
                         num_workers=0)

In [245]:
for idx, (x, y) in enumerate(train_loader):
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])


In [246]:
train_ds.features, test_ds.features

(tensor([[-1.2000,  3.1000],
         [-0.9000,  2.9000],
         [-0.5000,  2.6000],
         [ 2.3000, -1.1000],
         [ 2.7000, -1.5000]]),
 tensor([[-0.8000,  2.8000],
         [ 2.6000, -1.6000]]))

In [256]:
train_loader = DataLoader(dataset=train_ds, 
                         batch_size = 2,
                         shuffle=True,
                         num_workers=0,
                         drop_last=True)

In [257]:
for idx, (x, y) in enumerate(train_loader):
    print(f"Batch {idx+1}:", x, y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-1.2000,  3.1000]]) tensor([1, 0])
Batch 2: tensor([[-0.5000,  2.6000],
        [ 2.7000, -1.5000]]) tensor([0, 1])


### A typical training loop

In [258]:
train_loader = DataLoader(dataset=train_ds, 
                         batch_size = 2,
                         shuffle=True,
                         num_workers=0,
                         drop_last=True)

In [266]:
import torch
import torch.nn.functional as F

# Seed before building the model so the initial weights are reproducible.
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=0.5)

num_epochs = 3

for epoch in range(num_epochs):
    # Switch back to training mode at the start of every epoch. Without this,
    # the model.eval() call at the end of the first epoch would leave all
    # later epochs training in evaluation mode.
    model.train()
    for batch, (x, y) in enumerate(train_loader):

        logits = model(x)
        
        # cross_entropy takes raw logits and integer class labels.
        loss = F.cross_entropy(logits, y)
        # Clear gradients from the previous step before backpropagating,
        # since backward() accumulates into .grad.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Logging 
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
              f" | Batch {batch:03d}/{len(train_loader):03d}"
              f" | Train Loss: {loss:.2f}")
    # Evaluation mode after each epoch (e.g. for validation); it also leaves
    # the model in eval mode once training has finished.
    model.eval()
        

Epoch: 001/003 | Batch 000/002 | Train Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train Loss: 0.65
Epoch: 002/003 | Batch 000/002 | Train Loss: 0.44
Epoch: 002/003 | Batch 001/002 | Train Loss: 0.13
Epoch: 003/003 | Batch 000/002 | Train Loss: 0.03
Epoch: 003/003 | Batch 001/002 | Train Loss: 0.00


In [267]:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters:", num_params)

Total number of trainable model parameters: 752


In [290]:
model.eval()
with torch.no_grad():
    outputs = model(X_train)

print(outputs)

tensor([[2.8569e+00, -4.1618e+00],
        [2.5382e+00, -3.7548e+00],
        [2.0944e+00, -3.1820e+00],
        [-1.4814e+00, 1.4816e+00],
        [-1.7176e+00, 1.7342e+00]])


In [291]:
torch.set_printoptions(sci_mode=True)
probas = torch.softmax(outputs, dim=1)
print(probas)

tensor([[9.9911e-01, 8.9419e-04],
        [9.9815e-01, 1.8458e-03],
        [9.9491e-01, 5.0852e-03],
        [4.9127e-02, 9.5087e-01],
        [3.0714e-02, 9.6929e-01]])


In [292]:
torch.round(probas, decimals=1)

tensor([[1.0000e+00, 0.0000e+00],
        [1.0000e+00, 0.0000e+00],
        [1.0000e+00, 0.0000e+00],
        [0.0000e+00, 1.0000e+00],
        [0.0000e+00, 1.0000e+00]])

In [294]:
predictions = torch.argmax(outputs, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [295]:
predictions = torch.argmax(probas, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [296]:
torch.sum(predictions)

tensor(2)

In [298]:
torch.sum(predictions == y_train)

tensor(5)

In [299]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples in ``dataloader`` classified correctly.

    The model is switched to evaluation mode (and left in it), and inference
    runs with gradient tracking disabled. The predicted class of each example
    is the argmax of the model output along dimension 1.

    Args:
        model: Module mapping a batch of features to per-class scores.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        Accuracy as a Python float in ``[0.0, 1.0]``.

    Raises:
        ValueError: If ``dataloader`` yields no examples, so accuracy is
            undefined.
    """
    model = model.eval()
    # Count with Python ints: accumulating into a float tensor would compute
    # the final ratio in float32 and lose precision.
    correct = 0
    total_examples = 0

    with torch.no_grad():
        for features, labels in dataloader:
            logits = model(features)
            predictions = torch.argmax(logits, dim=1)
            compare = labels == predictions
            correct += int(torch.sum(compare).item())
            total_examples += len(compare)

    if total_examples == 0:
        raise ValueError("compute_accuracy() got a dataloader with no examples")

    return correct / total_examples

In [301]:
print(compute_accuracy(model, test_loader))

1.0


### Saving and loading models

In [302]:
torch.save(model.state_dict(), 'model.pth')

In [307]:
# The checkpoint holds only the state_dict, so rebuild the same architecture
# before loading the weights into it.
model = NeuralNetwork(2, 2)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs, and prevents a tampered checkpoint file
# from executing arbitrary code on load.
model.load_state_dict(torch.load('model.pth', weights_only=True))

<All keys matched successfully>

In [308]:
model(X_test)

tensor([[2.4088e+00, -3.5848e+00],
        [-1.7179e+00, 1.7334e+00]], grad_fn=<AddmmBackward0>)

In [309]:
print(compute_accuracy(model, test_loader))

1.0
