Binary Classification: forward pass

In [8]:
import torch
import torch.nn as nn

In [9]:
# Toy batch: 5 samples (rows), each described by 6 features (columns)
feature_rows = [
    [-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
    [0.4838, 0.4001, 0.9786, 0.7858, 0.9876, 0.5987],
    [0.1902, 0.8507, 0.8175, 0.0994, 0.3862, 0.0132],
    [0.1973, 0.6625, 0.8123, 0.4483, 0.0345, 0.4165],
    [0.6948, 0.3557, 0.7689, 0.2793, 0.6816, 0.9152],
]
input_data = torch.tensor(feature_rows)

In [10]:
# Binary classifier: two stacked linear layers (6 -> 4 -> 1) followed by a
# sigmoid that squashes the single output value into the (0, 1) range
layers = [
    nn.Linear(6, 4),
    nn.Linear(4, 1),
    nn.Sigmoid(),
]
model = nn.Sequential(*layers)

# Forward pass over the whole batch at once
output = model(input_data)

In [11]:
print(output) 
# Outputs are five probabilities between 0 and 1
# It gives one value for each sample (row) in data

# Classification (using the usual 0.5 threshold)
# Class = 0 when the value is below 0.5, class = 1 otherwise
# In the run shown below all five values exceed 0.5, so every sample is
# assigned class 1; the layer weights are randomly initialised, so a re-run
# prints different values

tensor([[0.6753],
        [0.6362],
        [0.6204],
        [0.6026],
        [0.6195]], grad_fn=<SigmoidBackward0>)


In [12]:
# Number of target classes the network distinguishes between
n_classes = 3

# Multi-class classifier: 6 -> 4 -> n_classes, with a softmax over the last
# dimension so every row becomes a probability distribution over the classes
layers = [
    nn.Linear(6, 4),
    nn.Linear(4, n_classes),
    nn.Softmax(dim=-1),
]
model = nn.Sequential(*layers)

# Forward pass over the whole batch
output = model(input_data)
print(output.shape)  # one row per sample, one column per class: 5 x 3

torch.Size([5, 3])


In [13]:
print(output)

# Softmax over the last dimension makes each row sum to one
# The predicted label of a row is the index of its highest probability

tensor([[0.2601, 0.4146, 0.3253],
        [0.1531, 0.4248, 0.4221],
        [0.2217, 0.4337, 0.3446],
        [0.2073, 0.4400, 0.3527],
        [0.1676, 0.4247, 0.4077]], grad_fn=<SoftmaxBackward0>)


In [14]:
# Regression: forward pass

# Regression network: two linear layers (6 -> 4 -> 1) and no final activation,
# so each prediction is an unbounded real value
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),
    nn.Linear(in_features=4, out_features=1),
)

# One prediction per input row
output = model(input_data)

print(output)

tensor([[-0.6425],
        [-0.3394],
        [-0.2734],
        [-0.2111],
        [-0.1788]], grad_fn=<AddmmBackward0>)


#### Building a binary classifier in PyTorch

Recall that a small neural network with a single linear layer followed by a sigmoid function is a binary classifier. It acts just like a logistic regression.

In this exercise, you'll practice building this small network and interpreting the output of the classifier.

In [15]:
import torch
import torch.nn as nn

# A single sample with 8 features
input_tensor = torch.tensor([[3.0, 4.0, 6.0, 2.0, 3.0, 6.0, 8.0, 9.0]])

# Smallest possible binary classifier: one linear layer (8 -> 1) and a sigmoid
model = nn.Sequential(
    nn.Linear(in_features=8, out_features=1),
    nn.Sigmoid(),
)

output = model(input_tensor)
print(output)

tensor([[0.4376]], grad_fn=<SigmoidBackward0>)


In [16]:
# Regression model

import torch
import torch.nn as nn

# A single sample with 11 features
input_tensor = torch.tensor(
    [[3.0, 4.0, 6.0, 7.0, 10.0, 12.0, 2.0, 3.0, 6.0, 8.0, 9.0]]
)

# Exactly four linear layers, narrowing 11 -> 8 -> 4 -> 2 -> 1; consecutive
# widths are paired up so each layer's output size feeds the next layer
layer_widths = [11, 8, 4, 2, 1]
model = nn.Sequential(
    *(nn.Linear(n_in, n_out) for n_in, n_out in zip(layer_widths, layer_widths[1:]))
)

output = model(input_tensor)
print(output)

tensor([[-0.8533]], grad_fn=<AddmmBackward0>)


In [17]:
import torch
import torch.nn as nn

# A single sample with 11 features
input_tensor = torch.tensor(
    [[3.0, 4.0, 6.0, 7.0, 10.0, 12.0, 2.0, 3.0, 6.0, 8.0, 9.0]]
)

# Multi-class classifier with four labels: the last linear layer emits four
# scores and the softmax turns them into probabilities
model = nn.Sequential(
    nn.Linear(in_features=11, out_features=20),
    nn.Linear(in_features=20, out_features=12),
    nn.Linear(in_features=12, out_features=6),
    nn.Linear(in_features=6, out_features=4),
    nn.Softmax(dim=-1),
)

output = model(input_tensor)
print(output)

tensor([[0.1926, 0.1312, 0.4520, 0.2242]], grad_fn=<SoftmaxBackward0>)


In [18]:
# One-hot encoding written out by hand: class 0 out of 3 classes
import numpy as np
one_hot_numpy = np.zeros(3, dtype=int)
one_hot_numpy[0] = 1

In [19]:
# Let PyTorch build the one-hot vector instead of writing it out by hand

import torch.nn.functional as f
# Class index 0 out of 3 classes -> the 1 sits in position 0
f.one_hot(torch.tensor(0), num_classes = 3)

tensor([1, 0, 0])

In [20]:
# Class index 1 out of 3 classes -> the 1 sits in position 1
f.one_hot(torch.tensor(1), num_classes = 3)

tensor([0, 1, 0])

In [21]:
# Class index 2 out of 3 classes -> the 1 sits in position 2
f.one_hot(torch.tensor(2), num_classes = 3)

tensor([0, 0, 1])

In [22]:
# Cross Entropy Loss in PyTorch
import torch
from torch.nn import CrossEntropyLoss

# Raw model scores for one sample over two classes
scores = torch.tensor([[-0.1211, 0.1059]])
# One-hot target marking class 0 as the true class
one_hot_target = torch.tensor([[1, 0]])

criterion = CrossEntropyLoss()
# Both tensors are cast to float64 before the loss is evaluated
criterion(scores.to(torch.float64), one_hot_target.to(torch.float64))

tensor(0.8131, dtype=torch.float64)

Creating one-hot encoded labels

One-hot encoding is a technique that turns a single integer label into a vector of N elements, where N is the number of classes in your dataset. The vector contains a single one, at the index given by the label, and zeros everywhere else.

In [23]:
y = 1
num_classes = 3

# NumPy version: compare every class index with the label, then cast the
# resulting booleans to integers
one_hot_numpy = (np.arange(num_classes) == y).astype(int)

# PyTorch version: one_hot places the 1 at the position given by the label
one_hot_pytorch = torch.nn.functional.one_hot(
    torch.tensor(y),
    num_classes=num_classes,
)

In [24]:
# Show the NumPy one-hot vector for label y
print(one_hot_numpy)

[0 1 0]


In [25]:
# Show the PyTorch one-hot vector for label y
print(one_hot_pytorch)

tensor([0, 1, 0])


Calculating cross entropy loss

Cross-entropy loss is the most commonly used loss function for classification problems.

In [26]:
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# True class index (as a one-element batch) and the model's raw scores for
# the four classes
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# One-hot encode the label; the vector length equals the number of score columns
one_hot_label = F.one_hot(torch.tensor(y), num_classes=scores.size(1))

# Cross entropy loss function
criterion = CrossEntropyLoss()

# Evaluate the loss on float64 copies of the scores and the one-hot target
loss = criterion(scores.to(torch.float64), one_hot_label.to(torch.float64))
print(loss)

tensor(8.0619, dtype=torch.float64)


Backpropagation in PyTorch

In [27]:
# Three stacked linear layers, 16 -> 8 -> 4 -> 2 (two output classes)
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=8),
    nn.Linear(in_features=8, out_features=4),
    nn.Linear(in_features=4, out_features=2),
)

# One random sample with 16 features
sample = torch.randn(1, 16)

# Ground-truth class index for that sample
target = torch.tensor([1])

# Forward pass
prediction = model(sample)

# Loss, then the backward pass that populates .grad on every parameter
criterion = CrossEntropyLoss()
loss = criterion(prediction, target)
loss.backward()

# Gradients of the first layer: (weight gradient, bias gradient)
(model[0].weight.grad, model[0].bias.grad)

(tensor([[ 9.8917e-02,  1.2458e-01,  4.3436e-02,  9.9884e-03, -4.9308e-02,
          -1.1496e-01,  9.6628e-02,  9.4046e-05, -4.2432e-02,  2.4937e-02,
           1.5690e-02,  2.9455e-02,  2.2989e-02, -9.1310e-02, -5.2834e-02,
          -3.2926e-02],
         [ 9.2879e-02,  1.1698e-01,  4.0785e-02,  9.3786e-03, -4.6298e-02,
          -1.0794e-01,  9.0729e-02,  8.8305e-05, -3.9841e-02,  2.3415e-02,
           1.4732e-02,  2.7657e-02,  2.1586e-02, -8.5736e-02, -4.9609e-02,
          -3.0916e-02],
         [ 2.7301e-01,  3.4385e-01,  1.1988e-01,  2.7568e-02, -1.3609e-01,
          -3.1729e-01,  2.6669e-01,  2.5957e-04, -1.1711e-01,  6.8827e-02,
           4.3305e-02,  8.1296e-02,  6.3451e-02, -2.5202e-01, -1.4582e-01,
          -9.0877e-02],
         [ 3.2392e-03,  4.0797e-03,  1.4224e-03,  3.2708e-04, -1.6147e-03,
          -3.7645e-03,  3.1642e-03,  3.0797e-06, -1.3895e-03,  8.1660e-04,
           5.1379e-04,  9.6455e-04,  7.5282e-04, -2.9901e-03, -1.7301e-03,
          -1.0782e-03],
    

In [28]:
# Gradients of the second layer: (weight gradient, bias gradient)
model[1].weight.grad, model[1].bias.grad

(tensor([[ 0.0126, -0.0069, -0.0019,  0.0038,  0.0010, -0.0057,  0.0021,  0.0072],
         [-0.5167,  0.2835,  0.0780, -0.1550, -0.0396,  0.2355, -0.0873, -0.2977],
         [-0.4386,  0.2407,  0.0662, -0.1316, -0.0336,  0.1999, -0.0741, -0.2528],
         [ 0.0567, -0.0311, -0.0086,  0.0170,  0.0043, -0.0258,  0.0096,  0.0327]]),
 tensor([-0.0113,  0.4646,  0.3945, -0.0510]))

In [29]:
# Gradients of the third (output) layer: (weight gradient, bias gradient)
model[2].weight.grad, model[2].bias.grad

(tensor([[-0.0291,  0.2597,  0.2738,  0.1583],
         [ 0.0291, -0.2597, -0.2738, -0.1583]]),
 tensor([ 0.6752, -0.6752]))

In [30]:
# Updating model parameters manually: one plain gradient-descent step on the
# first layer, i.e. parameter <- parameter - lr * gradient

# learning rate is typically small
lr = 0.001

# Grab the first layer's parameters and the gradients stored on them by the
# preceding loss.backward() call
weight = model[0].weight
weight_grad = weight.grad
bias = model[0].bias
bias_grad = bias.grad

# The update has to modify the parameters in place. Writing
# `weight = weight - lr * weight_grad` only rebinds the local name to a new
# tensor and leaves the model untouched. no_grad() keeps the in-place
# subtraction out of the autograd graph, which is required when modifying leaf
# tensors that have requires_grad=True.
# NOTE: every execution of this cell applies one more step using the stored
# gradients.
with torch.no_grad():
    # Update the weights
    weight -= lr * weight_grad
    # Update the biases
    bias -= lr * bias_grad

In [31]:
# Two linear layers with a sigmoid activation in between
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Sigmoid(),
    nn.Linear(8, 2),
)

# Keep only the linear layers, in order; the sigmoid at index 1 is skipped,
# so the second linear layer is the module stored at index 2 of the model
linear_layers = [module for module in model if isinstance(module, nn.Linear)]

# Weight of the first linear layer
weight_0 = linear_layers[0].weight

# Bias of the second linear layer
bias_1 = linear_layers[1].bias