Binary Classification: forward pass

In [1]:
import torch
import torch.nn as nn

In [2]:
# Input batch: five samples (rows), each described by six features (columns)
input_data = torch.tensor([
    [-0.4421, 1.5207, 2.0607, -0.3647, 0.4691, 0.0946],
    [0.4838, 0.4001, 0.9786, 0.7858, 0.9876, 0.5987],
    [0.1902, 0.8507, 0.8175, 0.0994, 0.3862, 0.0132],
    [0.1973, 0.6625, 0.8123, 0.4483, 0.0345, 0.4165],
    [0.6948, 0.3557, 0.7689, 0.2793, 0.6816, 0.9152],
])

In [3]:
# Binary classifier: two stacked linear layers (6 -> 4 -> 1) whose single
# output is squashed by a sigmoid
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),
    nn.Linear(in_features=4, out_features=1),
    nn.Sigmoid(),
)

# Forward pass: run the whole input batch through the network
output = model(input_data)

In [4]:
print(output) 
# Outputs are five probabilities between 0 and 1
# It gives one value for each sample (row) in the data

# Classification (using the usual 0.5 threshold on the sigmoid output)
# Class = 1 where the probability is >= 0.5, class = 0 otherwise
# In the output recorded below every probability is below 0.5, so all five samples are class 0
# NOTE: no random seed is set, so the layer weights (and these values) differ on every run

tensor([[0.4647],
        [0.4493],
        [0.4568],
        [0.4505],
        [0.4582]], grad_fn=<SigmoidBackward0>)


In [5]:
# Number of target classes the network has to choose between
n_classes = 3

# Multi-class classifier: the last linear layer emits one score per class and
# the softmax over the last dimension turns those scores into probabilities
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),
    nn.Linear(in_features=4, out_features=n_classes),
    nn.Softmax(dim=-1),
)

# Forward pass over the whole input batch
output = model(input_data)
print(output.shape)  # Output shape is 5*3

torch.Size([5, 3])


In [6]:
print(output)

# The softmax is applied over the last dimension, so each row (one per sample) sums to one
# The predicted label for a row is the index of the highest of its three probabilities

tensor([[0.2884, 0.4384, 0.2732],
        [0.3075, 0.4194, 0.2731],
        [0.3294, 0.4034, 0.2672],
        [0.3096, 0.4249, 0.2655],
        [0.2933, 0.4384, 0.2683]], grad_fn=<SoftmaxBackward0>)


In [7]:
# Regression: forward pass

# Regression model: two linear layers (6 -> 4 -> 1) with no final activation,
# so the network emits one unbounded real value per sample
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),
    nn.Linear(in_features=4, out_features=1),
)

# Run the whole input batch through the network
output = model(input_data)

print(output)

tensor([[0.1507],
        [0.7421],
        [0.3649],
        [0.3432],
        [0.8036]], grad_fn=<AddmmBackward0>)


#### Building a binary classifier in PyTorch

Recall that a small neural network with a single linear layer followed by a sigmoid function is a binary classifier. It acts just like a logistic regression.

In this exercise, you'll practice building this small network and interpreting the output of the classifier.

In [8]:
import torch
import torch.nn as nn

# A single sample with eight features
input_tensor = torch.Tensor([[3, 4, 6, 2, 3, 6, 8, 9]])

# Small binary classifier: one linear layer (8 -> 1) followed by a sigmoid
# that maps the result to a probability between 0 and 1
model = nn.Sequential(
    nn.Linear(in_features=8, out_features=1),
    nn.Sigmoid(),
)

output = model(input_tensor)
print(output)

tensor([[0.0518]], grad_fn=<SigmoidBackward0>)


In [9]:
# Regression model

import torch
import torch.nn as nn

# A single sample with eleven features
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Exactly four linear layers, narrowing 11 -> 8 -> 4 -> 2 -> 1; there is no
# activation at the end, so the output is a single unbounded value
model = nn.Sequential(
    nn.Linear(in_features=11, out_features=8),
    nn.Linear(in_features=8, out_features=4),
    nn.Linear(in_features=4, out_features=2),
    nn.Linear(in_features=2, out_features=1),
)

output = model(input_tensor)
print(output)

tensor([[-0.2995]], grad_fn=<AddmmBackward0>)


In [10]:
import torch
import torch.nn as nn

# A single sample with eleven features
input_tensor = torch.Tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]])

# Multi-class classifier with four labels: the last linear layer emits four
# scores and the softmax over the last dimension turns them into probabilities
model = nn.Sequential(
    nn.Linear(in_features=11, out_features=20),
    nn.Linear(in_features=20, out_features=12),
    nn.Linear(in_features=12, out_features=6),
    nn.Linear(in_features=6, out_features=4),
    nn.Softmax(dim=-1),
)

output = model(input_tensor)
print(output)

tensor([[0.0819, 0.0649, 0.4576, 0.3957]], grad_fn=<SoftmaxBackward0>)


In [11]:
# One-hot encoding written out by hand: class 0 of 3 -> [1, 0, 0]
import numpy as np
one_hot_numpy = np.array((1, 0, 0))

In [12]:
# Let PyTorch build the one-hot vector instead of writing it out by hand

import torch.nn.functional as f
# Class index 0 with three classes -> the 1 sits in the first position
f.one_hot(torch.tensor(0), num_classes = 3)

tensor([1, 0, 0])

In [13]:
# Class index 1 with three classes -> the 1 sits in the second position
f.one_hot(torch.tensor(1), num_classes = 3)

tensor([0, 1, 0])

In [14]:
# Class index 2 with three classes -> the 1 sits in the third position
f.one_hot(torch.tensor(2), num_classes = 3)

tensor([0, 0, 1])

In [15]:
# Cross Entropy Loss in PyTorch
import torch
from torch.nn import CrossEntropyLoss

# Raw model scores (logits) for one sample over two classes
scores = torch.tensor([[-0.1211, 0.1059]])
# One-hot ground truth: the sample belongs to the first class
one_hot_target = torch.tensor([[1, 0]])

# Both arguments are cast to float64 before the loss is evaluated; the value
# of the last expression is what the cell displays
criterion = CrossEntropyLoss()
criterion(scores.to(torch.float64), one_hot_target.to(torch.float64))

tensor(0.8131, dtype=torch.float64)

Creating one-hot encoded labels

One-hot encoding is a technique that turns a single integer label into a vector of N elements, where N is the number of classes in your dataset. This vector only contains zeros and ones.

In [17]:
# Integer label to encode and the total number of classes
y = 1
num_classes = 3

# NumPy version: put a 1 at index y and a 0 at every other index
one_hot_numpy = np.array([int(i == y) for i in range(num_classes)])

# PyTorch version: one_hot builds the same vector from the label tensor
one_hot_pytorch = torch.nn.functional.one_hot(torch.tensor(y), num_classes=num_classes)

In [18]:
# Show the NumPy one-hot vector built for label y = 1 with three classes
print(one_hot_numpy)

[0 1 0]


In [19]:
# Show the PyTorch one-hot tensor built for label y = 1 with three classes
print(one_hot_pytorch)

tensor([0, 1, 0])


Calculating cross entropy loss

Cross-entropy loss is the most widely used loss function for classification problems.

In [26]:
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# Ground-truth class index (kept in a list so the label tensor has a batch dimension)
y = [2]
# Raw model scores for one sample over four classes
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# One-hot encode y, with one slot per score column
one_hot_label = F.one_hot(torch.tensor(y), num_classes=scores.size(1))

# Loss function for classification
criterion = CrossEntropyLoss()

# Evaluate the loss on float64 copies of the scores and the one-hot label
loss = criterion(scores.to(torch.float64), one_hot_label.to(torch.float64))
print(loss)

tensor(8.0619, dtype=torch.float64)


Backpropagation in PyTorch

In [33]:
# Three stacked linear layers: 16 -> 8 -> 4 -> 2
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=8),
    nn.Linear(in_features=8, out_features=4),
    nn.Linear(in_features=4, out_features=2),
)

# One random sample with 16 features
sample = torch.randn(1, 16)

# Ground-truth class index for that sample
target = torch.tensor([1])

# Forward pass: two class scores for the sample
prediction = model(sample)

# Measure the loss, then backpropagate so that every parameter's .grad is filled in
criterion = CrossEntropyLoss()
loss = criterion(prediction, target)
loss.backward()

# Weight and bias gradients of the first layer (displayed as the cell output)
model[0].weight.grad, model[0].bias.grad

(tensor([[-0.0670,  0.2443,  0.1667,  0.0924,  0.1875, -0.0193,  0.0370, -0.1299,
          -0.0245, -0.0327,  0.1486, -0.1074, -0.0208, -0.0623,  0.0813, -0.1135],
         [ 0.0936, -0.3415, -0.2330, -0.1292, -0.2622,  0.0269, -0.0518,  0.1816,
           0.0342,  0.0457, -0.2077,  0.1501,  0.0291,  0.0871, -0.1136,  0.1587],
         [ 0.0276, -0.1005, -0.0686, -0.0380, -0.0771,  0.0079, -0.0152,  0.0535,
           0.0101,  0.0134, -0.0611,  0.0442,  0.0086,  0.0256, -0.0334,  0.0467],
         [-0.0018,  0.0065,  0.0044,  0.0024,  0.0050, -0.0005,  0.0010, -0.0034,
          -0.0006, -0.0009,  0.0039, -0.0028, -0.0006, -0.0016,  0.0021, -0.0030],
         [ 0.0908, -0.3312, -0.2260, -0.1253, -0.2543,  0.0261, -0.0502,  0.1762,
           0.0332,  0.0443, -0.2014,  0.1456,  0.0282,  0.0844, -0.1102,  0.1539],
         [-0.0066,  0.0242,  0.0165,  0.0091,  0.0185, -0.0019,  0.0037, -0.0129,
          -0.0024, -0.0032,  0.0147, -0.0106, -0.0021, -0.0062,  0.0080, -0.0112],
         [

In [35]:
# Weight and bias gradients of the second linear layer, filled in by loss.backward()
model[1].weight.grad, model[1].bias.grad

(tensor([[ 0.3443, -0.5129, -0.1063,  0.0071,  0.1235,  0.1755, -0.0025, -0.0056],
         [-0.2651,  0.3949,  0.0819, -0.0055, -0.0951, -0.1351,  0.0020,  0.0043],
         [-0.3465,  0.5162,  0.1070, -0.0072, -0.1243, -0.1766,  0.0026,  0.0056],
         [ 0.3472, -0.5172, -0.1072,  0.0072,  0.1246,  0.1770, -0.0026, -0.0056]]),
 tensor([ 0.3906, -0.3007, -0.3931,  0.3939]))

In [36]:
# Weight and bias gradients of the third linear layer, filled in by loss.backward()
model[2].weight.grad, model[2].bias.grad

(tensor([[-0.2291, -0.1491,  0.0316, -0.2477],
         [ 0.2291,  0.1491, -0.0316,  0.2477]]),
 tensor([ 0.4556, -0.4556]))

In [37]:
# Updating model parameters manually: one gradient-descent step on the first layer

# learning rate is typically small
lr = 0.001

# Writing `weight = weight - lr * weight_grad` would only bind the name to a
# brand-new tensor and leave the layer's own parameters untouched, so the step
# is applied in place instead. torch.no_grad() keeps the update out of the
# autograd graph; an in-place change to a leaf tensor that requires grad is
# rejected outside of it.
with torch.no_grad():
    # Update the weights (`weight` is the layer's own parameter, modified in place)
    weight = model[0].weight
    weight_grad = model[0].weight.grad
    weight -= lr * weight_grad

    # Update the biases (`bias` is the layer's own parameter, modified in place)
    bias = model[0].bias
    bias_grad = model[0].bias.grad
    bias -= lr * bias_grad

In [41]:
# Two linear layers (16 -> 8 -> 2) with a sigmoid activation in between
model = nn.Sequential(
    nn.Linear(in_features=16, out_features=8),
    nn.Sigmoid(),
    nn.Linear(in_features=8, out_features=2),
)

# Weight matrix of the first linear layer (position 0 in the Sequential)
weight_0 = model[0].weight

# Bias vector of the second linear layer (position 2; position 1 is the sigmoid)
bias_1 = model[2].bias