# Introduction to Pytorch

## The Basics

In [1]:
# !pip install requirements.txt

### Tensor

In [None]:
from torch import tensor

In [None]:
input_tensor = tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
input_tensor

In [None]:
input_tensor.shape

In [None]:
input_tensor.dtype

### Linear Layer

In [6]:
from torch import nn

In [None]:
linear_layer = nn.Linear(in_features=3, out_features=2)
linear_layer

In [None]:
linear_layer.weight

In [None]:
linear_layer.bias

In [None]:
output = linear_layer(input_tensor)
output

### Stacking Layers

In [11]:
from torch import nn

In [None]:
n_features = 3
n_output = 2

model = nn.Sequential(
    nn.Linear(in_features=n_features, out_features=8),
    nn.Linear(in_features=8, out_features=4),
    nn.Linear(in_features=4, out_features=n_output),
)
model

### Model Parameters

In [13]:
from torch import nn

model = nn.Sequential(nn.Linear(3, 8), nn.Linear(8, 4), nn.Linear(4, 2))

In [None]:
total_params = 0
for parameter in model.parameters():
    # print(parameter)
    print(parameter.numel())
    print("=" * 50)
    total_params += parameter.numel()

print(f"Total parameters: {total_params}")

## Neural Networks Architecture

### Activation Layer: Sigmoid Function

In [None]:
from torch import tensor, nn

input_tensor = tensor([[6.0]])
input_tensor

In [None]:
sigmoid_layer = nn.Sigmoid()
sigmoid_layer(input_tensor)

### Activation Layer: Softmax Function

In [None]:
from torch import tensor, nn

input_tensor = tensor([[4.3, 6.1, 2.3]])
input_tensor

In [None]:
softmax_layer = nn.Softmax(dim=-1)
softmax_layer(input_tensor)

### Forward Pass: Binary Classification

In [None]:
from torch import tensor, nn, randn

input_tensor = randn(
    5, 6
)  # Creates a tensor of shape (5, 6) with random values from normal distribution
input_tensor

In [None]:
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # First Linear Layer
    nn.Linear(in_features=4, out_features=1),  # Second Linear Layer
    nn.Sigmoid(),  # Sigmoid Activation Function
)

model(input_tensor)

### Forward Pass: Multi-class Classification

In [None]:
from torch import tensor, nn, randn

# Creates a tensor of shape (5, 6) with random values from normal distribution
input_tensor = randn(5, 6)
input_tensor

In [None]:
n_classes = 3

model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # First Linear Layer
    nn.Linear(in_features=4, out_features=n_classes),  # Second Linear Layer
    nn.Softmax(dim=-1),  # Softmax Activation Function
)

model(input_tensor)

### Forward Pass: Regression

In [None]:
from torch import tensor, nn, randn

# Creates a tensor of shape (5, 6) with random values from normal distribution
input_tensor = randn(5, 6)
input_tensor

In [None]:
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # First Linear Layer
    nn.Linear(in_features=4, out_features=1),  # Second Linear Layer
)

model(input_tensor)

## Loss Functions

### Transforming Labels with One-hot Encoding

In [3]:
from torch import tensor
import torch.nn.functional as F

F.one_hot(tensor([0, 1, 2]), num_classes=3)

tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]])

### Cross Entropy Loss

In [4]:
from torch import tensor, nn

y_hat = tensor([[-5.2, 4.6, 0.8]])
y_hat

tensor([[-5.2000,  4.6000,  0.8000]])

In [5]:
y = tensor([0])
one_hot_y = F.one_hot(y, num_classes=3)
one_hot_y

tensor([[1, 0, 0]])

In [6]:
loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(y_hat.double(), one_hot_y.double())
loss

tensor(9.8222, dtype=torch.float64)

In [9]:
loss.backward

<bound method Tensor.backward of tensor(9.8222, dtype=torch.float64)>

### Backpropagation: Updating Model Parameters

In [1]:
from torch import tensor, nn, randn

# Creates a tensor of shape (5, 6) with random values from normal distribution
input_tensor = randn(5, 6)
input_tensor

  device: torch.device = torch.device(torch._C._get_default_device()),  # torch.device('cpu'),


tensor([[-0.1751, -0.0091,  1.5720,  1.0588,  0.4389,  0.4502],
        [-0.1980,  0.7915,  0.7152,  0.5364,  0.1342, -0.5360],
        [ 1.4671, -1.7298, -0.8519,  1.6462,  0.2115,  0.7831],
        [ 0.1020, -1.0473,  0.4160,  1.0090,  0.5098, -0.7181],
        [ 1.2333,  0.4922,  0.0980,  0.6282, -1.0808, -1.4880]])

In [2]:
target_tensor = tensor([0.32, 1.94, 2.35, 0.53, 1.73])  # Random class labels for 5 samples

In [8]:
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=8),  # First Linear Layer
    nn.Linear(in_features=8, out_features=4),  # Second Linear Layer
    nn.Linear(in_features=4, out_features=2),  # Third Linear Layer
)

prediction = model(input_tensor)
prediction

tensor([[-0.1755, -0.2119],
        [-0.3019, -0.2850],
        [-0.2725, -0.2947],
        [-0.2508, -0.2767],
        [-0.1953, -0.2897]], grad_fn=<AddmmBackward0>)

In [9]:
loss_fn = nn.CrossEntropyLoss()
loss_fn(prediction.double(), target_tensor.double())

RuntimeError: expected scalar type Long but found Double