# Introduction to PyTorch

## The Basics

In [1]:
# %pip install -r requirements.txt

### Tensor

In [2]:
from torch import tensor

In [3]:
# Build a 2x3 float tensor from a nested Python list (one inner list per row).
input_tensor = tensor(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ]
)
input_tensor

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [4]:
# Dimensions of the tensor: 2 rows by 3 columns.
input_tensor.shape

torch.Size([2, 3])

In [5]:
# Element type of the tensor; the float literals it was built from give float32.
input_tensor.dtype

torch.float32

### Linear Layer

In [6]:
from torch import nn

In [7]:
# Fully connected layer mapping 3 input features to 2 output features.
# Its weight and bias tensors are created automatically with the layer.
linear_layer = nn.Linear(in_features=3, out_features=2)
linear_layer

Linear(in_features=3, out_features=2, bias=True)

In [8]:
# Weight matrix: one row per output feature, one column per input feature (2 x 3).
linear_layer.weight

Parameter containing:
tensor([[-0.0686,  0.1388, -0.1875],
        [ 0.0511,  0.2553,  0.3471]], requires_grad=True)

In [9]:
# Bias vector: one entry per output feature.
linear_layer.bias

Parameter containing:
tensor([ 0.5137, -0.3740], requires_grad=True)

In [10]:
# Forward pass: calling the layer on the 2x3 input yields a 2x2 result
# (one row per input row, one column per output feature).
output = linear_layer(input_tensor)
output

tensor([[ 0.1601,  1.2292],
        [-0.1918,  3.1899]], grad_fn=<AddmmBackward0>)

### Stacking Layers

In [11]:
from torch import nn

In [12]:
n_features, n_output = 3, 2

# Each layer's input width must equal the previous layer's output width:
# 3 -> 8 -> 4 -> 2.
model = nn.Sequential(
    nn.Linear(n_features, 8),
    nn.Linear(8, 4),
    nn.Linear(4, n_output),
)
model

Sequential(
  (0): Linear(in_features=3, out_features=8, bias=True)
  (1): Linear(in_features=8, out_features=4, bias=True)
  (2): Linear(in_features=4, out_features=2, bias=True)
)

### Model Parameters

In [13]:
from torch import nn

# Three stacked linear layers: 3 -> 8 -> 4 -> 2.
model = nn.Sequential(
    nn.Linear(in_features=3, out_features=8),
    nn.Linear(in_features=8, out_features=4),
    nn.Linear(in_features=4, out_features=2),
)

In [14]:
# Every weight matrix and every bias vector is its own parameter tensor;
# numel() reports how many scalar values each one holds.
separator = "=" * 50
param_counts = []
for parameter in model.parameters():
    param_counts.append(parameter.numel())
    print(param_counts[-1])
    print(separator)

total_params = sum(param_counts)
print(f"Total parameters: {total_params}")

24
8
32
4
8
2
Total parameters: 78


## Neural Networks Architecture

### Activation Layer: Sigmoid Function

In [15]:
from torch import tensor, nn

# One clearly positive and one clearly negative value, used as sigmoid input below.
input_tensor = tensor([[6.0, -6.0]])
input_tensor

tensor([[ 6., -6.]])

In [16]:
# Sigmoid squashes each element into (0, 1): 6 lands close to 1, -6 close to 0.
sigmoid_layer = nn.Sigmoid()
sigmoid_layer(input_tensor)

tensor([[0.9975, 0.0025]])

### Activation Layer: Softmax Function

In [17]:
from torch import tensor, nn

# A single row holding three raw scores.
input_tensor = tensor([[4.3, 6.1, 2.3]])
input_tensor

tensor([[4.3000, 6.1000, 2.3000]])

In [18]:
# dim=-1 normalises across the last dimension, so the values in each row sum to 1.
softmax_layer = nn.Softmax(dim=-1)
softmax_layer(input_tensor)

tensor([[0.1392, 0.8420, 0.0188]])

### Activation Layer: ReLU (Rectified Linear Unit)

In [19]:
from torch import tensor, nn

# Two positive values and one negative value, to show how ReLU treats each sign.
input_tensor = tensor([[4.3, 6.1, -2.3]])
input_tensor

tensor([[ 4.3000,  6.1000, -2.3000]])

In [20]:
# ReLU passes positive values through unchanged and replaces negatives with 0.
relu_layer = nn.ReLU()
relu_layer(input_tensor)

tensor([[4.3000, 6.1000, 0.0000]])

### Activation Layer: Leaky ReLU 

In [21]:
from torch import tensor, nn

# Same values as in the ReLU example, so the two activations can be compared.
input_tensor = tensor([[4.3, 6.1, -2.3]])
input_tensor

tensor([[ 4.3000,  6.1000, -2.3000]])

In [22]:
# Leaky ReLU scales negative inputs by negative_slope instead of zeroing them:
# -2.3 * 0.05 = -0.115. Positive inputs pass through unchanged.
leaky_relu = nn.LeakyReLU(negative_slope=0.05)
leaky_relu(input_tensor)

tensor([[ 4.3000,  6.1000, -0.1150]])

### Forward Pass: Binary Classification

In [23]:
from torch import tensor, nn, randn

# 5 samples x 6 features drawn from a standard normal distribution.
# No seed is set, so the values change every time this cell runs.
input_tensor = randn(5, 6)
input_tensor

tensor([[ 1.2558,  2.5483, -0.9604, -0.2577,  1.5662, -1.4024],
        [-0.5768, -0.3578, -0.3991,  0.8474,  1.4971,  0.8542],
        [-1.9984,  1.9518, -0.9025,  1.8610, -1.2489, -0.3835],
        [ 1.4620, -2.3674, -0.5895, -0.9435,  1.2057, -0.2547],
        [-2.7283, -1.5283,  0.4255, -0.8530, -0.9489,  1.3232]])

In [24]:
# Binary classifier: two linear layers followed by a sigmoid, so every sample
# produces a single value between 0 and 1.
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # 6 input features -> 4 hidden units
    nn.Linear(in_features=4, out_features=1),  # 4 hidden units -> 1 output
    nn.Sigmoid(),  # squashes the single output into (0, 1)
)

# One output row per input sample: shape (5, 1).
model(input_tensor)

tensor([[0.5889],
        [0.5993],
        [0.4454],
        [0.7891],
        [0.6513]], grad_fn=<SigmoidBackward0>)

### Forward Pass: Multi-class Classification

In [25]:
from torch import tensor, nn, randn

# 5 samples x 6 features drawn from a standard normal distribution.
# No seed is set, so the values change every time this cell runs.
input_tensor = randn(5, 6)
input_tensor

tensor([[-0.6488, -0.8013,  1.2973,  0.4450,  0.1483,  0.2320],
        [ 0.9640,  1.6070,  0.9059,  0.3570,  1.0953, -1.0495],
        [ 1.1383,  0.1963,  0.2375, -0.6158,  0.5977, -0.9067],
        [ 1.4179, -0.5475, -0.2657, -0.2548,  1.9740, -1.1713],
        [-0.1146,  1.0470, -1.0052, -1.0472,  1.8581,  3.2596]])

In [26]:
# Number of target classes; it sets the width of the final linear layer.
n_classes = 3

# Multi-class classifier: the last linear layer emits one score per class and
# softmax turns each row of scores into values that sum to 1.
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # 6 input features -> 4 hidden units
    nn.Linear(in_features=4, out_features=n_classes),  # 4 hidden units -> one score per class
    nn.Softmax(dim=-1),  # normalise across the class dimension
)

# One row per input sample, one column per class: shape (5, 3).
model(input_tensor)

tensor([[0.4460, 0.2897, 0.2643],
        [0.4005, 0.3821, 0.2175],
        [0.3656, 0.3874, 0.2470],
        [0.2497, 0.5156, 0.2347],
        [0.3144, 0.4668, 0.2188]], grad_fn=<SoftmaxBackward0>)

### Forward Pass: Regression

In [27]:
from torch import tensor, nn, randn

# 5 samples x 6 features drawn from a standard normal distribution.
# No seed is set, so the values change every time this cell runs.
input_tensor = randn(5, 6)
input_tensor

tensor([[-0.1169, -0.6382,  0.8898,  1.5772, -0.5200, -0.1140],
        [-0.3271, -1.1307, -0.8220,  0.0494, -0.2145,  0.3768],
        [-1.0367,  0.1250,  0.1726,  1.2998, -0.6286,  0.3380],
        [-0.0935, -0.9291, -0.4282, -0.2015,  1.6182, -0.8223],
        [-0.2330, -0.1666, -1.4053,  0.1002, -0.7334, -2.0994]])

In [28]:
# Regression model: no activation after the last layer, so the single output
# per sample is an unbounded real value.
model = nn.Sequential(
    nn.Linear(in_features=6, out_features=4),  # 6 input features -> 4 hidden units
    nn.Linear(in_features=4, out_features=1),  # 4 hidden units -> 1 output
)

# One output row per input sample: shape (5, 1).
model(input_tensor)

tensor([[-0.1355],
        [ 0.0277],
        [-0.2279],
        [-0.1313],
        [ 0.0780]], grad_fn=<AddmmBackward0>)

## Loss Functions

### Transforming Labels with One-hot Encoding

In [29]:
from torch import tensor
import torch.nn.functional as F

# Each class index becomes a row of length num_classes with a single 1 at that index.
F.one_hot(tensor([0, 1, 2]), num_classes=3)

tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]])

### Cross Entropy Loss

In [30]:
from torch import tensor, nn

# Raw model scores for one sample over three classes (no softmax applied).
y_hat = tensor([[-5.2, 4.6, 0.8]])
y_hat

tensor([[-5.2000,  4.6000,  0.8000]])

In [31]:
# Ground-truth class index for the single sample.
y = tensor([0])
# F is torch.nn.functional, imported in the one-hot encoding cell earlier in the notebook.
one_hot_y = F.one_hot(y, num_classes=3)
one_hot_y

tensor([[1, 0, 0]])

In [32]:
loss_fn = nn.CrossEntropyLoss()
# one_hot returns integers, so both tensors are cast to float64 to give the
# loss a floating-point target with the same shape as the scores.
# The loss is large because the true class (index 0) has the lowest score.
loss = loss_fn(y_hat.double(), one_hot_y.double())
loss

tensor(9.8222, dtype=torch.float64)

## Sample Dataset

#### Load sample dataset

In [33]:
import pandas as pd

# The path is relative, so the CSV must sit in the notebook's working directory.
animals = pd.read_csv("animal_dataset.csv")
animals

Unnamed: 0,animal_name,hair,feathers,eggs,milk,predator,legs,tail,type
0,sparrow,0,1,1,0,0,2,1,0
1,eagle,0,1,1,0,1,2,1,0
2,cat,1,0,0,1,1,4,1,1
3,dog,1,0,0,1,0,4,1,1
4,lizard,0,0,1,0,1,4,1,2


#### Features

In [34]:
# Positional slice: skip the first column (animal_name) and the last column
# (the class label), keeping only the numeric feature columns.
features = animals.iloc[:, 1:-1]
features

Unnamed: 0,hair,feathers,eggs,milk,predator,legs,tail
0,0,1,1,0,0,2,1
1,0,1,1,0,1,2,1
2,1,0,0,1,1,4,1
3,1,0,0,1,0,4,1
4,0,0,1,0,1,4,1


In [35]:
# Convert the feature frame to a 2-D NumPy array (one row per animal).
X = features.to_numpy()
X

array([[0, 1, 1, 0, 0, 2, 1],
       [0, 1, 1, 0, 1, 2, 1],
       [1, 0, 0, 1, 1, 4, 1],
       [1, 0, 0, 1, 0, 4, 1],
       [0, 0, 1, 0, 1, 4, 1]])

#### Target Values

In [36]:
# The last column holds the integer class label for each animal.
target = animals.iloc[:, -1]
target

0    0
1    0
2    1
3    1
4    2
Name:  type, dtype: int64

In [37]:
# Convert the label Series to a 1-D NumPy array.
y = target.to_numpy()
y

array([0, 0, 1, 1, 2])

#### TensorDataset

In [38]:
from torch.utils.data import TensorDataset
from torch import tensor

# Pair each feature row with its label. The tensors keep the integer dtype of
# the NumPy arrays.
# NOTE(review): integer inputs would need .float() before going into nn.Linear.
dataset = TensorDataset(tensor(X), tensor(y))
dataset

<torch.utils.data.dataset.TensorDataset at 0x135aab7a0>

In [39]:
# Indexing a TensorDataset yields one (input, label) pair.
input_sample, label_sample = dataset[0]
print(
    f"input_sample: {input_sample}",
    f"label_sample: {label_sample}",
    sep="\n",
)

input_sample: tensor([0, 1, 1, 0, 0, 2, 1])
label_sample: 0


#### DataLoader

In [40]:
from torch.utils.data import DataLoader

# batch_size=2 over the 5 samples gives batches of 2, 2 and 1;
# shuffle=True draws the samples in a new random order on every pass.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [41]:
# One pass over the loader; each step yields a batch of inputs and their labels.
separator = "=" * 50
for batch_inputs, batch_labels in dataloader:
    print(
        f"batch_inputs: {batch_inputs}",
        f"batch_labels: {batch_labels}",
        separator,
        sep="\n",
    )

batch_inputs: tensor([[1, 0, 0, 1, 1, 4, 1],
        [1, 0, 0, 1, 0, 4, 1]])
batch_labels: tensor([1, 1])
batch_inputs: tensor([[0, 0, 1, 0, 1, 4, 1],
        [0, 1, 1, 0, 1, 2, 1]])
batch_labels: tensor([2, 0])
batch_inputs: tensor([[0, 1, 1, 0, 0, 2, 1]])
batch_labels: tensor([0])


## Training Loop

### Load Salary Dataset

In [42]:
import pandas as pd

# The path is relative, so the CSV must sit in the notebook's working directory.
salary = pd.read_csv("salary_dataset.csv")
salary

Unnamed: 0,experience_level,employment_type,remote_ratio,company_size,salary_in_usd
0,0,0,0.5,1,0.036
1,1,0,1.0,2,0.133
2,2,0,0.0,1,0.234
3,1,0,1.0,0,0.076
4,2,0,1.0,1,0.17


### Features & Target

In [43]:
# Every column except the last one (the salary) is used as a feature.
features = salary.iloc[:, :-1]
features

Unnamed: 0,experience_level,employment_type,remote_ratio,company_size
0,0,0,0.5,1
1,1,0,1.0,2
2,2,0,0.0,1
3,1,0,1.0,0
4,2,0,1.0,1


In [44]:
# Convert the feature frame to a 2-D NumPy array (one row per record).
X = features.to_numpy()
X

array([[0. , 0. , 0.5, 1. ],
       [1. , 0. , 1. , 2. ],
       [2. , 0. , 0. , 1. ],
       [1. , 0. , 1. , 0. ],
       [2. , 0. , 1. , 1. ]])

In [45]:
# The last column is the regression target.
target = salary.iloc[:, -1]
target

0    0.036
1    0.133
2    0.234
3    0.076
4    0.170
Name:  salary_in_usd, dtype: float64

In [46]:
# Convert the target Series to a 1-D NumPy array.
y = target.to_numpy()
y

array([0.036, 0.133, 0.234, 0.076, 0.17 ])

### Tensor Dataset & DataLoader

In [47]:
from torch.utils.data import TensorDataset, DataLoader
from torch import tensor

# .float() converts the float64 data coming from NumPy to float32, matching
# the default dtype of nn.Linear parameters.
dataset = TensorDataset(tensor(X).float(), tensor(y).float())
dataset

<torch.utils.data.dataset.TensorDataset at 0x177b324b0>

In [48]:
# 5 samples with batch_size=4 -> one batch of 4 and one batch of 1 per epoch.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

### Model, Loss Function & SGD (Stochastic Gradient Descent) Optimizer

In [49]:
from torch import nn, optim

# Two stacked linear layers: 4 input features -> 2 hidden units -> 1 output.
model = nn.Sequential(
    nn.Linear(in_features=4, out_features=2),
    nn.Linear(in_features=2, out_features=1),
)

# The learning rate sets the size of each update step;
# values between 0.0001 and 0.01 are typical.
learning_rate = 0.001

# Momentum carries part of the previous update forward, which helps the
# optimizer escape local optima; 0.85 to 0.99 is the typical range.
# NOTE(review): 0.8 sits slightly below that range - confirm it is intentional.
momentum = 0.8

# Loss function (a.k.a. criterion): mean squared error.
criterion = nn.MSELoss()

optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

### The Training Loop

In [50]:
num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0.0
    # Batch variables get their own names so the notebook-level `target`
    # Series defined earlier is not overwritten by a tensor.
    for batch_features, batch_targets in dataloader:
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        pred = model(batch_features)
        # The model outputs shape (batch, 1) while the loader yields targets
        # of shape (batch,); reshape so the loss compares them element-wise.
        batch_targets = batch_targets.view(-1, 1)

        loss = criterion(pred, batch_targets)
        loss.backward()
        optimizer.step()

        # Weight by batch size: the last batch can be smaller than the others.
        running_loss += loss.item() * batch_features.size(0)

    # Mean loss per sample over the whole epoch, reported so training
    # progress is visible.
    epoch_loss = running_loss / len(dataloader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs} - loss: {epoch_loss:.6f}")

## Placeholder