In [1]:
import pandas as pd

# Read the zoo dataset (a CSV in the working directory) into a DataFrame
zoo_csv_path = 'zoo.csv'
animals = pd.read_csv(zoo_csv_path)

In [2]:
import numpy as np

# Input features: every column except the first and the last one
inner_columns = slice(1, -1)
features = animals.iloc[:, inner_columns]

# Plain NumPy matrix of the selected columns
X = features.to_numpy()

print(X)

[[1 0 0 ... 0 0 1]
 [1 0 0 ... 1 0 1]
 [0 0 1 ... 1 0 0]
 ...
 [1 0 0 ... 1 0 1]
 [0 0 1 ... 0 0 0]
 [0 1 1 ... 1 0 0]]


In [3]:
# Target values (ground truth) are taken from the last column of the frame
last_column = -1
target = animals.iloc[:, last_column]

# NumPy vector of the labels
y = target.to_numpy()

print(y)

[1 1 4 1 1 1 1 4 4 1 1 2 4 7 7 7 2 1 4 1 2 2 1 2 6 5 5 1 1 1 6 1 1 2 4 1 1
 2 4 6 6 2 6 2 1 1 7 1 1 1 1 6 5 7 1 1 2 2 2 2 4 4 3 1 1 1 1 1 1 1 1 2 7 4
 1 1 3 7 2 2 3 7 4 2 1 7 4 2 6 5 3 3 4 1 1 2 1 6 1 7 2]


In [4]:
import torch
from torch.utils.data import TensorDataset

# Convert the feature matrix and label vector to tensors, then pair them
inputs_tensor = torch.tensor(X)
labels_tensor = torch.tensor(y)
dataset = TensorDataset(inputs_tensor, labels_tensor)

# Indexing the dataset yields one (input, label) pair
first_sample = dataset[0]
input_sample, label_sample = first_sample
print('input sample:', input_sample)
print('label sample:', label_sample)

input sample: tensor([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 0, 0, 1])
label sample: tensor(1)


In [5]:
from torch.utils.data import DataLoader

# Loader settings: two samples per batch, shuffling enabled
batch_size, shuffle = 2, True

# Wrap the dataset in a DataLoader using the settings above
loader_options = {'batch_size': batch_size, 'shuffle': shuffle}
dataloader = DataLoader(dataset, **loader_options)

- **Epoch**: one full pass through the training dataloader
- **Generalization**: model performs well with unseen data


In [6]:
# Walk through every batch the dataloader yields and print its contents
for batch in dataloader:
    batch_inputs, batch_labels = batch
    print('batch_inputs:', batch_inputs)
    print('batch_labels:', batch_labels)

batch_inputs: tensor([[0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 6, 0, 0, 0]])
batch_labels: tensor([4, 7])
batch_inputs: tensor([[1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 1, 0, 1],
        [1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 2, 1, 0, 0]])
batch_labels: tensor([1, 1])
batch_inputs: tensor([[1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 1, 0, 1],
        [1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 0, 0]])
batch_labels: tensor([1, 1])
batch_inputs: tensor([[0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 0]])
batch_labels: tensor([2, 6])
batch_inputs: tensor([[1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 1, 0]])
batch_labels: tensor([6, 1])
batch_inputs: tensor([[1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 1, 0, 1],
        [1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 4, 0, 1, 0]])
batch_labels: tensor([1, 1])
batch_inputs: tensor([[0, 1, 1, 0, 0, 0,

In [7]:
# training ver 1

# Split the frame into inputs (all inner columns) and labels (last column)
feature_frame = animals.iloc[:, 1:-1]
label_series = animals.iloc[:, -1]
X = feature_frame.to_numpy()
y = label_series.to_numpy()

# Build the dataset from tensor copies of both arrays
dataset = TensorDataset(*(torch.tensor(array) for array in (X, y)))

# Show the first (input, label) pair
input_sample, label_sample = dataset[0]
print('Input sample:', input_sample)
print('Label sample:', label_sample)

Input sample: tensor([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 0, 0, 1])
Label sample: tensor(1)


In [8]:
# training ver 2

# Build a shuffling DataLoader that serves two samples per batch
dataloader = DataLoader(dataset, shuffle=True, batch_size=2)

# Print the inputs and labels of each batch in turn
for batch in dataloader:
    batch_inputs, batch_labels = batch
    print('batch_inputs:', batch_inputs)
    print('batch_labels:', batch_labels)

batch_inputs: tensor([[1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 1, 1, 1],
        [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
batch_labels: tensor([1, 7])
batch_inputs: tensor([[1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 1, 0, 1],
        [1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 4, 1, 0, 1]])
batch_labels: tensor([1, 1])
batch_inputs: tensor([[0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0],
        [0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1, 1, 0]])
batch_labels: tensor([2, 2])
batch_inputs: tensor([[0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1, 1, 0],
        [0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1]])
batch_labels: tensor([2, 4])
batch_inputs: tensor([[0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 4, 0, 0, 0],
        [0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 1, 0, 0]])
batch_labels: tensor([5, 2])
batch_inputs: tensor([[0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 4, 0, 0, 0],
        [0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0]])
batch_labels: tensor([5, 2])
batch_inputs: tensor([[1, 0, 0, 1, 0, 0,

## Data Science Salary Dataset

In [9]:
# Load the data science salary table and preview its first ten rows
salary_csv_path = 'data_science_salary.csv'
df = pd.read_csv(salary_csv_path)

df.head(n=10)

Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,company_location,company_size
0,2023,EN,FT,Applied Scientist,213660,USD,213660,US,L
1,2023,EN,FT,Applied Scientist,130760,USD,130760,US,L
2,2023,EN,FT,Data Quality Analyst,100000,USD,100000,NG,L
3,2023,EN,FT,Compliance Data Analyst,30000,USD,30000,NG,L
4,2023,EN,FT,Applied Scientist,204620,USD,204620,US,L
5,2023,EN,FT,Applied Scientist,110680,USD,110680,US,L
6,2023,EN,FT,Machine Learning Engineer,163196,USD,163196,US,M
7,2023,EN,FT,Machine Learning Engineer,145885,USD,145885,US,M
8,2023,EN,FT,Research Scientist,220000,USD,220000,US,L
9,2023,EN,FT,Data Engineer,85000,USD,85000,US,M


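- Features: categorical; target: salary in USD (`salary_in_usd`)
- Final output: a linear layer with no activation, because the target is a continuous value
- Loss: a regression-specific loss such as mean squared error (MSE)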

In [10]:
# Drop columns that are not used as model features further down.
# errors='ignore' keeps this cell idempotent: because `df` is rebound in
# place, re-running the cell would otherwise raise a KeyError once the
# columns are already gone.
df = df.drop(columns=['work_year', 'job_title'], errors='ignore')

In [11]:
from sklearn.preprocessing import LabelEncoder

# Columns to label encode
cols_to_encode = ['experience_level', 'employment_type', 'salary_currency', 'company_location', 'company_size']

# Apply label encoding with one encoder per column.
# Re-fitting a single shared LabelEncoder would keep only the classes of the
# last column, so the integer codes of the earlier columns could no longer be
# mapped back to their categories with inverse_transform.
encoders = {}

for col in cols_to_encode:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le  # keep the fitted mapping for later decoding


df.head(10)

Unnamed: 0,experience_level,employment_type,salary,salary_currency,salary_in_usd,company_location,company_size
0,0,2,213660,19,213660,70,0
1,0,2,130760,19,130760,70,0
2,0,2,100000,19,100000,53,0
3,0,2,30000,19,30000,53,0
4,0,2,204620,19,204620,70,0
5,0,2,110680,19,110680,70,0
6,0,2,163196,19,163196,70,1
7,0,2,145885,19,145885,70,1
8,0,2,220000,19,220000,70,0
9,0,2,85000,19,85000,70,1


MSE loss is the mean of the squared differences between the predictions and the ground truth: $\text{MSE} = \frac{1}{N}\sum_{i=1}^{N}(\hat{y}_i - y_i)^2$.

In [12]:
def mean_squared_loss(prediction, target):
    """Return the mean squared error between predictions and ground truth.

    Args:
        prediction: Predicted values; a NumPy array or any array-like
            (list, tuple, scalar) accepted by ``np.asarray``.
        target: Ground-truth values, broadcastable against ``prediction``.

    Returns:
        The mean of the element-wise squared differences.
    """
    # Coerce to arrays first so plain Python sequences work as well;
    # subtracting two lists is undefined and would raise a TypeError.
    prediction = np.asarray(prediction)
    target = np.asarray(target)
    return np.mean((prediction - target) ** 2)

In [17]:
# in PyTorch
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

# Keep only the rows where every feature and the target are present
feature_cols = [
    'experience_level',
    'employment_type',
    'salary_currency',
    'company_location',
    'company_size',
]
required_cols = [*feature_cols, 'salary_in_usd']
df_clean = df.dropna(subset=required_cols)

# Feature matrix and a single-column (2-D) target array
features = df_clean.loc[:, feature_cols].values
target_raw = df_clean.loc[:, ['salary_in_usd']].values

# Normalize the target with a min-max scaler
scaler = MinMaxScaler()
target_scaled = scaler.fit_transform(target_raw)

# Build float32 tensors from both arrays
X, y = (torch.tensor(array, dtype=torch.float32) for array in (features, target_scaled))

# Double-check shapes
print(X.shape)  # (n_samples, 5)
print(y.shape)  # (n_samples, 1)

torch.Size([3761, 5])
torch.Size([3761, 1])


In [18]:
# Before the training loop

import torch.optim as optim

# Pair features with targets and serve them in shuffled mini-batches of four
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, shuffle=True, batch_size=4)

# Small regression network: 5 input features -> 2 hidden units -> 1 output
layers = [
    nn.Linear(5, 2),
    nn.ReLU(),
    nn.Linear(2, 1),
]
model = nn.Sequential(*layers)

# Mean-squared-error objective, optimised with plain SGD
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

# Sanity check: loss of the untrained model on the full tensors
prediction = model(X)
loss = criterion(prediction, y)
initial_loss = loss.item()
print(f"Initial loss: {initial_loss:.4f}")


Initial loss: 0.9690


In [19]:
# Training Loop

num_epochs = 5

for epoch in range(num_epochs):
    # Each batch unpacks directly into its features and targets
    for feature, target in dataloader:
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, then the loss against the ground truth
        pred = model(feature)
        loss = criterion(pred, target)

        # Backpropagate and apply the parameter update
        loss.backward()
        optimizer.step()

In [20]:
# training ver 3

# The ground-truth array is named y_true rather than y: rebinding the global
# `y` (the salary target tensor) to a NumPy array here would break any later
# re-run of the cell that builds TensorDataset(X, y).
y_pred = np.array([3, 5.0, 2.5, 7.0])
y_true = np.array([3.0, 4.5, 2.0, 8.0])

# Calculate MSE using NumPy
mse_numpy = np.mean((y_pred - y_true)**2)

# Create the MSELoss function in PyTorch
criterion = nn.MSELoss()

# Calculate MSE using PyTorch (tensors inherit float64 from the NumPy arrays)
mse_pytorch = criterion(torch.tensor(y_pred), torch.tensor(y_true))

print("MSE (NumPy):", mse_numpy)
print("MSE (PyTorch):", mse_pytorch)

MSE (NumPy): 0.375
MSE (PyTorch): tensor(0.3750, dtype=torch.float64)


In [21]:
# training ver 4

def show_results(model, dataloader, max_rows=None):
    """Print ground-truth and predicted values for the samples of a dataloader.

    The model is switched to evaluation mode and run without gradient
    tracking; its previous train/eval mode is restored before returning so
    that a later training loop is not silently left in eval mode.

    Args:
        model: A torch ``nn.Module`` mapping a batch of features to predictions.
        dataloader: Iterable yielding ``(features, targets)`` tensor batches.
        max_rows: Optional cap on the number of printed rows. ``None``
            (the default) prints every sample, as before.

    Returns:
        None. One line is printed per sample.
    """
    was_training = model.training
    model.eval()
    shown = 0
    try:
        with torch.no_grad():
            for features, targets in dataloader:
                # .cpu() is a no-op for CPU tensors and makes .numpy() safe
                # when the batch lives on another device.
                preds = model(features).cpu().numpy().flatten()
                truths = targets.cpu().numpy().flatten()
                for i in range(len(preds)):
                    if max_rows is not None and shown >= max_rows:
                        return
                    print(f"Ground truth salary: {truths[i]:.3f}. Predicted salary: {preds[i]:.3f}.")
                    shown += 1
    finally:
        # Restore whichever mode the caller had the model in
        model.train(was_training)

# Loop over the number of epochs and, within each, over every batch
for i in range(num_epochs):
    for feature, target in dataloader:
        # Reset the gradients accumulated by the previous batch
        optimizer.zero_grad()
        # Forward pass followed by the loss
        prediction = model(feature)
        loss = criterion(prediction, target)
        # Backward pass, then the parameter update
        loss.backward()
        optimizer.step()

show_results(model, dataloader)

Ground truth salary: 0.382. Predicted salary: 0.300.
Ground truth salary: 0.416. Predicted salary: 0.300.
Ground truth salary: 0.314. Predicted salary: 0.300.
Ground truth salary: 0.382. Predicted salary: 0.300.
Ground truth salary: 0.236. Predicted salary: 0.300.
Ground truth salary: 0.292. Predicted salary: 0.300.
Ground truth salary: 0.300. Predicted salary: 0.300.
Ground truth salary: 0.601. Predicted salary: 0.300.
Ground truth salary: 0.508. Predicted salary: 0.300.
Ground truth salary: 0.115. Predicted salary: 0.300.
Ground truth salary: 0.015. Predicted salary: 0.300.
Ground truth salary: 0.279. Predicted salary: 0.300.
Ground truth salary: 0.627. Predicted salary: 0.300.
Ground truth salary: 0.292. Predicted salary: 0.300.
Ground truth salary: 0.344. Predicted salary: 0.300.
Ground truth salary: 0.051. Predicted salary: 0.300.
Ground truth salary: 0.371. Predicted salary: 0.300.
Ground truth salary: 0.024. Predicted salary: 0.300.
Ground truth salary: 0.157. Predicted salary: 

## ReLU Activation Functions

In [22]:
# Build a ReLU activation module
relu_pytorch = nn.ReLU()

# One positive and one negative scalar input
x_pos, x_neg = torch.tensor(2.0), torch.tensor(-3.0)

# Run both inputs through the activation
output_pos, output_neg = (relu_pytorch(value) for value in (x_pos, x_neg))

print("ReLU applied to positive value:", output_pos)
print("ReLU applied to negative value:", output_neg)

ReLU applied to positive value: tensor(2.)
ReLU applied to negative value: tensor(0.)


In [23]:
# Leaky ReLU module configured with a negative slope of 0.05
leaky_relu_pytorch = nn.LeakyReLU(0.05)

# Pass a negative scalar through the activation and show the result
x = torch.tensor(-2.0)
output = leaky_relu_pytorch(x)
print(output)

tensor(-0.1000)


## Learning Rate and Momentum

In [24]:
# SGD optimizer with momentum over the model's parameters
sgd = optim.SGD(
    model.parameters(),
    momentum=0.95,
    lr=0.01,
)

**Two arguments**:
- **learning rate**: controls the step size
- **momentum**: adds inertia to avoid getting stuck


| **Learning Rate**                                | **Momentum**                         |
|--------------------------------------------------|--------------------------------------|
| Controls the step size                           | Controls the inertia                 |
| Too high → poor performance                      | Helps escape local minima            |
| Too low → slow training                          | Too small → optimizer gets stuck     |
| Typical range: 0.01 (10⁻²) to 0.0001 (10⁻⁴)       | Typical range: 0.85 to 0.99          |
