# PyTorch Tutorial (SMAI Spring'25)

## Tensors
- Tensors are the fundamental data structures used for numerical computations
- Similar to NumPy arrays, but with additional capabilities such as GPU acceleration and automatic differentiation.

🔹 Key Features of Tensors
- Multi-Dimensional Arrays – Similar to NumPy arrays but optimized for deep learning.
- GPU Acceleration – Can be moved between CPU and GPU (.cuda(), .to(device)).
- Autograd Support – Tracks gradients for automatic differentiation (requires_grad=True).
- Efficient Computations – Tensor operations run on optimized low-level (C++/CUDA) kernels; higher-level modules such as torch.nn and torch.optim are built on top of them.
- Type Flexibility – Supports various data types (torch.float32, torch.int64, etc.).

In [None]:
import torch
import numpy as np

In [None]:
# A 1-D integer tensor (3 elements) and a 2-D float tensor (2 rows, 1 column).
tensor_1 = torch.tensor([1, 2, 3])
tensor_2 = torch.tensor([[1.0], [3.0]])

# Tensor.size() returns the same torch.Size value as the .shape attribute.
print(tensor_1.size())
print(tensor_2.size())

torch.Size([3])
torch.Size([2, 1])


In [None]:
# Size of each individual dimension of tensor_2: (rows, columns).
tensor_2.size(0), tensor_2.size(1)

(2, 1)

## Convert NumPy to PyTorch Tensor and Vice Versa

In [None]:
# NumPy array -> PyTorch tensor.
# torch.tensor() copies the data; torch.from_numpy(np_array) would instead
# return a tensor that shares memory with the NumPy array.
np_array = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
torch_tensor = torch.tensor(np_array)
print("Torch Tensor:", torch_tensor)

# PyTorch tensor -> NumPy array (works directly for tensors that live on the CPU).
back_to_numpy = torch_tensor.numpy()
print("Numpy Array: ", back_to_numpy)

Torch Tensor: tensor([[1, 2, 3],
        [4, 5, 6]])
Numpy Array:  [[1 2 3]
 [4 5 6]]


## Setting Device (CPU/GPU)

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Place the tensor on the selected device (it simply stays on the CPU when
# no GPU was found).
gpu_tensor = torch.tensor([1, 2, 3]).to(device)

Using device: cuda


## Making Tensors Learnable (Parameters in a Model)

In [None]:
from torch import nn

# A minimal model: one learnable scalar that scales its input.
class SimpleModel(nn.Module):
    """Multiply the input by a single learnable weight."""

    def __init__(self):
        super().__init__()
        # Wrapping a tensor in nn.Parameter registers it with the module, so it
        # shows up in model.parameters() and has requires_grad=True.
        initial_value = torch.randn(1)
        self.weight = nn.Parameter(initial_value)

    def forward(self, x):
        """Return ``x`` scaled element-wise by the learnable weight."""
        scaled = x * self.weight
        return scaled


model = SimpleModel()
print(model.weight)  # Randomly initialised, so the value differs between runs

Parameter containing:
tensor([-2.0084], requires_grad=True)


## Using CUDA for Model Training

In [None]:
# Module.to(device) places the model's parameters on the chosen device.
model = SimpleModel().to(device)

# The input has to live on the same device as the model's parameters.
input_tensor = torch.tensor([5.0], device=device)

# Calling the model runs its forward() method.
output = model(input_tensor)
print(output)

tensor([1.3746], device='cuda:0', grad_fn=<MulBackward0>)


## Evaluating the Model

In [None]:
# eval() puts the model in evaluation mode: layers that behave differently
# during training (e.g. dropout) switch to their inference behaviour.
model.eval()

# no_grad() stops autograd from recording operations, so no gradients are
# computed and the weights cannot be changed by this pass. Without it, every
# forward pass builds a computation graph, which wastes (GPU) memory during
# evaluation.
with torch.no_grad():
    test_input = torch.tensor([[4.0]], device=device)  # Example input
    prediction = model(test_input)
    print(f"Prediction for input 4: {prediction.item()}")

Prediction for input 4: 1.0996793508529663


## Stacking Tensors

In [None]:
# Two 1-D tensors with the same shape (3,)
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
pair = (a, b)

# dim=0: the new axis comes first, so each input becomes a row -> shape (2, 3)
stacked_0 = torch.stack(pair, dim=0)
print("Stacked along dim=0:\n", stacked_0)

# dim=1: the new axis comes second, so each input becomes a column -> shape (3, 2)
stacked_1 = torch.stack(pair, dim=1)
print("Stacked along dim=1:\n", stacked_1)

Stacked along dim=0:
 tensor([[1, 2, 3],
        [4, 5, 6]])
Stacked along dim=1:
 tensor([[1, 4],
        [2, 5],
        [3, 6]])


## Stacking along the first dimension (row-wise)

In [None]:
# Two 2-D tensors, each of shape (1, 3)
a = torch.tensor([[1, 2, 3]])
b = torch.tensor([[4, 5, 6]])

# vstack joins along the first (row) axis: (1, 3) and (1, 3) -> (2, 3)
vstacked = torch.vstack((a, b))
print("VStack:\n", vstacked)

VStack:
 tensor([[1, 2, 3],
        [4, 5, 6]])


## Stacking along the second dimension (column-wise)

In [None]:
# hstack joins 2-D tensors along the second (column) axis, so the rows of
# a and b are placed side by side.
hstacked = torch.hstack((a, b))
print("HStack:\n", hstacked)

HStack:
 tensor([[1, 2, 3, 4, 5, 6]])


## Concatenating Tensors

In [None]:
# Two 2x2 tensors
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[5, 6], [7, 8]])

# cat joins along an axis that already exists (no new dimension is created).
# dim=0 places the rows of b below those of a -> shape (4, 2)
concat_0 = torch.cat([a, b], dim=0)
print("Concatenated along dim=0:\n", concat_0)

# dim=1 places the columns of b to the right of those of a -> shape (2, 4)
concat_1 = torch.cat([a, b], dim=1)
print("Concatenated along dim=1:\n", concat_1)

Concatenated along dim=0:
 tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])
Concatenated along dim=1:
 tensor([[1, 2, 5, 6],
        [3, 4, 7, 8]])


## Rearranging Dimensions

In [None]:
# Random tensor whose axes are read here as (batch_size, channels, height)
x = torch.rand(2, 3, 4)

# permute reorders the axes: keep axis 0, swap axes 1 and 2,
# so shape (2, 3, 4) becomes (2, 4, 3).
permuted_x = torch.permute(x, (0, 2, 1))
print("Original shape:", x.shape)
print("Permuted shape:", permuted_x.shape)

Original shape: torch.Size([2, 3, 4])
Permuted shape: torch.Size([2, 4, 3])


## Swapping two dimensions

In [None]:
x = torch.tensor([[1, 2, 3], [4, 5, 6]])  # shape (2, 3)

# Exchanging axes 0 and 1 turns rows into columns -> shape (3, 2)
transposed_x = x.transpose(0, 1)
print("Original Tensor:\n", x)
print("Transposed Tensor:\n", transposed_x)

Original Tensor:
 tensor([[1, 2, 3],
        [4, 5, 6]])
Transposed Tensor:
 tensor([[1, 4],
        [2, 5],
        [3, 6]])


## Change tensor shape

In [None]:
x = torch.arange(6)  # 1-D tensor holding 0..5, shape (6,)
print("Original tensor:", x)

# view presents the same elements under a new shape: (6,) -> (2, 3)
reshaped_x = x.view((2, 3))
print("Reshaped tensor:\n", reshaped_x)

# reshape is the alternative call; here (6,) -> (3, 2)
reshaped_x2 = x.reshape((3, 2))
print("Reshaped tensor using reshape:\n", reshaped_x2)

Original tensor: tensor([0, 1, 2, 3, 4, 5])
Reshaped tensor:
 tensor([[0, 1, 2],
        [3, 4, 5]])
Reshaped tensor using reshape:
 tensor([[0, 1],
        [2, 3],
        [4, 5]])


## Broadcasting

In [None]:
t = torch.tensor([[2, 3, 4], [4, 5, 6]])  # shape (2, 3)
print(t)

# The (3,) operand is broadcast across both rows of t, i.e. it behaves like
# [[1, 2, 1], [1, 2, 1]] without that copy being written out by hand.
row_offsets = torch.tensor([1, 2, 1])
t + row_offsets

tensor([[2, 3, 4],
        [4, 5, 6]])


tensor([[3, 5, 5],
        [5, 7, 7]])

## Building a Neural Network Model in PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

### Load and Preprocess the Dataset

In [None]:
# Load dataset
california = fetch_california_housing()
X = california.data  # Features (8 features per sample)
y = california.target  # Target variable (house price)

# Cast the target to float32 (PyTorch's default float type) and make it a
# column vector so it lines up with the model's (batch, 1) output.
y = y.astype(np.float32).reshape(-1, 1)

# Split BEFORE scaling so that no statistics of the test rows leak into the
# preprocessing. With a fixed random_state the row assignment is determined
# by the number of samples, not by the feature values.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train).astype(np.float32)
X_test = scaler.transform(X_test).astype(np.float32)

# Keep X as a scaled float32 matrix (as before), now using the training statistics.
X = scaler.transform(X).astype(np.float32)


### Create a Custom Dataset Class

In [None]:
class CaliforniaHousingDataset(Dataset):
    """Map-style dataset pairing each feature row with its regression target."""

    def __init__(self, X, y):
        # Convert both inputs to float32 tensors once, up front.
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.X = features
        self.y = targets

    def __len__(self):
        """Return the number of samples (length of the feature tensor)."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        sample_features = self.X[idx]
        sample_target = self.y[idx]
        return sample_features, sample_target

# Wrap the train/test splits in Dataset objects
train_dataset = CaliforniaHousingDataset(X=X_train, y=y_train)
test_dataset = CaliforniaHousingDataset(X=X_test, y=y_test)

# DataLoader serves a dataset in mini-batches (here 32 samples at a time).
# Training batches are reshuffled every epoch; test batches keep a fixed order.
# Loading runs in the main process unless num_workers > 0 is passed.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


### Define the Neural Network Model

In [None]:
class MLPModel(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output."""

    def __init__(self, input_size, hidden_size, output_size):
        # The parent constructor must run before any layer is assigned, so
        # that nn.Module can register the layers and their parameters.
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Define how an input batch ``x`` flows through the layers."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: regression needs an unbounded output.
        return self.fc3(hidden)

# Build the network (8 input features -> 64 hidden units -> 1 output) on the
# GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = MLPModel(8, 64, 1).to(device)


### Define Loss Function and Optimizer

In [None]:
# Loss: mean squared error suits regression; pick the loss that matches the task.
#   - nn.CrossEntropyLoss applies (log-)softmax internally, so feed it raw logits;
#     apply softmax yourself only where you need probabilities, e.g. for reporting.
#   - nn.BCEWithLogitsLoss (multi-label classification) applies the sigmoid internally.
criterion = nn.MSELoss()

# Optimizer: Adam is a gradient-based update rule, used in the same role as SGD.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

### Training the model

In [None]:
num_epochs = 100
model.train()  # Training mode: train-only layers such as dropout are active

for epoch in range(num_epochs):
    total_loss = 0  # Sum of the per-batch losses seen in this epoch

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Gradients accumulate across backward() calls, so clear the ones left
        # over from the previous batch before computing new ones.
        optimizer.zero_grad()

        # Forward pass
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)

        # Backward pass, followed by one update of the weights
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean batch loss every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

# For validation, do not call zero_grad() or step(): the weights must not be updated.

Epoch [10/100], Loss: 0.3017
Epoch [20/100], Loss: 0.2754
Epoch [30/100], Loss: 0.2625
Epoch [40/100], Loss: 0.2527
Epoch [50/100], Loss: 0.2428
Epoch [60/100], Loss: 0.2372
Epoch [70/100], Loss: 0.2290
Epoch [80/100], Loss: 0.2224
Epoch [90/100], Loss: 0.2184


### Evaluating the model

In [None]:
model.eval()  # Evaluation mode: train-only layers such as dropout are disabled
total_mse = 0.0  # Running sum of batch losses, each weighted by its batch size
num_samples = 0
with torch.no_grad():  # Scoring only: no gradients are needed
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        predictions = model(X_batch)
        # criterion is nn.MSELoss() with its default reduction, i.e. the mean
        # over the current batch.
        mse = criterion(predictions, y_batch)

        # Weight each batch mean by its size so that a smaller final batch
        # does not count as much as a full one.
        batch_size = X_batch.size(0)
        total_mse += mse.item() * batch_size
        num_samples += batch_size

print(f"Test MSE Loss: {total_mse/num_samples:.4f}")


### Saving the model weights

In [None]:
# state_dict() holds only the learned parameters and buffers, not the model
# class itself; loading it back requires building the same architecture first.
torch.save(obj=model.state_dict(), f="model_weights.pth")
print("Model saved!")

### Saving the Entire Model (Architecture + Weights)

In [None]:
# Pickles the whole module object. Loading it later requires the model's
# class definition to be available under the same name.
torch.save(obj=model, f="model_full.pth")

### Loading the model

In [None]:
# Rebuild the same architecture, then load the weights that were written with
# torch.save(model.state_dict(), "model_weights.pth").
loaded_model = MLPModel(8, 64, 1).to(device)
# map_location remaps the stored tensors onto the current device, so weights
# saved on a GPU machine can still be loaded on a CPU-only one.
state_dict = torch.load("model_weights.pth", map_location=device)
loaded_model.load_state_dict(state_dict)
loaded_model.eval()  # Evaluation mode before making predictions
print("Model loaded!")

### Making predictions

In [None]:
# Slicing with [:1] keeps the batch axis, so the single row converts to a
# 2-D tensor directly (wrapping X_test[0] in a Python list is slow and makes
# PyTorch emit a warning).
sample = torch.tensor(X_test[:1]).to(device)  # Single test example
with torch.no_grad():  # Inference only: no gradient tracking needed
    prediction = loaded_model(sample)
print(f"Predicted Price: {prediction.item():.2f}, Actual Price: {y_test[0][0]:.2f}")