In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
model = nn.Linear(3, 1)  # simple layer
print(model)

x = torch.tensor([[1.0, 2.0, 3.0]])
target = torch.tensor([[10.0]])

criterion = nn.MSELoss()

output = model(x)
print(output)

loss = criterion(output, target)
print(loss)

loss.backward()  # calculates gradients for all weights

print(model.weight.grad)
print(model.bias.grad)

Linear(in_features=3, out_features=1, bias=True)
tensor([[-0.8719]], grad_fn=<AddmmBackward0>)
tensor(118.1972, grad_fn=<MseLossBackward0>)
tensor([[-21.7437, -43.4874, -65.2311]])
tensor([-21.7437])


# We want the model to learn this function: y=3x+1


In [None]:
# 1. A simple neural network (one layer)
# Create a tiny model
model = nn.Linear(1, 1)  # y = wx + b

# 2. Training data
x = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[4.0], [7.0], [10.0], [13.0]])

# 3. Loss function + optimizer
criterion = nn.MSELoss()  # mean squared error
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 4. Training loop
for epoch in range(1000):
    # 1. Forward pass
    preds = model(x)  # model computes y_pred (y=xW+b)

    # 2. Loss (Difference between prediction and real value)
    loss = criterion(preds, y)

    # 3. Zero gradients (important!)
    optimizer.zero_grad()

    # 4. Backward pass
    # compute ∂loss/∂W ,compute ∂loss/∂b, Autograd builds the whole graph and backpropagates through it
    loss.backward()  # compute gradients

    # 5. Update weights
    optimizer.step()

    if epoch % 200 == 0:
        print(epoch, loss.item())

# 5. test the model
test = torch.tensor([[5.0]])
print(model(test))

0 112.34922790527344
200 0.00021380113321356475
400 6.443906022468582e-05
600 1.9426728613325395e-05
800 5.855358267581323e-06
tensor([[15.9977]], grad_fn=<AddmmBackward0>)


Input (x) → Linear Layer → Activation → Output → Loss

1. Forward Pass (prediction)
   PyTorch computes step by step:
   z1 = xW1 + b1
   a1 = ReLU(z1)
   z2 = a1W2 + b2
   prediction = z2

loss = MSE(prediction, target)

x
↓
[Layer 1] W1, b1
↓
ReLU
↓
[Layer 2] W2, b2
↓
prediction
↓
loss

2. Backward Pass (computing gradients)
   loss
   ↑
   prediction
   ↑
   W2, b2 ← gradient goes backward
   ↑
   ReLU
   ↑
   W1, b1 ← gradient goes backward
   ↑
   x
   Everything flows backwards — this is backpropagation


nn.ReLU(): Activation functions make the network non-linear, so it can learn real-world patterns.

if x < 0 → output = 0
if x > 0 → output = x


In [5]:
model = nn.Sequential(
    # layer 1
    nn.Linear(1, 3),
    #
    nn.ReLU(),
    # layer 2
    nn.Linear(3, 1),
)

x = torch.tensor([[1.0], [2.0], [3.0]])
y = torch.tensor([[3.0], [5.0], [7.0]])

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(300):
    pred = model(x)
    loss = criterion(pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(epoch, loss.item())

0 17.78514862060547
100 0.0503237210214138
200 0.011900975368916988


In [None]:
# Test the model

test_input = torch.tensor([[4.0]])

# 1. Switch to eval mode (This tells layers like dropout/batchnorm (if you had them) to behave correctly)
model.eval()

# 2. Disable autograd during testing
with torch.no_grad():
    prediction = model(test_input)

print(prediction)
print(prediction.item())

tensor([[9.1066]])
9.106607437133789


# build a full CNN (Convolutional Neural Network) from scratch using PyTorch


Neural networks need floating numbers, not integers


In [2]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

Transforms:
are preprocessing steps applied to your images before they enter the neural network.

Pipeline:
Image (0-255) → ToTensor → FloatTensor (0-1) → Normalize → FloatTensor (-1 to 1)


In [None]:
# 1. Prepare the dataset
# We convert images to tensors and normalize them.

transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)
print(transform)
train_data = datasets.MNIST(root="data", train=True, download=True, transform=transform)
test_data = datasets.MNIST(root="data", train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

Compose(
    ToTensor()
    Normalize(mean=(0.5,), std=(0.5,))
)


100%|██████████| 9.91M/9.91M [00:26<00:00, 375kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 142kB/s]
100%|██████████| 1.65M/1.65M [00:02<00:00, 677kB/s] 
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.95MB/s]


In [6]:
# 2.Build a simple CNN model


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        # Convolution Layers: Extract features like:edges, curves, shapes
        self.conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # 28×28 → 32 maps
            nn.ReLU(),
            # Max Pooling: Reduces image size → keeps important information
            nn.MaxPool2d(2, 2),  # 28→14
            nn.Conv2d(32, 64, kernel_size=3),  # 14→12
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 12→6
        )
        # Fully Connected Layers: Classify the features into digits 0–9
        self.fc_layer = nn.Sequential(
            nn.Linear(64 * 6 * 6, 128),
            nn.ReLU(),
            nn.Linear(128, 10),  # 10 classes (0–9)
        )

    def forward(self, x):
        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.fc_layer(x)
        return x


model = CNN()

In [None]:
#3. Loss + Optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
#4. Training Loop

for epoch in range(5):
    for images, labels in train_loader:
        
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


Epoch 1, Loss: 0.026803454384207726
Epoch 2, Loss: 0.008835765533149242
Epoch 3, Loss: 0.0003869156935252249
Epoch 4, Loss: 0.005458872299641371
Epoch 5, Loss: 0.005081222392618656


In [9]:
#5. Testing (Accuracy)

correct = 0
total = 0

model.eval()

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 99.10%


# MobileNetV2 pretrained model

--------------------- GPU --------------------------------

In [None]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
device

'cpu'

In [5]:
!which python3
!python3 --version


/Users/ZenaAbdalkarem/Documents/ML/DataTalks-zoomcamp/machine-learning/.venvtorch/bin/python3
Python 3.11.14


In [None]:

print("PyTorch:", torch.__version__)
print("MPS Available:", torch.backends.mps.is_available())
print("MPS Built:", torch.backends.mps.is_built())


PyTorch: 2.9.1
MPS Available: False
MPS Built: True


In [9]:
!sw_vers

ProductName:	macOS
ProductVersion:	12.7.4
BuildVersion:	21H1123


In [8]:
try:
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print("MPS works")
except RuntimeError as e:
    print("MPS failed:", e)

MPS failed: The MPS backend is supported on MacOS 14.0+. Current OS version can be queried using `sw_vers`
