In [134]:
# "https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip"

In [135]:
dataset_path = "../../../data/straight-curly-data"

In [136]:
import torch
import numpy as np

In [137]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [138]:
import torchvision.models as models
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [139]:
# Custom Dataset Class

import os
from torch.utils.data import Dataset
from PIL import Image


class HairDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [140]:
# Preprocessing
input_size = 200

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose(
    [
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

val_transforms = transforms.Compose(
    [
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [141]:
from torch.utils.data import DataLoader

train_dataset = HairDataset(
    data_dir=f"{dataset_path}/train", transform=train_transforms
)

val_dataset = HairDataset(
    data_dir=f"{dataset_path}/validation", transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=20, shuffle=False)

In [142]:
# 2.Build a simple CNN model
import torch.nn as nn
import torch.optim as optim


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # Convolution Layers: Extract features like:edges, curves, shapes
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=0),  # 28×28 → 32 maps
            nn.ReLU(),
            # Max Pooling: Reduces image size → keeps important information
            nn.MaxPool2d(2, 2),  # 28→14
            nn.Flatten(),
        )
        # Dynamically compute fc input features
        dummy_input = torch.zeros(1, 3, 200, 200)
        dummy_out = self.conv_layer(dummy_input)
        n_features = dummy_out.numel()  # total flattened size

        # Fully Connected Layers: Classify the features into digits
        self.fc = nn.Sequential(nn.Linear(n_features, 64), nn.ReLU())

        self.output_layer = nn.Sequential(nn.Linear(64, 1), nn.Sigmoid())

    def forward(self, x):
        x = self.conv_layer(x)
        x = self.fc(x)
        x = self.output_layer(x)
        return x


model = CNN()

In [143]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

Question 1


In [144]:
criterion = nn.CrossEntropyLoss()

Question 2


In [145]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 20073473


In [146]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [147]:
# Training Loop
def train_and_evaluate():
    num_epochs = 10
    history = {"acc": [], "loss": [], "val_acc": [], "val_loss": []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(
                1
            )  # Ensure labels are float and have shape (batch_size, 1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_dataset)
        epoch_acc = correct_train / total_train
        history["loss"].append(epoch_loss)
        history["acc"].append(epoch_acc)

        model.eval()
        val_running_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                labels = labels.float().unsqueeze(1)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_running_loss += loss.item() * images.size(0)
                predicted = (torch.sigmoid(outputs) > 0.5).float()
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_epoch_loss = val_running_loss / len(val_dataset)
        val_epoch_acc = correct_val / total_val
        history["val_loss"].append(val_epoch_loss)
        history["val_acc"].append(val_epoch_acc)

        print(
            f"Epoch {epoch+1}/{num_epochs}, "
            f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
            f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}"
        )
    return history

In [148]:
history = train_and_evaluate()

Epoch 1/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 2/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 3/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 4/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 5/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 6/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 7/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 8/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 9/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 10/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876


Question 3


In [149]:
med = np.median(history["acc"])
print(med)

0.4868913857677903


Question 4


In [150]:
np.std(history["loss"])

np.float64(0.0)

Data Augmentation

In [153]:
# Preprocessing
input_size = 200

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose(
    [
        transforms.RandomRotation(50),
        transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

val_transforms = transforms.Compose(
    [
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

train_dataset = HairDataset(
    data_dir=f"{dataset_path}/train", transform=train_transforms
)

val_dataset = HairDataset(
    data_dir=f"{dataset_path}/validation", transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=20, shuffle=False)

In [154]:
def train_and_evaluate():
    num_epochs = 10
    history = {"acc": [], "loss": [], "val_acc": [], "val_loss": []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(
                1
            )  # Ensure labels are float and have shape (batch_size, 1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_dataset)
        epoch_acc = correct_train / total_train
        history["loss"].append(epoch_loss)
        history["acc"].append(epoch_acc)

        model.eval()
        val_running_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                labels = labels.float().unsqueeze(1)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_running_loss += loss.item() * images.size(0)
                predicted = (torch.sigmoid(outputs) > 0.5).float()
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_epoch_loss = val_running_loss / len(val_dataset)
        val_epoch_acc = correct_val / total_val
        history["val_loss"].append(val_epoch_loss)
        history["val_acc"].append(val_epoch_acc)

        print(
            f"Epoch {epoch+1}/{num_epochs}, "
            f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
            f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}"
        )
    return history

Question 5


In [155]:
history = train_and_evaluate()

Epoch 1/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 2/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 3/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 4/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 5/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 6/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 7/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 8/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 9/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876
Epoch 10/10, Loss: 0.0000, Acc: 0.4869, Val Loss: 0.0000, Val Acc: 0.4876


In [156]:
np.mean(history["val_loss"])

np.float64(0.0)

Question 6

In [161]:
history["val_acc"][5:]

[0.48756218905472637,
 0.48756218905472637,
 0.48756218905472637,
 0.48756218905472637,
 0.48756218905472637]

In [162]:
np.average(history["val_acc"][5:])

np.float64(0.4875621890547263)