In [1]:
import numpy as np
import torch

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

Model  

For this homework we will use Convolutional Neural Network (CNN). We'll use PyTorch.  

You need to develop the model with following structure:  

    The shape for input should be (3, 200, 200) (channels first format in PyTorch)  
    Next, create a convolutional layer (nn.Conv2d):  
        Use 32 filters (output channels)  
        Kernel size should be (3, 3) (that's the size of the filter), padding = 0, stride = 1  
        Use 'relu' as activation  
    Reduce the size of the feature map with max pooling (nn.MaxPool2d)  
        Set the pooling size to (2, 2)  
    Turn the multi-dimensional result into vectors using flatten or view  
    Next, add a nn.Linear layer with 64 neurons and 'relu' activation  
    Finally, create the nn.Linear layer with 1 neuron - this will be the output  
        The output layer should have an activation - use the appropriate activation for the binary classification case  
  
As optimizer use torch.optim.SGD with the following parameters:  
  
    torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)  


In [2]:
# import numpy as np
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [3]:
class HairDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
# Simple preprocessing

input_size = 200

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
])

validation_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
])

In [5]:
# Create dataloaders

train_dataset = HairDataset(
    data_dir='data/train',
    transform=train_transforms
)

validation_dataset = HairDataset(
    data_dir='data/test',
    transform=validation_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=False)

In [6]:
import torch
import torch.nn as nn

class HairClassifierCNN200(nn.Module):
    def __init__(self):
        super(HairClassifierCNN200, self).__init__()

        # Convolutional layer
        self.conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 99 * 99, 64)
        self.fc2 = nn.Linear(64, 1)  

    def forward(self, x):
        # Conv + ReLU + Pooling
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        # Flatten
        x = torch.flatten(x, 1)
        # Linea1 1 + RelU
        x = self.fc1(x)
        x = self.relu(x)
        # # Linear 2
        x = self.fc2(x)
        return x


In [7]:
# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = HairClassifierCNN200()
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

criterion = nn.BCEWithLogitsLoss()

In [8]:
# Option 1: Using torchsummary (install with: pip install torchsummary)
from torchsummary import summary
summary(model, input_size=(3, 200, 200))

# Option 2: Manual counting
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
            Linear-4                   [-1, 64]      20,072,512
              ReLU-5                   [-1, 64]               0
            Linear-6                    [-1, 1]              65
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 21.54
Params size (MB): 76.57
Estimated Total Size (MB): 98.57
----------------------------------------------------------------
Total parameters: 20073473


Question 3 & 4

In [9]:
# Simple preprocessing

input_size = 200

train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) # ImageNet normalization
])

validation_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) 
])

In [10]:
# Create dataloaders

train_dataset = HairDataset(
    data_dir='data/train',
    transform=train_transforms
)

validation_dataset = HairDataset(
    data_dir='data/test',
    transform=validation_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

In [11]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6269, Acc: 0.6454, Val Loss: 0.6720, Val Acc: 0.6020
Epoch 2/10, Loss: 0.5853, Acc: 0.6991, Val Loss: 0.6943, Val Acc: 0.6517
Epoch 3/10, Loss: 0.5180, Acc: 0.7378, Val Loss: 0.7620, Val Acc: 0.6418
Epoch 4/10, Loss: 0.5536, Acc: 0.7141, Val Loss: 0.7362, Val Acc: 0.6070
Epoch 5/10, Loss: 0.5057, Acc: 0.7291, Val Loss: 0.6399, Val Acc: 0.6816
Epoch 6/10, Loss: 0.4244, Acc: 0.7978, Val Loss: 0.7115, Val Acc: 0.6567
Epoch 7/10, Loss: 0.3493, Acc: 0.8352, Val Loss: 0.7469, Val Acc: 0.6169
Epoch 8/10, Loss: 0.3363, Acc: 0.8539, Val Loss: 0.6735, Val Acc: 0.6716
Epoch 9/10, Loss: 0.2267, Acc: 0.9126, Val Loss: 0.7691, Val Acc: 0.6517
Epoch 10/10, Loss: 0.2189, Acc: 0.9139, Val Loss: 0.9655, Val Acc: 0.6915


In [12]:
f'Median of training accuracy: {np.mean(history['acc'])}'

'Median of training accuracy: 0.7838951310861424'

In [13]:
f'Standard deviation of training loss: {np.std(history['loss'])}'

'Standard deviation of training loss: 0.13878674907896324'

Question 5  Data Augmentation

In [14]:
# Simple preprocessing

input_size = 200

train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]) # ImageNet normalization
])

validation_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) 
])

In [15]:
# Create dataloaders

train_dataset = HairDataset(
    data_dir='data/train',
    transform=train_transforms
)

validation_dataset = HairDataset(
    data_dir='data/test',
    transform=validation_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

In [16]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.7237, Acc: 0.6392, Val Loss: 0.6721, Val Acc: 0.6468
Epoch 2/10, Loss: 0.6211, Acc: 0.6592, Val Loss: 0.9167, Val Acc: 0.5522
Epoch 3/10, Loss: 0.6334, Acc: 0.6554, Val Loss: 0.6797, Val Acc: 0.6318
Epoch 4/10, Loss: 0.5897, Acc: 0.7016, Val Loss: 0.6452, Val Acc: 0.6468
Epoch 5/10, Loss: 0.5706, Acc: 0.7179, Val Loss: 0.5790, Val Acc: 0.7015
Epoch 6/10, Loss: 0.5794, Acc: 0.7016, Val Loss: 0.5579, Val Acc: 0.7114
Epoch 7/10, Loss: 0.5236, Acc: 0.7441, Val Loss: 0.5471, Val Acc: 0.7015
Epoch 8/10, Loss: 0.5479, Acc: 0.7091, Val Loss: 0.5909, Val Acc: 0.6766
Epoch 9/10, Loss: 0.5229, Acc: 0.7316, Val Loss: 0.8330, Val Acc: 0.5721
Epoch 10/10, Loss: 0.5379, Acc: 0.7141, Val Loss: 0.6173, Val Acc: 0.6766


In [17]:
f'mean of test loss for all the epochs: {np.mean(history['val_loss'])}'

'mean of test loss for all the epochs: 0.6638691819124666'

In [18]:
f'Average of test accuracy for the last 5 epochs (from 6 to 10): {np.mean(history['val_acc'][5:])}'

'Average of test accuracy for the last 5 epochs (from 6 to 10): 0.6676616915422885'