# 08 Deep Learning Homework

In [88]:
# set random seed generators
import numpy as np
import torch

SEED = 42
np.random.seed()
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Class for Loading the Data

In [89]:
import os
from torch.utils.data import Dataset
from PIL import Image

class HairDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform=transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(self.data_dir))
        self.class_to_idx = {cls:i for i,cls in enumerate(self.classes)}

        for label in self.classes:
            label_dir = os.path.join(self.data_dir, label)
            for img in os.listdir(label_dir):
                img = os.path.join(self.data_dir, label, img)
                self.image_paths.append(img)
                self.labels.append(self.class_to_idx[label])

    def show_paths(self):
        print(self.image_paths)

    def show_labels(self):
        print(self.labels)

    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


## Simple Preprocessing

In [92]:
from torchvision import transforms

# make sure the initial input is of size (200, 200, 3)
input_size=200

# imagenet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms: (1) Resize the input (2) convert to a tensor (3) Normalize the image

train_transforms = transforms.Compose([
    transforms.Resize(size=(input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize(size=(input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])


## Create DataLoaders
- load the train and validation datasets
- use the `DataLoader` to setup the batch size, and shuffle images for training and validation

In [93]:
from torch.utils.data import DataLoader

train_ds = HairDataset(data_dir="../datasets/08-homework_dataset/train/",
                       transform=train_transforms
                       )

val_ds = HairDataset(data_dir="../datasets/08-homework_dataset/test/",
                     transform=val_transforms
                     )

batch_size=20

train_loader = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True
)

val_loader = DataLoader(
    dataset=val_ds,
    batch_size=batch_size,
    shuffle=False
)



In [98]:
import torch.nn as nn
import torchvision.models as models

# num classes will be changed to 32
class HairClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3,3), padding=0, stride=1)
        self.pooling = nn.MaxPool2d(kernel_size=(2,2))
        self.relu = nn.ReLU()
        self.dense_1 = nn.Linear(313632, 64)
        self.dense_2 = nn.Linear(64, 1)


    def forward(self, x):
        x = self.conv(x)
        x = self.relu(x)
        x = self.pooling(x)
        x = torch.flatten(x, 1)
        x = self.dense_1(x)
        x = self.relu(x)
        x = self.dense_2(x)
        
        return x

## Train the Model

### Question 1

Which loss function you will use?

__A: nn.BCEWithLogitsLoss()__

In [100]:
import torch
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = HairClassifier()
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

criterion = nn.BCEWithLogitsLoss()

### Question 2

What's the total number of parameters of the model? You can use torchsummary or count manually.

__Answer: 20073473__

In [96]:

# manually find the parameter count
sum(p.numel() for p in model.parameters())

20073473

### Actual training of the model

In [None]:

def train_model():
    num_epochs = 10
    history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_ds)
        epoch_acc = correct_train / total_train
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)

        model.eval()
        val_running_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                labels = labels.float().unsqueeze(1)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_running_loss += loss.item() * images.size(0)
                predicted = (torch.sigmoid(outputs) > 0.5).float()
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_epoch_loss = val_running_loss / len(val_ds)
        val_epoch_acc = correct_val / total_val
        history['val_loss'].append(val_epoch_loss)
        history['val_acc'].append(val_epoch_acc)

        print(f"Epoch {epoch+1}/{num_epochs}, "
            f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
            f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")
        
        return history

Epoch 1/10, Loss: 0.6252, Acc: 0.6454, Val Loss: 0.7724, Val Acc: 0.5572
Epoch 2/10, Loss: 0.5806, Acc: 0.6929, Val Loss: 0.6240, Val Acc: 0.6667
Epoch 3/10, Loss: 0.5081, Acc: 0.7478, Val Loss: 0.7392, Val Acc: 0.6219
Epoch 4/10, Loss: 0.5087, Acc: 0.7428, Val Loss: 0.6009, Val Acc: 0.7065
Epoch 5/10, Loss: 0.4088, Acc: 0.8102, Val Loss: 0.6288, Val Acc: 0.6816
Epoch 6/10, Loss: 0.3397, Acc: 0.8539, Val Loss: 0.5748, Val Acc: 0.7214
Epoch 7/10, Loss: 0.3321, Acc: 0.8539, Val Loss: 0.6813, Val Acc: 0.6915
Epoch 8/10, Loss: 0.2222, Acc: 0.9363, Val Loss: 0.6071, Val Acc: 0.7512
Epoch 9/10, Loss: 0.1674, Acc: 0.9401, Val Loss: 0.6133, Val Acc: 0.7463
Epoch 10/10, Loss: 0.1704, Acc: 0.9401, Val Loss: 0.9001, Val Acc: 0.6866


### Question 3

What is the median of training accuracy for all the epochs for this model?

__A: 0.84__

In [105]:
np.median(history['acc'])

np.float64(0.8320848938826466)

### Question 4

What is the standard deviation of training loss for all the epochs for this model?

__A: 0.171__

In [106]:
np.std(history['loss'])

np.float64(0.15865462466781946)

## Data Augmentation

Perform data augmentation and run the model again

In [107]:
# make sure the initial input is of size (200, 200, 3)
input_size=200

# imagenet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms: (1) Resize the input (2) convert to a tensor (3) Normalize the image

train_transforms = transforms.Compose([
    transforms.Resize(size=(input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
])

val_transforms = transforms.Compose([
    transforms.Resize(size=(input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip()
])


In [108]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_ds)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(val_ds)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.1683, Acc: 0.9351, Val Loss: 0.7072, Val Acc: 0.7562
Epoch 2/10, Loss: 0.0987, Acc: 0.9663, Val Loss: 0.6710, Val Acc: 0.7562
Epoch 3/10, Loss: 0.0377, Acc: 0.9975, Val Loss: 0.7331, Val Acc: 0.7612
Epoch 4/10, Loss: 0.0396, Acc: 0.9900, Val Loss: 0.8554, Val Acc: 0.7413
Epoch 5/10, Loss: 0.0146, Acc: 1.0000, Val Loss: 0.8343, Val Acc: 0.7512
Epoch 6/10, Loss: 0.0099, Acc: 1.0000, Val Loss: 0.9009, Val Acc: 0.7512
Epoch 7/10, Loss: 0.0078, Acc: 1.0000, Val Loss: 0.9656, Val Acc: 0.7512
Epoch 8/10, Loss: 0.0064, Acc: 1.0000, Val Loss: 0.9055, Val Acc: 0.7313
Epoch 9/10, Loss: 0.0056, Acc: 1.0000, Val Loss: 0.9821, Val Acc: 0.7512
Epoch 10/10, Loss: 0.0046, Acc: 1.0000, Val Loss: 1.0000, Val Acc: 0.7512


### Question 5

Let's train our model for 10 more epochs using the same code as previously.

Note: make sure you don't re-create the model. we want to continue training the model we already started training.
What is the mean of test loss for all the epochs for the model trained with augmentations?

__A: 0.88__

In [109]:
np.median(history['val_loss'])

np.float64(0.8781717992688886)

### Question 6

What's the average of test accuracy for the last 5 epochs (from 6 to 10) for the model trained with augmentations?

__A: 0.68__

In [None]:
np.average(history['val_acc'][5:])

np.float64(0.7472636815920398)