In [1]:
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/MyDrive/Y4S1/CZ4042 Neural Network & Deep Learning/Project/')

Mounted at /content/drive


In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchvision.ops import DeformConv2d
import torch.optim as optim
import numpy as np

In [4]:
# Load your dataset
transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/Y4S1/CZ4042 Neural Network & Deep Learning/Project/datasets', transform=transform)

# Create a DataLoader
loader = DataLoader(dataset, batch_size=1, num_workers=8, shuffle=False)

# Variables to hold the sum of all pixel values and the sum of the squared pixel values
mean = np.zeros(3)
std = np.zeros(3)

# Loop over the dataset
for images, _ in loader:
    # Rearrange batch to be the shape of [B, C, W * H]
    images = images.view(images.size(0), images.size(1), -1)
    # Compute mean and std here
    mean += images.mean(2).sum(0).numpy()
    std += images.pow(2).mean(2).sum(0).numpy()

# Finalize the mean and std
mean /= len(loader.dataset)
std /= len(loader.dataset)
std = np.sqrt(std - mean ** 2)

print(f'Mean: {mean}')
print(f'Std: {std}')


KeyboardInterrupt: ignored

In [None]:
# Modify the transformation for random cropping
transform = transforms.Compose([
    transforms.Resize(256),  # Resize to 256x256
    transforms.RandomCrop(227),  # Random crop to 227x227
    transforms.RandomHorizontalFlip(),  # Random flip
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.463, 0.356, 0.319], std=[0.280, 0.250, 0.234]),
])
# Load dataset
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/Y4S1/CZ4042 Neural Network & Deep Learning/Project/datasets', transform=transform)

# Split into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=50, shuffle=True, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=50, shuffle=False, num_workers=8)

In [None]:
class AgeGenderNet(nn.Module):
    def __init__(self, num_classes):
        super(AgeGenderNet, self).__init__()

        # First Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=(7, 7), stride=2, padding=3)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.lrn1 = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2)

        # Second Convolutional Layer
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.lrn2 = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2)

        # Third Convolutional Layer
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # First Fully Connected Layer
        self.fc1 = nn.Linear(384, 512)
        self.relu4 = nn.ReLU()
        self.drop1 = nn.Dropout()

        # Second Fully Connected Layer
        self.fc2 = nn.Linear(512, 512)
        self.relu5 = nn.ReLU()
        self.drop2 = nn.Dropout()

        # Third Fully Connected Layer
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        # First Convolutional Layer operations
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.lrn1(x)

        # Second Convolutional Layer operations
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.lrn2(x)

        # Third Convolutional Layer operations
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)

        # Global Average Pooling
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor

        # First Fully Connected Layer operations
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.drop1(x)

        # Second Fully Connected Layer operations
        x = self.fc2(x)
        x = self.relu5(x)
        x = self.drop2(x)

        # Third Fully Connected Layer
        x = self.fc3(x)
        # Note that softmax is typically not applied during training because it's included in the loss function
        return x


In [None]:
# Initialize the network
num_classes = 10  # Adjust the number of classes if different
model = AgeGenderNet(num_classes)

# # Weight initialization
# def init_weights(m):
#     if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
#         torch.nn.init.normal_(m.weight, mean=0.0, std=0.01)
#         if m.bias is not None:
#             torch.nn.init.zeros_(m.bias)

# Kaiming/He normal initialization for weights (good for layers before ReLU activations)
def init_weights_kaiming(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        nn.init.constant_(m.bias, 0)

model.apply(init_weights_kaiming)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # This criterion combines nn.LogSoftmax() and nn.NLLLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
print(f"Using device : {device}")

Using device : cuda


In [None]:
model = model.to(device)

In [None]:
num_epochs = 10

def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def evaluate_model(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader.dataset)
    val_accuracy = correct / total
    return val_loss, val_accuracy


for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total_samples = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  # Don't forget to move the data to the device
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate the batch's loss and accuracy
        running_loss += loss.item() * images.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()
        total_samples += labels.size(0)

        if i % 100 == 0:
            iteration_loss = running_loss / total_samples
            iteration_acc = running_corrects / total_samples
            print(f'Iteration: {i}, Iteration loss: {iteration_loss:.4f}, Iteration accuracy: {iteration_acc:.4f}')

    epoch_loss = running_loss / total_samples
    epoch_acc = running_corrects / total_samples

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    # after each epoch, evaluate the model
    val_loss, val_accuracy = evaluate_model(model, val_loader, criterion)
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    scheduler.step()

Iteration: 0, Iteration loss: 2.8635, Iteration accuracy: 0.0400
Iteration: 100, Iteration loss: 0.9834, Iteration accuracy: 0.4836
Iteration: 200, Iteration loss: 0.8875, Iteration accuracy: 0.4988
Iteration: 300, Iteration loss: 0.8468, Iteration accuracy: 0.5027
Epoch [1/10], Loss: 0.8381, Accuracy: 0.5030
Epoch [1/10], Validation Loss: 0.0143, Validation Accuracy: 0.5275
Iteration: 0, Iteration loss: 0.7692, Iteration accuracy: 0.4600
Iteration: 100, Iteration loss: 0.7569, Iteration accuracy: 0.4984
Iteration: 200, Iteration loss: 0.7538, Iteration accuracy: 0.5013
Iteration: 300, Iteration loss: 0.7503, Iteration accuracy: 0.5035
Epoch [2/10], Loss: 0.7486, Accuracy: 0.5029
Epoch [2/10], Validation Loss: 0.0142, Validation Accuracy: 0.5266
Iteration: 0, Iteration loss: 0.7316, Iteration accuracy: 0.5200
Iteration: 100, Iteration loss: 0.7332, Iteration accuracy: 0.5152
Iteration: 200, Iteration loss: 0.7324, Iteration accuracy: 0.5194
Iteration: 300, Iteration loss: 0.7314, Itera