In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import re

In [3]:
class UTKFaceDataset(Dataset):
    """Image/age regression dataset read from a flat directory of ``.jpg`` files.

    The age label is parsed from the leading ``<digits>_`` prefix of each file
    name. Files ending in ``.jpg`` whose name does not start with that prefix
    are skipped at construction time, so ``__getitem__`` never fails on a
    missing label.

    Args:
        root_dir: Directory containing the image files.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned.
    """

    # Leading "<age>_" prefix of a file name.
    _AGE_PATTERN = re.compile(r"(\d+)_")

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping stable from run to run.
        self.image_paths = sorted(
            f
            for f in os.listdir(root_dir)
            if f.endswith(".jpg") and self._AGE_PATTERN.match(f)
        )

    def __len__(self):
        """Return the number of usable images found in ``root_dir``."""
        return len(self.image_paths)

    @classmethod
    def _parse_age(cls, img_name):
        """Return the integer age encoded at the start of ``img_name``.

        Raises:
            ValueError: If ``img_name`` does not start with ``<digits>_``.
        """
        match = cls._AGE_PATTERN.match(img_name)
        if match is None:
            raise ValueError(f"Cannot parse age from file name: {img_name!r}")
        return int(match.group(1))

    def __getitem__(self, idx):
        """Return ``(image, age)`` for the sample at position ``idx``.

        ``image`` is the transformed image (or the RGB ``PIL.Image`` when no
        transform was given) and ``age`` is a scalar ``float32`` tensor.
        """
        img_name = self.image_paths[idx]
        img_path = os.path.join(self.root_dir, img_name)
        # The context manager closes the file handle promptly; convert()
        # returns a separate image object that stays valid after the close.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        # Extract age from filename
        age = self._parse_age(img_name)

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(age, dtype=torch.float32)



In [4]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [5]:
# Directory holding the UTKFace images (relative to the notebook's working directory).
data_dir = "archive (1)/utkface_aligned_cropped/UTKFace"
dataset = UTKFaceDataset(root_dir=data_dir, transform=transform)

In [6]:
# 80/20 train/test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so that, for a given dataset ordering, the same samples land
# in the test set on every run; otherwise the reported test MSE is not
# comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [7]:
class AgeCNN(nn.Module):
    """Small convolutional regressor that maps an image batch to one value per image.

    Three Conv(3x3) -> ReLU -> MaxPool(2) stages (3 -> 32 -> 64 -> 128
    channels) feed a two-layer fully connected head. The first linear layer
    expects 128 * 28 * 28 input features, which is what the conv stack
    produces for 224x224 inputs (three 2x poolings: 224 -> 28).
    """

    def __init__(self):
        super().__init__()

        # Build the conv stack stage by stage; layers are appended in the
        # order Conv, ReLU, MaxPool for each (in_channels, out_channels) pair.
        stages = []
        for in_ch, out_ch in ((3, 32), (32, 64), (64, 128)):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv_layers = nn.Sequential(*stages)

        # Regression head: flattened features -> 512 -> 1.
        self.fc_layers = nn.Sequential(
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of predictions for input batch ``x``."""
        features = self.conv_layers(x)
        flat = features.view(features.size(0), -1)
        return self.fc_layers(flat)

In [8]:
def train_model(model, train_loader, test_loader, epochs=10, lr=0.001):
    """Train ``model`` with Adam on MSE loss, then report its test-set MSE.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    Targets are reshaped to ``(batch, 1)`` before being compared with the
    model output.

    Args:
        model: ``nn.Module`` producing one value per input sample.
        train_loader: Iterable of ``(images, ages)`` tensor batches used for
            training.
        test_loader: Iterable of ``(images, ages)`` tensor batches used for the
            final evaluation.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        float: Mean squared error per sample over ``test_loader``, or NaN when
        ``test_loader`` yields no samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        samples_seen = 0
        for images, ages in train_loader:
            images, ages = images.to(device), ages.to(device).view(-1, 1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, ages)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # a short final batch is not over-represented in the epoch average.
            batch_count = ages.size(0)
            loss_sum += loss.item() * batch_count
            samples_seen += batch_count

        train_loss = loss_sum / samples_seen if samples_seen else float("nan")
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}")

    # Evaluate
    model.eval()
    loss_sum = 0.0
    samples_seen = 0
    with torch.no_grad():
        for images, ages in test_loader:
            images, ages = images.to(device), ages.to(device).view(-1, 1)
            outputs = model(images)
            loss = criterion(outputs, ages)
            # Same per-sample weighting as in training (see above).
            batch_count = ages.size(0)
            loss_sum += loss.item() * batch_count
            samples_seen += batch_count

    # Avoid ZeroDivisionError on an empty loader; NaN signals "no data".
    test_loss = loss_sum / samples_seen if samples_seen else float("nan")
    print(f"Test MSE Loss: {test_loss:.4f}")
    return test_loss


In [9]:
# Baseline: train the from-scratch CNN and keep its test MSE for the comparison below.
cnn_model = AgeCNN()
cnn_mse = train_model(
    model=cnn_model,
    train_loader=train_loader,
    test_loader=test_loader,
)


Epoch 1, Train Loss: 261.0037
Epoch 2, Train Loss: 128.8058
Epoch 3, Train Loss: 97.1538
Epoch 4, Train Loss: 78.1315
Epoch 5, Train Loss: 62.5130
Epoch 6, Train Loss: 50.5656
Epoch 7, Train Loss: 39.4348
Epoch 8, Train Loss: 30.0549
Epoch 9, Train Loss: 25.2342
Epoch 10, Train Loss: 23.0450
Test MSE Loss: 82.0894


In [None]:
# Train ResNet-18
# Bind the model to its own name: assigning it to `resnet18` would shadow the
# imported constructor and make this cell fail when it is run a second time.
resnet_model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Modify the fully connected layer for regression (1 output)
resnet_model.fc = nn.Linear(resnet_model.fc.in_features, 1)

resnet_mse = train_model(resnet_model, train_loader, test_loader)

print(f"CNN Test MSE: {cnn_mse:.4f}, ResNet-18 Test MSE: {resnet_mse:.4f}")

Epoch 1, Train Loss: 159.2431
Epoch 2, Train Loss: 75.8981
Epoch 3, Train Loss: 66.4541
Epoch 4, Train Loss: 54.1481
Epoch 5, Train Loss: 43.9015
Epoch 6, Train Loss: 36.4273
Epoch 7, Train Loss: 28.6846
Epoch 8, Train Loss: 23.8080
Epoch 9, Train Loss: 21.0398
Epoch 10, Train Loss: 16.7118
Test MSE Loss: 56.4122
CNN Test MSE: 82.0894, ResNet-18 Test MSE: 56.4122


- > Which model performs better and why?

Based on the results:

CNN Test MSE: 82.0894

ResNet-18 Test MSE: 56.4122

Since lower MSE (Mean Squared Error) indicates better performance, ResNet-18 performs better than the CNN trained from scratch.

- > Why does ResNet-18 perform better?

1. Pretrained Weights & Transfer Learning

ResNet-18 starts with weights pretrained on the ImageNet dataset, which means it has already learned to recognize general visual patterns such as edges, textures, and shapes. ImageNet is an object-classification dataset, so these are generic features rather than face-specific ones, but because they transfer well across different types of images, ResNet-18 can quickly adapt to a new task like age prediction.

On the other hand, a custom CNN trained from scratch has no prior knowledge. It starts with random weights and has to learn everything from zero, including basic patterns. This process takes longer, requires more data, and often results in worse performance compared to a model that benefits from transfer learning.

2. Deeper & More Optimized Architecture

ResNet-18 is a much deeper network compared to a typical CNN, meaning it has more layers that help capture detailed patterns in images. A deeper network allows for better feature extraction, identifying important details such as wrinkles, facial contours, and skin texture, which are useful for age prediction.

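On the other hand, the custom CNN has only three convolutional layers and two fully connected layers, which limits its ability to recognize complex patterns. Note that it does not have fewer parameters: its first fully connected layer (128 × 28 × 28 → 512) alone holds about 51 million weights, compared with roughly 11 million in all of ResNet-18. With so few convolutional layers, the model may struggle to differentiate subtle age-related features, leading to a higher prediction error than ResNet-18.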

3. Generalization Ability

The pretrained ResNet-18 already has a strong feature extraction capability from large-scale datasets.

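The scratch CNN shows signs of overfitting: its final training loss (23.05) is far below its test MSE (82.09). ResNet-18 shows a gap as well (16.71 vs. 56.41), but its test error is still clearly lower. The scratch CNN may also have struggled to extract meaningful high-level patterns, leading to higher test error.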

Conclusion

ResNet-18 is the better model because it uses transfer learning, has a deeper architecture, and generalizes better.

If the dataset were much larger, the scratch CNN might have narrowed the gap to ResNet-18 after sufficient training, but with limited data, the pretrained ResNet-18 is the preferred choice.