In [18]:
import numpy as np
import pandas as pd
from glob import glob
from os.path import join
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
import torch.optim as optim
from sklearn.model_selection import train_test_split
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
import numpy as np
import pandas as pd
from glob import glob
from os.path import join
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from torchvision.transforms import RandomHorizontalFlip, RandomRotation, ColorJitter
import torch.optim as optim

class AgeDataset(torch.utils.data.Dataset):

    def __init__(self, data_path, annot_path, train=True):
        super(AgeDataset, self).__init__()

        self.annot_path = annot_path
        self.data_path = data_path
        self.train = train

        self.ann = pd.read_csv(annot_path)
        self.files = self.ann['file_id']
        if train:
            self.ages = self.ann['age']
        self.transform = self._transform(224)

    @staticmethod    
    def _convert_image_to_rgb(image):
        return image.convert("RGB")

    def _transform(self, n_px):
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        if self.train:
            return Compose([
                Resize((n_px, n_px)),
                RandomHorizontalFlip(),  # Data augmentation
                RandomRotation(15),  # Data augmentation
                ColorJitter(brightness=0.5, contrast=0.5),  # Data augmentation
                ToTensor(),
                Normalize(mean, std),
            ])
        else:
            return Compose([
                Resize((n_px, n_px)),
                ToTensor(),
                Normalize(mean, std),
            ])

    def read_img(self, file_name):
        im_path = join(self.data_path,file_name)   
        img = Image.open(im_path)
        img = self.transform(img)
        return img

    def __getitem__(self, index):
        file_name = self.files[index]
        img = self.read_img(file_name)
        if self.train:
            age = self.ages[index]
            return img, age
        else:
            return img

    def __len__(self):
        return len(self.files)


train_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/train'
train_ann = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/train.csv'
train_dataset = AgeDataset(train_path, train_ann, train=True)


test_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/test'
test_ann = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/submission.csv'
test_dataset = AgeDataset(test_path, test_ann, train=False)


train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True,num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)






In [24]:
print(len(train_loader))

667


In [25]:
import torchvision.models as models

pretrained_model = models.resnet18(pretrained=True)



In [26]:
class AgeModel(nn.Module):
    def __init__(self):
        super(AgeModel, self).__init__()
        self.model = models.resnet18(pretrained=True)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Dropout(0.5),
            nn.Linear(num_ftrs, 1)
        )

    def forward(self, x):
        return self.model(x)

# Instantiate the model
model = AgeModel().to(device)

# Define hyperparameters for tuning
learning_rates = [0.001, 0.0001]
batch_sizes = [32, 64]
best_loss = float('inf')

for lr in learning_rates:
    for batch_size in batch_sizes:
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        # Learning rate scheduler
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
        
        print(f'Training with learning rate: {lr} and batch size: {batch_size}')
        for epoch in range(20):  # Set the number of epochs
            model.train()
            running_loss = 0.0
            for i, data in enumerate(train_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels.float())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            
            # Update the learning rate
            scheduler.step()
            
            # Print loss
            epoch_loss = running_loss / len(train_loader)
            print(f'Epoch {epoch+1}, Loss: {epoch_loss}')
            
            # Checkpoint the best model
            if epoch_loss < best_loss:
                best_loss = epoch_loss
                best_lr = lr
                best_batch_size = batch_size
                torch.save(model.state_dict(), '/kaggle/working/best_model.pth')

print(f'Best model trained with learning rate: {best_lr} and batch size: {best_batch_size}')


Training with learning rate: 0.001 and batch size: 32
Epoch 1, Loss: 405.9693109034777
Epoch 2, Loss: 108.82769402499677
Epoch 3, Loss: 94.91048342749097
Epoch 4, Loss: 88.19894201573224
Epoch 5, Loss: 81.70181176044058
Epoch 6, Loss: 77.6725676649514
Epoch 7, Loss: 75.43683787550347
Epoch 8, Loss: 61.12171602606595
Epoch 9, Loss: 59.21905563045656
Epoch 10, Loss: 55.8144586382956
Epoch 11, Loss: 55.58751121418051
Epoch 12, Loss: 55.0830676151716
Epoch 13, Loss: 53.763831862087905
Epoch 14, Loss: 52.66518777278231
Epoch 15, Loss: 49.883850463684176
Epoch 16, Loss: 51.11191272592616
Epoch 17, Loss: 49.67688494000299
Epoch 18, Loss: 49.776413824604724
Epoch 19, Loss: 50.32285289392657
Epoch 20, Loss: 48.04576221506099
Training with learning rate: 0.001 and batch size: 64
Epoch 1, Loss: 57.518322733348
Epoch 2, Loss: 56.0339717122609
Epoch 3, Loss: 53.43655329264566
Epoch 4, Loss: 53.95883481945106
Epoch 5, Loss: 50.3409277636134
Epoch 6, Loss: 48.98520697519451
Epoch 7, Loss: 47.58119245

In [27]:
@torch.no_grad

def predict(loader, model):
    model.eval()
    predictions = []

    for img in tqdm(loader):
        img = img.to(device)

        pred = model(img)
        predictions.extend(pred.flatten().detach().tolist())

    return predictions

preds = predict(test_loader, model)

submit = pd.read_csv('/kaggle/input/smai-24-age-prediction/content/faces_dataset/submission.csv')
submit['age'] = preds
submit.head()

submit.to_csv('baseline.csv',index=False)

100%|██████████| 31/31 [00:09<00:00,  3.32it/s]
