<a href="https://colab.research.google.com/github/ekvirika/Facial-Expression-Recognition/blob/main/notebooks/05_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive (Colab-only API). A later cell copies the
# Kaggle API token from this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required packages
!pip install wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn

# Set up Kaggle API
!pip install kaggle

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Install the Kaggle API token at ~/.kaggle/kaggle.json.
# The token is copied from the mounted Google Drive (not uploaded to the Colab
# session); change the source path below if your kaggle.json lives elsewhere.
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
# Mode 600: readable/writable by the owner only.
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
!unzip -q challenges-in-representation-learning-facial-expression-recognition-challenge.zip


Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 87% 248M/285M [00:00<00:00, 286MB/s]
100% 285M/285M [00:00<00:00, 300MB/s]


In [5]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import wandb
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [6]:
class FER2013Dataset(Dataset):
    """Map-style dataset over a table of flattened 48x48 grayscale face images.

    Every row of ``data`` must provide:
      * ``'pixels'``  - a whitespace-separated string of 48 * 48 intensities,
        parsed as ``uint8``;
      * ``'emotion'`` - the class label, convertible with ``int()``.

    Args:
        data: Table supporting ``len()`` and positional ``.iloc[idx]`` row access
            (e.g. a pandas DataFrame).
        transform: Optional callable applied to the RGB PIL image. When ``None``
            the PIL image itself is returned.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at positional index ``idx``."""
        row = self.data.iloc[idx]

        # Decode the pixel string into a 48x48 uint8 array, then replicate the
        # single gray channel to RGB so 3-channel models can consume it.
        pixels = np.array(row['pixels'].split(), dtype=np.uint8)
        image = Image.fromarray(pixels.reshape(48, 48)).convert('RGB')

        # Compare against None explicitly: a callable transform may be falsy
        # (e.g. an empty container) and must still be applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, int(row['emotion'])

# Basic Block for ResNet

In [7]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    The first convolution carries ``stride``; the second always uses stride 1.
    The skip path is the identity unless the stride or the channel count
    changes, in which case it is a strided 1x1 convolution followed by
    batch normalisation.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection shortcut).
    """

    expansion = 1  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: conv-bn-relu-conv-bn.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: project only when the main path changes the tensor shape.
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(hidden))
        return F.relu(main + self.shortcut(x))

# Bottleneck Block for deeper ResNets


In [8]:
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The middle 3x3 convolution carries ``stride``; the final 1x1 convolution
    widens the output to ``expansion * planes`` channels. The skip path is the
    identity unless the stride or channel count changes, in which case it is a
    strided 1x1 convolution followed by batch normalisation.

    Args:
        in_planes: Number of input channels.
        planes: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution (and of the projection shortcut).
    """

    expansion = 4  # output channels = expansion * planes

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path: reduce (1x1) -> spatial (3x3, strided) -> expand (1x1).
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: project only when the main path changes the tensor shape.
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        reduced = F.relu(self.bn1(self.conv1(x)))
        spatial = F.relu(self.bn2(self.conv2(reduced)))
        expanded = self.bn3(self.conv3(spatial))
        return F.relu(expanded + self.shortcut(x))

In [None]:
# ResNet Architecture

In [9]:
class ResNet(nn.Module):
    """ResNet classifier: conv stem, four residual stages, pooled linear head.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: Sequence of four integers, the block count of each stage.
        num_classes: Size of the output layer.
        dropout_rate: Dropout probability applied just before the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=7, dropout_rate=0.5):
        super().__init__()
        # Channel count feeding the next block; advanced by _make_layer.
        self.in_planes = 64

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages as (width, stride of the first block); registered as
        # layer1 .. layer4.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (width, first_stride) in enumerate(stage_plan):
            stage = self._make_layer(block, width, num_blocks[index], stride=first_stride)
            setattr(self, f'layer{index + 1}', stage)

        # Head: global average pooling -> dropout -> linear classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one emits.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` logits for the image batch ``x``."""
        features = self.maxpool(F.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(self.dropout(pooled))



# Different ResNet configurations

In [10]:
# Different ResNet configurations
def ResNet18(num_classes=7, dropout_rate=0.5):
    """Build a ResNet of BasicBlocks with stage depths [2, 2, 2, 2]."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths, num_classes=num_classes, dropout_rate=dropout_rate)

def ResNet34(num_classes=7, dropout_rate=0.5):
    """Build a ResNet of BasicBlocks with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, num_classes=num_classes, dropout_rate=dropout_rate)

def ResNet50(num_classes=7, dropout_rate=0.5):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, dropout_rate=dropout_rate)

def ResNet101(num_classes=7, dropout_rate=0.5):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, num_classes=num_classes, dropout_rate=dropout_rate)

# Training Loop

In [11]:

# Training function
def train_model(model, train_loader, val_loader, config, device):
    """Train ``model`` with early stopping and return the best validation accuracy.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    evaluation pass over ``val_loader``, then logs the metrics to the active
    wandb run. Whenever validation accuracy strictly improves, the model's
    ``state_dict`` is saved to ``best_model_<wandb.run.name>.pth`` in the
    current working directory.

    Args:
        model: Network to optimise; must already live on ``device``.
        train_loader: Iterable of ``(inputs, targets)`` batches supporting ``len()``.
        val_loader: Iterable of ``(inputs, targets)`` batches supporting ``len()``.
        config: Dict of hyper-parameters.
            Required keys: ``'optimizer'`` (``'adam'`` or ``'sgd'``; any other
            value selects AdamW), ``'learning_rate'``, ``'weight_decay'``,
            ``'epochs'``.
            Optional keys: ``'patience'`` (epochs without improvement before
            stopping, default 10), ``'lr_step_size'`` (StepLR period, default
            10), ``'lr_gamma'`` (StepLR decay factor, default 0.1).
        device: Device every batch is moved to.

    Returns:
        Best validation accuracy observed, in percent (0.0 if no epoch ran).
    """
    criterion = nn.CrossEntropyLoss()

    if config['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'],
                               weight_decay=config['weight_decay'])
    elif config['optimizer'] == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config['learning_rate'],
                              momentum=0.9, weight_decay=config['weight_decay'])
    else:
        # Every other name, including 'adamw', falls back to AdamW.
        optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'],
                                weight_decay=config['weight_decay'])

    # Learning rate scheduler: multiply the LR by gamma every step_size epochs.
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.get('lr_step_size', 10),
        gamma=config.get('lr_gamma', 0.1),
    )

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint; callers reload that file unconditionally after training.
    best_val_acc = float('-inf')
    patience = 0
    max_patience = config.get('patience', 10)

    for epoch in range(config['epochs']):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        with tqdm(train_loader, desc=f'Epoch {epoch+1}/{config["epochs"]} - Training') as pbar:
            for data, target in pbar:
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = output.max(1)
                train_total += target.size(0)
                train_correct += predicted.eq(target).sum().item()

                pbar.set_postfix({
                    'Loss': f'{loss.item():.4f}',
                    'Acc': f'{100.*train_correct/train_total:.2f}%'
                })

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_acc = 100. * train_correct / max(train_total, 1)
        avg_train_loss = train_loss / max(len(train_loader), 1)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += criterion(output, target).item()

                _, predicted = output.max(1)
                val_total += target.size(0)
                val_correct += predicted.eq(target).sum().item()

        val_acc = 100. * val_correct / max(val_total, 1)
        avg_val_loss = val_loss / max(len(val_loader), 1)

        # Log to wandb
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': avg_train_loss,
            'train_accuracy': train_acc,
            'val_loss': avg_val_loss,
            'val_accuracy': val_acc,
            'learning_rate': optimizer.param_groups[0]['lr']
        })

        print(f'Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')

        # Early stopping: only a strict improvement resets the patience counter.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience = 0
            # Save best model
            torch.save(model.state_dict(), f'best_model_{wandb.run.name}.pth')
        else:
            patience += 1

        if patience >= max_patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

        scheduler.step()

    # Map the "no epoch ran" sentinel back to 0.0.
    return max(best_val_acc, 0.0)


# Evaluation function

In [12]:

def evaluate_model(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and compute classification metrics.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(accuracy, report, cm, all_preds, all_targets)``:
          * ``accuracy`` - percentage of correct predictions;
          * ``report`` - ``classification_report(..., output_dict=True)`` result;
          * ``cm`` - confusion matrix covering all seven emotion classes;
          * ``all_preds`` / ``all_targets`` - flat lists of class indices.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    # Emotion labels, listed in class-index order.
    emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    label_ids = list(range(len(emotion_labels)))

    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to(device))
            _, predicted = output.max(1)

            all_preds.extend(predicted.cpu().tolist())
            all_targets.extend(target.cpu().tolist())

    if not all_targets:
        raise ValueError('test_loader yielded no samples; nothing to evaluate')

    # Calculate metrics
    correct = sum(p == t for p, t in zip(all_preds, all_targets))
    accuracy = correct / len(all_preds) * 100

    # Pin `labels` so both outputs always cover all seven classes. Without it,
    # a class absent from this split makes `target_names` mismatch (sklearn
    # raises) and shrinks the confusion matrix below 7x7.
    report = classification_report(
        all_targets, all_preds,
        labels=label_ids,
        target_names=emotion_labels,
        output_dict=True,
        zero_division=0,  # absent classes score 0 without emitting warnings
    )

    # Confusion matrix
    cm = confusion_matrix(all_targets, all_preds, labels=label_ids)

    return accuracy, report, cm, all_preds, all_targets

In [13]:
def create_data_transforms(config):
    """Build the training and validation preprocessing pipelines.

    Both pipelines resize to 224x224, convert to a tensor and normalise each
    channel. The training pipeline additionally applies a random horizontal
    flip, a random rotation and brightness/contrast jitter between the resize
    and the tensor conversion.

    Args:
        config: Experiment configuration; currently not read by this function.

    Returns:
        Tuple ``(train_transform, val_transform)``.
    """
    target_size = (224, 224)
    # Per-channel statistics; these are the widely used ImageNet values.
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)

    def resize_step():
        return transforms.Resize(target_size)

    def tensor_steps():
        # Fresh objects on every call so the two pipelines share no state.
        return [
            transforms.ToTensor(),
            transforms.Normalize(mean=list(channel_mean), std=list(channel_std)),
        ]

    # Augmentation is applied to training images only.
    augmentation_steps = [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
    ]

    train_transform = transforms.Compose([resize_step(), *augmentation_steps, *tensor_steps()])
    val_transform = transforms.Compose([resize_step(), *tensor_steps()])

    return train_transform, val_transform

In [23]:
from sklearn.model_selection import train_test_split
from PIL import Image

def _resnet_factory(name):
    """Return the constructor for the ResNet variant called ``name``.

    Raises:
        ValueError: if ``name`` is not a supported variant.
    """
    factories = {
        'resnet18': ResNet18,
        'resnet34': ResNet34,
        'resnet50': ResNet50,
        'resnet101': ResNet101,
    }
    if name not in factories:
        raise ValueError(f"Unknown model {name!r}; expected one of {sorted(factories)}")
    return factories[name]


def run_experiment(config):
    """Train and evaluate one ResNet configuration, logging everything to wandb.

    Reads ``train.csv`` from the working directory, makes a stratified 85/15
    train/validation split, trains with ``train_model``, reloads the best
    checkpoint and logs accuracy, macro precision/recall/F1 and a confusion
    matrix. There is no separate test set here: the reported "test" metrics are
    computed on the validation split.

    Args:
        config: Dict with keys ``'model'`` (``'resnet18'``, ``'resnet34'``,
            ``'resnet50'`` or ``'resnet101'``), ``'batch_size'``, ``'dropout'``,
            ``'optimizer'``, ``'learning_rate'``, ``'weight_decay'`` and
            ``'epochs'``.

    Returns:
        None. If ``train.csv`` is missing, a hint is printed and the function
        returns early.

    Raises:
        ValueError: if ``config['model']`` is not a supported variant.
    """
    # Validate the model name before creating a wandb run, so a typo fails fast.
    build_model = _resnet_factory(config['model'])

    # The checkpoint file is named after the run. Encoding the hyper-parameters
    # keeps experiments that share an architecture from overwriting each
    # other's checkpoint and makes the runs distinguishable in the wandb UI.
    run_name = (
        f"{config['model']}_{config['optimizer']}"
        f"_lr{config['learning_rate']}_bs{config['batch_size']}"
        f"_do{config['dropout']}_wd{config['weight_decay']}"
    )

    # Initialize wandb
    wandb.init(project="facial-expression-recognition",
               name=run_name,
               config=config)

    succeeded = False
    try:
        # Set device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'Using device: {device}')

        # Load data
        print("Loading data...")
        try:
            data = pd.read_csv('train.csv')
        except FileNotFoundError:
            print("Please download train.csv from Kaggle FER2013 dataset")
            return

        print("Splitting data into train/val...")
        train_data, val_data = train_test_split(
            data, test_size=0.15, stratify=data['emotion'], random_state=42
        )

        # Create transforms
        train_transform, val_transform = create_data_transforms(config)

        # Create datasets
        train_dataset = FER2013Dataset(train_data, train_transform)
        val_dataset = FER2013Dataset(val_data, val_transform)

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'],
                                  shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=config['batch_size'],
                                shuffle=False, num_workers=2)

        # Create model
        model = build_model(dropout_rate=config['dropout']).to(device)

        # Log model info
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

        wandb.log({
            'total_parameters': total_params,
            'trainable_parameters': trainable_params
        })

        print(f'Model: {config["model"]}')
        print(f'Total parameters: {total_params:,}')
        print(f'Trainable parameters: {trainable_params:,}')

        # Train model
        best_val_acc = train_model(model, train_loader, val_loader, config, device)

        # Load best model and evaluate. map_location keeps the load working even
        # when the checkpoint was written on a different device.
        checkpoint_path = f'best_model_{wandb.run.name}.pth'
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        test_acc, report, cm, preds, targets = evaluate_model(model, val_loader, device)  # test on val for now

        # Log final results
        wandb.log({
            'best_val_accuracy': best_val_acc,
            'test_accuracy': test_acc,
            'precision_macro': report['macro avg']['precision'],
            'recall_macro': report['macro avg']['recall'],
            'f1_macro': report['macro avg']['f1-score']
        })

        print(f'Best Validation Accuracy: {best_val_acc:.2f}%')
        print(f'Test Accuracy (Val): {test_acc:.2f}%')

        # Confusion matrix, drawn on an explicit figure rather than pyplot's
        # implicit "current figure".
        emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=emotion_labels, yticklabels=emotion_labels, ax=ax)
        ax.set_title(f'Confusion Matrix - {config["model"]}')
        ax.set_ylabel('Actual')
        ax.set_xlabel('Predicted')

        # Log confusion matrix to wandb
        wandb.log({"confusion_matrix": wandb.Image(fig)})
        plt.close(fig)

        succeeded = True
    finally:
        # Always close the run (early return and exceptions included) so it does
        # not leak into the next experiment; failures get a non-zero exit code.
        wandb.finish(exit_code=0 if succeeded else 1)


# Different experiment configurations

In [None]:
# Different experiment configurations
# Hyper-parameter grid. Every row in _CONFIG_ROWS supplies the values for
# _CONFIG_KEYS, in that order; `experiments` is the resulting list of dicts.
_CONFIG_KEYS = ('model', 'batch_size', 'learning_rate', 'epochs', 'optimizer', 'dropout', 'weight_decay')

_CONFIG_ROWS = [
    # ResNet18 experiments
    ('resnet18', 64, 0.001, 20, 'adam', 0.3, 1e-4),
    ('resnet18', 32, 0.01, 20, 'sgd', 0.5, 1e-4),

    # ResNet34 experiments
    ('resnet34', 64, 0.001, 20, 'adam', 0.3, 1e-4),
    ('resnet34', 64, 0.0005, 20, 'adamw', 0.4, 1e-3),

    # ResNet50 experiments
    ('resnet50', 32, 0.001, 20, 'adam', 0.5, 1e-4),

    # High dropout (overfitting prevention)
    ('resnet34', 64, 0.001, 50, 'adam', 0.7, 1e-3),

    # Low dropout (potential overfitting)
    ('resnet34', 64, 0.001, 50, 'adam', 0.1, 1e-5),
]

experiments = [dict(zip(_CONFIG_KEYS, row)) for row in _CONFIG_ROWS]

# Run all experiments in order, printing a banner before each one.
separator = '=' * 50
for i, config in enumerate(experiments):
    print(f"\n{separator}")
    print(f"Running Experiment {i+1}/{len(experiments)}")
    print(f"Config: {config}")
    print(f"{separator}\n")

    run_experiment(config)


Running Experiment 1/7
Config: {'model': 'resnet18', 'batch_size': 64, 'learning_rate': 0.001, 'epochs': 20, 'optimizer': 'adam', 'dropout': 0.3, 'weight_decay': 0.0001}



0,1
total_parameters,▁
trainable_parameters,▁

0,1
total_parameters,11180103
trainable_parameters,11180103


Using device: cuda
Loading data...
Splitting data into train/val...
Model: resnet18
Total parameters: 11,180,103
Trainable parameters: 11,180,103


Epoch 1/20 - Training: 100%|██████████| 382/382 [01:58<00:00,  3.23it/s, Loss=1.6164, Acc=24.62%]


Epoch 1: Train Acc: 24.62%, Val Acc: 25.42%


Epoch 2/20 - Training: 100%|██████████| 382/382 [01:55<00:00,  3.31it/s, Loss=1.1595, Acc=35.11%]


Epoch 2: Train Acc: 35.11%, Val Acc: 35.92%


Epoch 3/20 - Training: 100%|██████████| 382/382 [01:57<00:00,  3.26it/s, Loss=1.3383, Acc=45.77%]


Epoch 3: Train Acc: 45.77%, Val Acc: 41.42%


Epoch 4/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.34it/s, Loss=1.1806, Acc=50.20%]


Epoch 4: Train Acc: 50.20%, Val Acc: 47.34%


Epoch 5/20 - Training: 100%|██████████| 382/382 [01:55<00:00,  3.30it/s, Loss=1.0989, Acc=53.44%]


Epoch 5: Train Acc: 53.44%, Val Acc: 50.01%


Epoch 6/20 - Training: 100%|██████████| 382/382 [01:52<00:00,  3.40it/s, Loss=1.4850, Acc=54.87%]


Epoch 6: Train Acc: 54.87%, Val Acc: 54.28%


Epoch 7/20 - Training: 100%|██████████| 382/382 [01:55<00:00,  3.31it/s, Loss=1.0695, Acc=56.73%]


Epoch 7: Train Acc: 56.73%, Val Acc: 56.19%


Epoch 8/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.33it/s, Loss=1.3061, Acc=57.73%]


Epoch 8: Train Acc: 57.73%, Val Acc: 52.24%


Epoch 9/20 - Training: 100%|██████████| 382/382 [01:53<00:00,  3.37it/s, Loss=1.2038, Acc=59.15%]


Epoch 9: Train Acc: 59.15%, Val Acc: 57.05%


Epoch 10/20 - Training: 100%|██████████| 382/382 [01:55<00:00,  3.30it/s, Loss=0.8638, Acc=60.08%]


Epoch 10: Train Acc: 60.08%, Val Acc: 58.02%


Epoch 11/20 - Training: 100%|██████████| 382/382 [01:52<00:00,  3.41it/s, Loss=1.0394, Acc=64.41%]


Epoch 11: Train Acc: 64.41%, Val Acc: 63.55%


Epoch 12/20 - Training: 100%|██████████| 382/382 [01:53<00:00,  3.36it/s, Loss=0.9803, Acc=66.06%]


Epoch 12: Train Acc: 66.06%, Val Acc: 63.45%


Epoch 13/20 - Training: 100%|██████████| 382/382 [01:52<00:00,  3.39it/s, Loss=0.6433, Acc=66.83%]


Epoch 13: Train Acc: 66.83%, Val Acc: 64.59%


Epoch 14/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.34it/s, Loss=1.5665, Acc=67.89%]


Epoch 14: Train Acc: 67.89%, Val Acc: 64.66%


Epoch 15/20 - Training: 100%|██████████| 382/382 [01:55<00:00,  3.32it/s, Loss=0.7048, Acc=68.74%]


Epoch 15: Train Acc: 68.74%, Val Acc: 64.66%


Epoch 16/20 - Training: 100%|██████████| 382/382 [01:52<00:00,  3.40it/s, Loss=0.7971, Acc=69.43%]


Epoch 16: Train Acc: 69.43%, Val Acc: 64.50%


Epoch 17/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.32it/s, Loss=0.6469, Acc=69.81%]


Epoch 17: Train Acc: 69.81%, Val Acc: 64.43%


Epoch 18/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.34it/s, Loss=0.6414, Acc=71.21%]


Epoch 18: Train Acc: 71.21%, Val Acc: 65.31%


Epoch 19/20 - Training: 100%|██████████| 382/382 [01:58<00:00,  3.23it/s, Loss=0.7552, Acc=72.13%]


Epoch 19: Train Acc: 72.13%, Val Acc: 65.13%


Epoch 20/20 - Training: 100%|██████████| 382/382 [01:54<00:00,  3.33it/s, Loss=0.4071, Acc=73.43%]


Epoch 20: Train Acc: 73.43%, Val Acc: 64.73%
Best Validation Accuracy: 65.31%
Test Accuracy (Val): 65.31%


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_macro,▁
learning_rate,██████████▁▁▁▁▁▁▁▁▁▁
precision_macro,▁
recall_macro,▁
test_accuracy,▁
total_parameters,▁
train_accuracy,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
train_loss,█▇▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁

0,1
best_val_accuracy,65.31228
epoch,20.0
f1_macro,0.6036
learning_rate,0.0001
precision_macro,0.64015
recall_macro,0.59019
test_accuracy,65.31228
total_parameters,11180103.0
train_accuracy,73.42841
train_loss,0.71998



Running Experiment 2/7
Config: {'model': 'resnet18', 'batch_size': 32, 'learning_rate': 0.01, 'epochs': 20, 'optimizer': 'sgd', 'dropout': 0.5, 'weight_decay': 0.0001}



Using device: cuda
Loading data...
Splitting data into train/val...
Model: resnet18
Total parameters: 11,180,103
Trainable parameters: 11,180,103


Epoch 1/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.41it/s, Loss=1.9734, Acc=23.15%]


Epoch 1: Train Acc: 23.15%, Val Acc: 25.12%


Epoch 2/20 - Training: 100%|██████████| 763/763 [02:02<00:00,  6.23it/s, Loss=1.6080, Acc=27.62%]


Epoch 2: Train Acc: 27.62%, Val Acc: 30.67%


Epoch 3/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.36it/s, Loss=1.5932, Acc=40.97%]


Epoch 3: Train Acc: 40.97%, Val Acc: 45.39%


Epoch 4/20 - Training: 100%|██████████| 763/763 [02:01<00:00,  6.28it/s, Loss=1.6223, Acc=47.76%]


Epoch 4: Train Acc: 47.76%, Val Acc: 51.06%


Epoch 5/20 - Training: 100%|██████████| 763/763 [02:00<00:00,  6.32it/s, Loss=0.9011, Acc=51.86%]


Epoch 5: Train Acc: 51.86%, Val Acc: 53.84%


Epoch 6/20 - Training: 100%|██████████| 763/763 [01:58<00:00,  6.43it/s, Loss=0.7670, Acc=54.98%]


Epoch 6: Train Acc: 54.98%, Val Acc: 56.81%


Epoch 7/20 - Training: 100%|██████████| 763/763 [02:01<00:00,  6.27it/s, Loss=0.9799, Acc=56.93%]


Epoch 7: Train Acc: 56.93%, Val Acc: 57.02%


Epoch 8/20 - Training: 100%|██████████| 763/763 [02:04<00:00,  6.14it/s, Loss=1.2395, Acc=58.96%]


Epoch 8: Train Acc: 58.96%, Val Acc: 59.02%


Epoch 9/20 - Training: 100%|██████████| 763/763 [02:02<00:00,  6.21it/s, Loss=1.0654, Acc=60.10%]


Epoch 9: Train Acc: 60.10%, Val Acc: 59.72%


Epoch 10/20 - Training: 100%|██████████| 763/763 [02:00<00:00,  6.33it/s, Loss=0.8007, Acc=61.61%]


Epoch 10: Train Acc: 61.61%, Val Acc: 60.81%


Epoch 11/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.37it/s, Loss=1.0538, Acc=66.55%]


Epoch 11: Train Acc: 66.55%, Val Acc: 63.62%


Epoch 12/20 - Training: 100%|██████████| 763/763 [02:03<00:00,  6.20it/s, Loss=0.6188, Acc=68.05%]


Epoch 12: Train Acc: 68.05%, Val Acc: 64.22%


Epoch 13/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.40it/s, Loss=0.7357, Acc=69.00%]


Epoch 13: Train Acc: 69.00%, Val Acc: 64.36%


Epoch 14/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.40it/s, Loss=0.7571, Acc=70.01%]


Epoch 14: Train Acc: 70.01%, Val Acc: 64.57%


Epoch 15/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.39it/s, Loss=1.7971, Acc=71.32%]


Epoch 15: Train Acc: 71.32%, Val Acc: 64.38%


Epoch 16/20 - Training: 100%|██████████| 763/763 [01:56<00:00,  6.54it/s, Loss=0.7689, Acc=71.64%]


Epoch 16: Train Acc: 71.64%, Val Acc: 64.71%


Epoch 17/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.38it/s, Loss=0.6375, Acc=72.56%]


Epoch 17: Train Acc: 72.56%, Val Acc: 64.69%


Epoch 18/20 - Training: 100%|██████████| 763/763 [01:59<00:00,  6.40it/s, Loss=0.6350, Acc=73.72%]


Epoch 18: Train Acc: 73.72%, Val Acc: 64.41%


Epoch 19/20 - Training: 100%|██████████| 763/763 [01:58<00:00,  6.45it/s, Loss=0.5904, Acc=74.76%]


Epoch 19: Train Acc: 74.76%, Val Acc: 64.08%


Epoch 20/20 - Training: 100%|██████████| 763/763 [02:00<00:00,  6.35it/s, Loss=1.0553, Acc=75.58%]


Epoch 20: Train Acc: 75.58%, Val Acc: 64.87%
Best Validation Accuracy: 64.87%
Test Accuracy (Val): 64.87%


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_macro,▁
learning_rate,██████████▁▁▁▁▁▁▁▁▁▁
precision_macro,▁
recall_macro,▁
test_accuracy,▁
total_parameters,▁
train_accuracy,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▇▆▅▄▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
best_val_accuracy,64.87114
epoch,20.0
f1_macro,0.60561
learning_rate,0.001
precision_macro,0.64348
recall_macro,0.59006
test_accuracy,64.87114
total_parameters,11180103.0
train_accuracy,75.57577
train_loss,0.66528



Running Experiment 3/7
Config: {'model': 'resnet34', 'batch_size': 64, 'learning_rate': 0.001, 'epochs': 20, 'optimizer': 'adam', 'dropout': 0.3, 'weight_decay': 0.0001}



Using device: cuda
Loading data...
Splitting data into train/val...
Model: resnet34
Total parameters: 21,288,263
Trainable parameters: 21,288,263


Epoch 1/20 - Training: 100%|██████████| 382/382 [02:16<00:00,  2.79it/s, Loss=1.7521, Acc=23.76%]


Epoch 1: Train Acc: 23.76%, Val Acc: 26.79%


Epoch 2/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.76it/s, Loss=1.4850, Acc=32.83%]


Epoch 2: Train Acc: 32.83%, Val Acc: 28.51%


Epoch 3/20 - Training: 100%|██████████| 382/382 [02:15<00:00,  2.81it/s, Loss=1.1612, Acc=44.06%]


Epoch 3: Train Acc: 44.06%, Val Acc: 48.15%


Epoch 4/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=1.4368, Acc=48.44%]


Epoch 4: Train Acc: 48.44%, Val Acc: 47.97%


Epoch 5/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=1.5346, Acc=51.40%]


Epoch 5: Train Acc: 51.40%, Val Acc: 49.27%


Epoch 6/20 - Training: 100%|██████████| 382/382 [02:17<00:00,  2.79it/s, Loss=1.0563, Acc=53.32%]


Epoch 6: Train Acc: 53.32%, Val Acc: 51.64%


Epoch 7/20 - Training: 100%|██████████| 382/382 [02:15<00:00,  2.82it/s, Loss=0.8037, Acc=55.58%]


Epoch 7: Train Acc: 55.58%, Val Acc: 54.75%


Epoch 8/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.75it/s, Loss=0.9415, Acc=56.48%]


Epoch 8: Train Acc: 56.48%, Val Acc: 51.47%


Epoch 9/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.76it/s, Loss=0.8593, Acc=57.33%]


Epoch 9: Train Acc: 57.33%, Val Acc: 56.21%


Epoch 10/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=1.3616, Acc=59.02%]


Epoch 10: Train Acc: 59.02%, Val Acc: 54.52%


Epoch 11/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.73it/s, Loss=1.1925, Acc=63.24%]


Epoch 11: Train Acc: 63.24%, Val Acc: 61.97%


Epoch 12/20 - Training: 100%|██████████| 382/382 [02:16<00:00,  2.80it/s, Loss=1.0985, Acc=64.86%]


Epoch 12: Train Acc: 64.86%, Val Acc: 63.11%


Epoch 13/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=0.7250, Acc=65.85%]


Epoch 13: Train Acc: 65.85%, Val Acc: 62.62%


Epoch 14/20 - Training: 100%|██████████| 382/382 [02:17<00:00,  2.77it/s, Loss=0.7554, Acc=65.85%]


Epoch 14: Train Acc: 65.85%, Val Acc: 63.20%


Epoch 15/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.76it/s, Loss=0.6937, Acc=67.15%]


Epoch 15: Train Acc: 67.15%, Val Acc: 63.20%


Epoch 16/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.73it/s, Loss=0.9312, Acc=67.74%]


Epoch 16: Train Acc: 67.74%, Val Acc: 63.76%


Epoch 17/20 - Training: 100%|██████████| 382/382 [02:17<00:00,  2.78it/s, Loss=0.7002, Acc=68.61%]


Epoch 17: Train Acc: 68.61%, Val Acc: 63.22%


Epoch 18/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.73it/s, Loss=1.3602, Acc=69.65%]


Epoch 18: Train Acc: 69.65%, Val Acc: 63.25%


Epoch 19/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=0.8360, Acc=70.71%]


Epoch 19: Train Acc: 70.71%, Val Acc: 62.85%


Epoch 20/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.75it/s, Loss=0.6290, Acc=71.40%]


Epoch 20: Train Acc: 71.40%, Val Acc: 63.90%
Best Validation Accuracy: 63.90%
Test Accuracy (Val): 63.90%


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_macro,▁
learning_rate,██████████▁▁▁▁▁▁▁▁▁▁
precision_macro,▁
recall_macro,▁
test_accuracy,▁
total_parameters,▁
train_accuracy,▁▂▄▅▅▅▆▆▆▆▇▇▇▇▇▇████
train_loss,█▇▅▅▄▄▄▄▃▃▂▂▂▂▂▂▁▁▁▁

0,1
best_val_accuracy,63.89598
epoch,20.0
f1_macro,0.60277
learning_rate,0.0001
precision_macro,0.61582
recall_macro,0.59807
test_accuracy,63.89598
total_parameters,21288263.0
train_accuracy,71.40398
train_loss,0.76868



Running Experiment 4/7
Config: {'model': 'resnet34', 'batch_size': 64, 'learning_rate': 0.0005, 'epochs': 20, 'optimizer': 'adamw', 'dropout': 0.4, 'weight_decay': 0.001}



Using device: cuda
Loading data...
Splitting data into train/val...
Model: resnet34
Total parameters: 21,288,263
Trainable parameters: 21,288,263


Epoch 1/20 - Training:   7%|▋         | 25/382 [00:09<02:07,  2.79it/s, Loss=2.0075, Acc=22.00%]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78122647d8a0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Epoch 1/20 - Training:   8%|▊         | 31/382 [00:12<02:49,  2.07it/s, Loss=1.7988, Acc=22.28%]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x78122647d8a0>
Traceback (most recent call last):
  File "/usr/local/lib/pytho

Epoch 1: Train Acc: 24.31%, Val Acc: 25.59%


Epoch 2/20 - Training: 100%|██████████| 382/382 [02:21<00:00,  2.70it/s, Loss=1.7376, Acc=36.26%]


Epoch 2: Train Acc: 36.26%, Val Acc: 45.23%


Epoch 3/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.74it/s, Loss=1.3561, Acc=46.73%]


Epoch 3: Train Acc: 46.73%, Val Acc: 50.80%


Epoch 4/20 - Training: 100%|██████████| 382/382 [02:21<00:00,  2.70it/s, Loss=1.2667, Acc=50.44%]


Epoch 4: Train Acc: 50.44%, Val Acc: 51.73%


Epoch 5/20 - Training: 100%|██████████| 382/382 [02:16<00:00,  2.80it/s, Loss=1.1528, Acc=53.59%]


Epoch 5: Train Acc: 53.59%, Val Acc: 53.26%


Epoch 6/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.76it/s, Loss=1.1995, Acc=56.22%]


Epoch 6: Train Acc: 56.22%, Val Acc: 56.03%


Epoch 7/20 - Training: 100%|██████████| 382/382 [02:21<00:00,  2.70it/s, Loss=0.9215, Acc=57.91%]


Epoch 7: Train Acc: 57.91%, Val Acc: 58.23%


Epoch 8/20 - Training: 100%|██████████| 382/382 [02:20<00:00,  2.73it/s, Loss=1.3025, Acc=59.28%]


Epoch 8: Train Acc: 59.28%, Val Acc: 58.90%


Epoch 9/20 - Training: 100%|██████████| 382/382 [02:21<00:00,  2.71it/s, Loss=0.8539, Acc=61.18%]


Epoch 9: Train Acc: 61.18%, Val Acc: 58.42%


Epoch 10/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.73it/s, Loss=1.0418, Acc=62.26%]


Epoch 10: Train Acc: 62.26%, Val Acc: 61.34%


Epoch 11/20 - Training: 100%|██████████| 382/382 [02:20<00:00,  2.72it/s, Loss=1.1823, Acc=65.89%]


Epoch 11: Train Acc: 65.89%, Val Acc: 63.99%


Epoch 12/20 - Training: 100%|██████████| 382/382 [02:19<00:00,  2.74it/s, Loss=1.2494, Acc=67.39%]


Epoch 12: Train Acc: 67.39%, Val Acc: 64.64%


Epoch 13/20 - Training: 100%|██████████| 382/382 [02:18<00:00,  2.76it/s, Loss=0.6958, Acc=67.80%]


Epoch 13: Train Acc: 67.80%, Val Acc: 64.66%


Epoch 14/20 - Training: 100%|██████████| 382/382 [02:21<00:00,  2.71it/s, Loss=0.8915, Acc=68.82%]


Epoch 14: Train Acc: 68.82%, Val Acc: 64.41%


Epoch 15/20 - Training: 100%|██████████| 382/382 [02:20<00:00,  2.72it/s, Loss=0.5602, Acc=69.11%]


Epoch 15: Train Acc: 69.11%, Val Acc: 64.43%


Epoch 16/20 - Training: 100%|██████████| 382/382 [02:20<00:00,  2.71it/s, Loss=0.6356, Acc=69.78%]


Epoch 16: Train Acc: 69.78%, Val Acc: 65.03%


Epoch 17/20 - Training: 100%|██████████| 382/382 [02:22<00:00,  2.68it/s, Loss=0.7686, Acc=70.41%]


Epoch 17: Train Acc: 70.41%, Val Acc: 65.01%


Epoch 18/20 - Training: 100%|██████████| 382/382 [02:22<00:00,  2.68it/s, Loss=0.6999, Acc=70.89%]


Epoch 18: Train Acc: 70.89%, Val Acc: 65.06%


Epoch 19/20 - Training: 100%|██████████| 382/382 [02:23<00:00,  2.66it/s, Loss=0.5932, Acc=71.08%]


Epoch 19: Train Acc: 71.08%, Val Acc: 64.82%


Epoch 20/20 - Training: 100%|██████████| 382/382 [02:23<00:00,  2.66it/s, Loss=0.7554, Acc=72.42%]


Epoch 20: Train Acc: 72.42%, Val Acc: 64.96%
Best Validation Accuracy: 65.06%
Test Accuracy (Val): 65.06%


0,1
best_val_accuracy,▁
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
f1_macro,▁
learning_rate,██████████▁▁▁▁▁▁▁▁▁▁
precision_macro,▁
recall_macro,▁
test_accuracy,▁
total_parameters,▁
train_accuracy,▁▃▄▅▅▆▆▆▆▇▇▇▇▇██████
train_loss,█▇▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁

0,1
best_val_accuracy,65.05688
epoch,20.0
f1_macro,0.59501
learning_rate,5e-05
precision_macro,0.61366
recall_macro,0.58643
test_accuracy,65.05688
total_parameters,21288263.0
train_accuracy,72.42439
train_loss,0.75494



Running Experiment 5/7
Config: {'model': 'resnet50', 'batch_size': 32, 'learning_rate': 0.001, 'epochs': 20, 'optimizer': 'adam', 'dropout': 0.5, 'weight_decay': 0.0001}



Using device: cuda
Loading data...
Splitting data into train/val...
Model: resnet50
Total parameters: 23,522,375
Trainable parameters: 23,522,375


Epoch 1/20 - Training: 100%|██████████| 763/763 [04:09<00:00,  3.05it/s, Loss=1.8361, Acc=23.98%]


Epoch 1: Train Acc: 23.98%, Val Acc: 25.26%


Epoch 2/20 - Training: 100%|██████████| 763/763 [04:10<00:00,  3.05it/s, Loss=1.7696, Acc=24.82%]


Epoch 2: Train Acc: 24.82%, Val Acc: 25.33%


Epoch 3/20 - Training: 100%|██████████| 763/763 [04:10<00:00,  3.04it/s, Loss=1.7195, Acc=30.24%]


Epoch 3: Train Acc: 30.24%, Val Acc: 33.41%


Epoch 4/20 - Training: 100%|██████████| 763/763 [04:10<00:00,  3.05it/s, Loss=1.7057, Acc=39.05%]


Epoch 4: Train Acc: 39.05%, Val Acc: 38.19%


Epoch 5/20 - Training: 100%|██████████| 763/763 [04:07<00:00,  3.08it/s, Loss=1.6007, Acc=43.57%]


Epoch 5: Train Acc: 43.57%, Val Acc: 45.16%


Epoch 6/20 - Training: 100%|██████████| 763/763 [04:07<00:00,  3.08it/s, Loss=1.4123, Acc=46.14%]


Epoch 6: Train Acc: 46.14%, Val Acc: 46.95%


Epoch 7/20 - Training: 100%|██████████| 763/763 [04:07<00:00,  3.09it/s, Loss=1.7175, Acc=47.69%]


Epoch 7: Train Acc: 47.69%, Val Acc: 48.46%


Epoch 8/20 - Training: 100%|██████████| 763/763 [04:05<00:00,  3.10it/s, Loss=1.1032, Acc=49.41%]


Epoch 8: Train Acc: 49.41%, Val Acc: 51.64%


Epoch 9/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.13it/s, Loss=1.3975, Acc=50.92%]


Epoch 9: Train Acc: 50.92%, Val Acc: 51.92%


Epoch 10/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.13it/s, Loss=1.4032, Acc=51.86%]


Epoch 10: Train Acc: 51.86%, Val Acc: 51.59%


Epoch 11/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.13it/s, Loss=1.2707, Acc=56.15%]


Epoch 11: Train Acc: 56.15%, Val Acc: 57.35%


Epoch 12/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.13it/s, Loss=1.0781, Acc=57.76%]


Epoch 12: Train Acc: 57.76%, Val Acc: 58.00%


Epoch 13/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.14it/s, Loss=1.3213, Acc=57.76%]


Epoch 13: Train Acc: 57.76%, Val Acc: 58.14%


Epoch 14/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.13it/s, Loss=1.3223, Acc=58.54%]


Epoch 14: Train Acc: 58.54%, Val Acc: 58.49%


Epoch 15/20 - Training: 100%|██████████| 763/763 [04:03<00:00,  3.14it/s, Loss=1.1605, Acc=59.02%]


Epoch 15: Train Acc: 59.02%, Val Acc: 58.86%


Epoch 16/20 - Training: 100%|██████████| 763/763 [04:02<00:00,  3.15it/s, Loss=1.0124, Acc=59.50%]


Epoch 16: Train Acc: 59.50%, Val Acc: 59.23%


Epoch 17/20 - Training:  61%|██████▏   | 469/763 [02:29<01:35,  3.09it/s, Loss=1.2823, Acc=59.69%]