# Attention-based CNN for Facial Expression Recognition

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required packages
!pip install wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn

# Set up Kaggle API
!pip install kaggle

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
# Upload your kaggle.json to Colab and run:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
!unzip -q challenges-in-representation-learning-facial-expression-recognition-challenge.zip

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 86% 246M/285M [00:00<00:00, 445MB/s]
100% 285M/285M [00:00<00:00, 475MB/s]


In [5]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from sklearn.metrics import classification_report, confusion_matrix
import wandb
from datetime import datetime
from tqdm import tqdm


In [6]:

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Configuration
CONFIG = {
    'model_name': 'attention_cnn',
    'batch_size': 64,
    'learning_rate': 0.0005,
    'epochs': 20,
    'image_size': 48,
    'num_classes': 7,
    'random_seed': 42,
    'weight_decay': 1e-4,
    'dropout_rate': 0.5,
    'attention_heads': 4  # Number of attention heads
}

# Set random seeds for reproducibility
torch.manual_seed(CONFIG['random_seed'])
np.random.seed(CONFIG['random_seed'])

torch.backends.cudnn.benchmark = True

# Use mixed precision training
scaler = torch.cuda.amp.GradScaler()


Using device: cuda


  scaler = torch.cuda.amp.GradScaler()


In [7]:

# Initialize Weights & Biases for experiment tracking
wandb.init(
    project="facial-expression-recognition",
    name=f"{CONFIG['model_name']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
    config=CONFIG,
    job_type="training"
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mellekvirikashvili[0m ([33mellekvirikashvili-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Dataset and Data Loading

In [8]:

class FERDataset(Dataset):
    def __init__(self, dataframe, indices, transform=None):
        self.data = dataframe.iloc[indices].reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = self.data.iloc[idx]['pixels']
        image = np.array(pixels.split(), dtype=np.uint8).reshape(48, 48)
        image = image.astype(np.float32) / 255.0
        image = torch.from_numpy(image).unsqueeze(0)  # Add channel dimension

        if self.transform:
            image = self.transform(image)

        label = int(self.data.iloc[idx]['emotion'])
        return image, label


## Attention Mechanisms

In [9]:

class SpatialAttention(nn.Module):
    def __init__(self, in_channels, reduction_ratio=8):
        super(SpatialAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction_ratio, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction_ratio, in_channels, 1, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return x * out

class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction_ratio=8):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels // reduction_ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // reduction_ratio, in_channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        avg_out = self.fc(self.avg_pool(x).view(b, c))
        max_out = self.fc(self.max_pool(x).view(b, c))
        out = (avg_out + max_out).view(b, c, 1, 1)
        return x * out

class CBAM(nn.Module):
    """Convolutional Block Attention Module"""
    def __init__(self, in_channels, reduction_ratio=8):
        super(CBAM, self).__init__()
        self.channel_att = ChannelAttention(in_channels, reduction_ratio)
        self.spatial_att = SpatialAttention(in_channels, reduction_ratio)

    def forward(self, x):
        x = self.channel_att(x)
        x = self.spatial_att(x)
        return x


In [10]:

class AttentionCNN(nn.Module):
    def __init__(self, num_classes=7, dropout_rate=0.5):
        super(AttentionCNN, self).__init__()

        # Initial conv block
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        # Attention after first block
        self.att1 = CBAM(32)

        # Second conv block
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        # Attention after second block
        self.att2 = CBAM(64)

        # Third conv block
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        # Attention after third block
        self.att3 = CBAM(128)

        # Global average pooling
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # First block
        x = self.conv1(x)
        x = self.att1(x)

        # Second block
        x = self.conv2(x)
        x = self.att2(x)

        # Third block
        x = self.conv3(x)
        x = self.att3(x)

        # Global average pooling and flatten
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)

        # Fully connected layers
        x = self.fc(x)

        return x


In [11]:
class LightAttentionCNN(nn.Module):
    def __init__(self, num_classes=7, dropout_rate=0.3):
        super().__init__()
        # Initial conv block
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        # Attention after first block
        self.att1 = CBAM(16)

        # Second conv block
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        # Attention after second block
        self.att2 = CBAM(32)

        # Global average pooling
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(32, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        # First convolutional block + attention
        x = self.conv1(x)
        x = self.att1(x)

        # Second convolutional block + attention
        x = self.conv2(x)
        x = self.att2(x)

        # Global average pooling
        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)  # Flatten to (batch_size, 32)

        # Fully connected layers
        x = self.fc(x)

        return x


In [12]:

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50):
    best_val_acc = 0.1

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        train_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')

        for inputs, labels in train_bar:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Update progress bar
            train_bar.set_postfix({
                'loss': running_loss / total,
                'acc': 100. * correct / total
            })

            # In your training loop:
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # Log training metrics
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = 100. * correct / len(train_loader.dataset)

        wandb.log({
            'train/loss': epoch_loss,
            'train/accuracy': epoch_acc,
            'epoch': epoch
        })

        # Validation phase
        val_loss, val_acc = evaluate_model(model, val_loader, criterion)

        # Log validation metrics
        wandb.log({
            'val/loss': val_loss,
            'val/accuracy': val_acc,
            'epoch': epoch
        })

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_attention_cnn.pth')
            print(f'Model saved with validation accuracy: {val_acc:.2f}%')

    return model

In [13]:

def evaluate_model(model, data_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc='Evaluating'):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_loss = running_loss / len(data_loader.dataset)
    accuracy = 100. * correct / len(data_loader.dataset)

    print(f'Validation Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

    # Log confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    wandb.log({"confusion_matrix": wandb.Image(plt)})
    plt.close()

    # Log classification report
    class_report = classification_report(all_labels, all_preds, output_dict=True)
    wandb.log({"classification_report": class_report})

    return avg_loss, accuracy


In [14]:
# Load data
print("Loading data...")
train_df = pd.read_csv('train.csv')

# Load train/val indices (assuming they're saved from previous experiments)
try:
    train_indices = np.load('../train_indices.npy')
    val_indices = np.load('../val_indices.npy')
except FileNotFoundError:
    # If indices files don't exist, create them
    from sklearn.model_selection import train_test_split
    train_indices, val_indices = train_test_split(
        np.arange(len(train_df)),
        test_size=0.2,
        random_state=CONFIG['random_seed'],
        stratify=train_df['emotion']
    )
    np.save('../train_indices.npy', train_indices)
    np.save('../val_indices.npy', val_indices)

# Create datasets
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

val_transform = transforms.Compose([
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_dataset = FERDataset(train_df, train_indices, transform=train_transform)
val_dataset = FERDataset(train_df, val_indices, transform=val_transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'],
                        shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'],
                        shuffle=False, num_workers=4, pin_memory=True)


Loading data...




In [15]:

# Initialize model, loss function, and optimizer
model = LightAttentionCNN(num_classes=CONFIG['num_classes'],
                    dropout_rate=CONFIG['dropout_rate']).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),
                        lr=CONFIG['learning_rate'],
                        weight_decay=CONFIG['weight_decay'])

# Log model architecture to wandb
wandb.watch(model, log='all')


In [16]:

# Train the model
print("Starting training...")
model = train_model(model, train_loader, val_loader, criterion, optimizer,
                    num_epochs=CONFIG['epochs'])

# Final evaluation
print("Final evaluation...")
evaluate_model(model, val_loader, criterion)

# Save the final model
torch.save(model.state_dict(), 'final_attention_cnn.pth')
wandb.save('*.pth')

print("Training completed!")

Starting training...


  with torch.cuda.amp.autocast():
Epoch 1/20 [Train]: 100%|██████████| 359/359 [00:36<00:00,  9.80it/s, loss=1.8, acc=24.8]
Evaluating: 100%|██████████| 90/90 [00:05<00:00, 16.87it/s]


Validation Loss: 1.7779, Accuracy: 25.46%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 25.46%


  with torch.cuda.amp.autocast():
Epoch 2/20 [Train]: 100%|██████████| 359/359 [00:32<00:00, 11.14it/s, loss=1.78, acc=26]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 24.81it/s]


Validation Loss: 1.7721, Accuracy: 25.62%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 25.62%


  with torch.cuda.amp.autocast():
Epoch 3/20 [Train]: 100%|██████████| 359/359 [00:32<00:00, 11.12it/s, loss=1.78, acc=26.2]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.27it/s]


Validation Loss: 1.7637, Accuracy: 26.30%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 26.30%


  with torch.cuda.amp.autocast():
Epoch 4/20 [Train]: 100%|██████████| 359/359 [00:32<00:00, 11.10it/s, loss=1.77, acc=26.9]
Evaluating: 100%|██████████| 90/90 [00:05<00:00, 16.58it/s]


Validation Loss: 1.7571, Accuracy: 27.05%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 27.05%


  with torch.cuda.amp.autocast():
Epoch 5/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.30it/s, loss=1.77, acc=26.9]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.43it/s]


Validation Loss: 1.7537, Accuracy: 27.15%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 27.15%


  with torch.cuda.amp.autocast():
Epoch 6/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.31it/s, loss=1.76, acc=27.6]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.51it/s]


Validation Loss: 1.7507, Accuracy: 27.39%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 27.39%


  with torch.cuda.amp.autocast():
Epoch 7/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.35it/s, loss=1.74, acc=28.9]
Evaluating: 100%|██████████| 90/90 [00:04<00:00, 18.37it/s]


Validation Loss: 1.7089, Accuracy: 30.18%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 30.18%


  with torch.cuda.amp.autocast():
Epoch 8/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.57it/s, loss=1.71, acc=30.2]
Evaluating: 100%|██████████| 90/90 [00:04<00:00, 19.77it/s]


Validation Loss: 1.6765, Accuracy: 31.70%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 31.70%


  with torch.cuda.amp.autocast():
Epoch 9/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.34it/s, loss=1.69, acc=31.4]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.48it/s]


Validation Loss: 1.6525, Accuracy: 34.69%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 34.69%


  with torch.cuda.amp.autocast():
Epoch 10/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.43it/s, loss=1.67, acc=32.1]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.42it/s]


Validation Loss: 1.6906, Accuracy: 31.10%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 11/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.37it/s, loss=1.65, acc=32.8]
Evaluating: 100%|██████████| 90/90 [00:05<00:00, 15.98it/s]


Validation Loss: 1.6073, Accuracy: 35.13%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 35.13%


  with torch.cuda.amp.autocast():
Epoch 12/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.46it/s, loss=1.64, acc=33.4]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 24.58it/s]


Validation Loss: 1.6196, Accuracy: 34.20%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 13/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.53it/s, loss=1.63, acc=33.7]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.62it/s]


Validation Loss: 1.6067, Accuracy: 35.96%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 35.96%


  with torch.cuda.amp.autocast():
Epoch 14/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.48it/s, loss=1.62, acc=34.2]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 24.06it/s]


Validation Loss: 1.6023, Accuracy: 35.20%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 15/20 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.58it/s, loss=1.61, acc=34.9]
Evaluating: 100%|██████████| 90/90 [00:05<00:00, 17.22it/s]


Validation Loss: 1.5826, Accuracy: 36.49%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 36.49%


  with torch.cuda.amp.autocast():
Epoch 16/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.55it/s, loss=1.61, acc=34.9]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 24.95it/s]


Validation Loss: 1.6078, Accuracy: 35.02%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 17/20 [Train]: 100%|██████████| 359/359 [00:30<00:00, 11.63it/s, loss=1.6, acc=34.9]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.75it/s]


Validation Loss: 1.5819, Accuracy: 36.61%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved with validation accuracy: 36.61%


  with torch.cuda.amp.autocast():
Epoch 18/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.45it/s, loss=1.59, acc=34.8]
Evaluating: 100%|██████████| 90/90 [00:04<00:00, 20.09it/s]


Validation Loss: 1.5927, Accuracy: 35.79%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 19/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.47it/s, loss=1.59, acc=35.6]
Evaluating: 100%|██████████| 90/90 [00:04<00:00, 19.78it/s]


Validation Loss: 1.5740, Accuracy: 36.47%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  with torch.cuda.amp.autocast():
Epoch 20/20 [Train]: 100%|██████████| 359/359 [00:31<00:00, 11.54it/s, loss=1.58, acc=35.5]
Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.33it/s]


Validation Loss: 1.5844, Accuracy: 36.35%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Final evaluation...


Evaluating: 100%|██████████| 90/90 [00:03<00:00, 25.62it/s]


Validation Loss: 1.5844, Accuracy: 36.35%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training completed!


In [18]:
wandb.save