<a href="https://colab.research.google.com/github/gchit21/ML4/blob/main/FiveLayerBatchLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Kaggle and wandb Setup**

In [1]:
!pip install kaggle



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [4]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge


Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 98% 280M/285M [00:01<00:00, 272MB/s]
100% 285M/285M [00:01<00:00, 260MB/s]
Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [5]:
!pip install wandb onnx -Uq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgchit21[0m ([33mgchit21-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# **Model**

In [7]:
import os
import random

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

from sklearn.model_selection import train_test_split

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

**Turn the pixel string into an numpy Array and then into an Image**

In [8]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np

from PIL import Image

import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

class FERDataset(Dataset):
    def __init__(self, csv_file, usage, transform=None):
        self.data = pd.read_csv(csv_file)
        self.data = self.data[self.data[' Usage'] == usage]
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = np.fromstring(self.data.iloc[idx][' pixels'], sep=' ', dtype=np.uint8).reshape(48, 48)
        image = Image.fromarray(pixels)
        label = int(self.data.iloc[idx]['emotion'])

        if self.transform:
            image = self.transform(image)

        return image, label


**Model**

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FiveLayerBatchLSTM(nn.Module):
    def __init__(self, kernels, kernel_size=3, classes=7, drop_threshold=0.3, lstm_hidden=64):
        super(FiveLayerBatchLSTM, self).__init__()

        assert len(kernels) == 5, "kernels list must have exactly 5 values (one for each conv layer)."

        self.conv1 = nn.Conv2d(1, kernels[0], kernel_size=kernel_size, padding=1)
        self.bn1 = nn.BatchNorm2d(kernels[0])

        self.conv2 = nn.Conv2d(kernels[0], kernels[1], kernel_size=kernel_size, padding=1)
        self.bn2 = nn.BatchNorm2d(kernels[1])

        self.conv3 = nn.Conv2d(kernels[1], kernels[2], kernel_size=kernel_size, padding=1)
        self.bn3 = nn.BatchNorm2d(kernels[2])

        self.conv4 = nn.Conv2d(kernels[2], kernels[3], kernel_size=kernel_size, padding=1)
        self.bn4 = nn.BatchNorm2d(kernels[3])

        self.conv5 = nn.Conv2d(kernels[3], kernels[4], kernel_size=kernel_size, padding=1)
        self.bn5 = nn.BatchNorm2d(kernels[4])

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(drop_threshold)

        # LSTM expects input shape: (seq_len, batch, input_size)
        # We will reshape CNN output (B, C, 3, 3) -> (B, 3, C*3) -> LSTM
        self.lstm_input_size = kernels[4] * 3
        self.lstm = nn.LSTM(input_size=self.lstm_input_size, hidden_size=lstm_hidden, batch_first=True)

        self.fc = nn.Linear(lstm_hidden, classes)

    def forward(self, x):
        x = self.pool(F.leaky_relu(self.bn1(self.conv1(x))))  # 48 → 24
        x = self.pool(F.leaky_relu(self.bn2(self.conv2(x))))  # 24 → 12
        x = self.pool(F.leaky_relu(self.bn3(self.conv3(x))))  # 12 → 6
        x = self.pool(F.leaky_relu(self.bn4(self.conv4(x))))  # 6  → 3
        x = F.leaky_relu(self.bn5(self.conv5(x)))             # 3  → 3 (no pool)

        x = self.dropout(x)                                   # Shape: (B, C, 3, 3)
        x = x.permute(0, 2, 1, 3).contiguous()                # (B, 3, C, 3)
        x = x.view(x.size(0), 3, -1)                          # (B, 3, C*3)

        _, (hn, _) = self.lstm(x)                             # hn: (1, B, hidden)
        x = hn[-1]                                            # (B, hidden)
        x = self.fc(x)                                        # (B, classes)
        return x


**Training**

In [10]:
def train_model(model, train_loader, criterion, optimizer, device, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        example_ct=0
        batch_ct=0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            loss, outputs = train_batch(images,labels,model,optimizer,criterion)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            example_ct +=  len(images)
            batch_ct += 1

            # Report metrics every 25th batch
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")
        wandb.log({
          "epoch": num_epochs,
          "train/loss": round(running_loss / len(train_loader),4),
          "train/accuracy": round(100 * correct / total,2)
        })

def train_batch(images, labels, model, optimizer, criterion):
    images, labels = images.to(device), labels.to(device)

    # Forward pass ➡
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass ⬅
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss,outputs


**Train Logging**

In [11]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

**Testing and Test Logging**

In [12]:
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    wandb.log({"Test Accuracy": round(100 * correct / total, 2)})

    print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [17]:
def make(config):

    transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),])

    # Make the data
    train =  FERDataset("icml_face_data.csv", usage="Training", transform=transform)
    val =  FERDataset("icml_face_data.csv", usage="PublicTest", transform=transforms.ToTensor())

    train_loader = DataLoader(train, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=config.batch_size, shuffle=False)

    # Make the model
    model = FiveLayerBatchLSTM(config.kernels,config.kernel_size,config.classes,config.drop_threshold,config.lstm_hidden).to(device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=config.learning_rate)

    return model, train_loader, val_loader, criterion, optimizer

In [14]:
def model_pipeline(hyperparameters, run_name):

    # tell wandb to get started
    with wandb.init(project="5-Layer_CNN_LSTM", name=run_name, config=hyperparameters):
      # access all HPs through wandb.config, so logging matches execution!
      config = wandb.config

      # make the model, data, and optimization problem
      model, train_loader, val_loader, criterion, optimizer = make(config)
      print(model)

      # and use them to train the model
      train_model(model, train_loader, criterion, optimizer, device, config.epochs)

      # and test its final performance
      test_model(model, val_loader, device)

    return model

In [15]:
number = 10

In [20]:
config = dict(
    epochs=10,
    classes=7,
    kernels=[12, 24, 36 ,64, 80],
    kernel_size = 3,
    batch_size=125,
    learning_rate=0.0002,
    drop_threshold=0.35,
    lstm_hidden= 35,
    label_smoothing = 0.2,
    dataset="icml_face_data",
    architecture="CNN_BatchNorm")


name ="run_" + str(number)
number+=1

model = model_pipeline(config,name)

FiveLayerBatchLSTM(
  (conv1): Conv2d(1, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(24, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(36, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn5): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.35, inplace=False)
  (lstm): LSTM(240, 35



Loss after 18625 examples: 1.749
Loss after 21750 examples: 1.731
Loss after 24875 examples: 1.709
Loss after 28000 examples: 1.742
Epoch 2/10, Loss: 1.6793, Accuracy: 41.33%
Loss after 03000 examples: 1.655




Loss after 06125 examples: 1.642
Loss after 09250 examples: 1.602
Loss after 12375 examples: 1.533
Loss after 15500 examples: 1.682
Loss after 18625 examples: 1.598
Loss after 21750 examples: 1.615
Loss after 24875 examples: 1.637




Loss after 28000 examples: 1.563
Epoch 3/10, Loss: 1.6282, Accuracy: 44.76%
Loss after 03000 examples: 1.637
Loss after 06125 examples: 1.577
Loss after 09250 examples: 1.503
Loss after 12375 examples: 1.603
Loss after 15500 examples: 1.629




Loss after 18625 examples: 1.601
Loss after 21750 examples: 1.512
Loss after 24875 examples: 1.574
Loss after 28000 examples: 1.542
Epoch 4/10, Loss: 1.5907, Accuracy: 47.33%
Loss after 03000 examples: 1.606
Loss after 06125 examples: 1.543




Loss after 09250 examples: 1.511
Loss after 12375 examples: 1.538
Loss after 15500 examples: 1.552
Loss after 18625 examples: 1.571
Loss after 21750 examples: 1.559
Loss after 24875 examples: 1.587




Loss after 28000 examples: 1.499
Epoch 5/10, Loss: 1.5667, Accuracy: 48.99%
Loss after 03000 examples: 1.555
Loss after 06125 examples: 1.609
Loss after 09250 examples: 1.617
Loss after 12375 examples: 1.597
Loss after 15500 examples: 1.566
Loss after 18625 examples: 1.539




Loss after 21750 examples: 1.546
Loss after 24875 examples: 1.630
Loss after 28000 examples: 1.510
Epoch 6/10, Loss: 1.5418, Accuracy: 51.05%
Loss after 03000 examples: 1.471
Loss after 06125 examples: 1.604
Loss after 09250 examples: 1.513




Loss after 12375 examples: 1.505
Loss after 15500 examples: 1.447
Loss after 18625 examples: 1.497
Loss after 21750 examples: 1.502
Loss after 24875 examples: 1.465




Loss after 28000 examples: 1.484
Epoch 7/10, Loss: 1.5239, Accuracy: 52.14%
Loss after 03000 examples: 1.508
Loss after 06125 examples: 1.539
Loss after 09250 examples: 1.541
Loss after 12375 examples: 1.453
Loss after 15500 examples: 1.477




Loss after 18625 examples: 1.437
Loss after 21750 examples: 1.584
Loss after 24875 examples: 1.541
Loss after 28000 examples: 1.567
Epoch 8/10, Loss: 1.5059, Accuracy: 53.26%
Loss after 03000 examples: 1.457
Loss after 06125 examples: 1.415
Loss after 09250 examples: 1.517




Loss after 12375 examples: 1.401
Loss after 15500 examples: 1.514
Loss after 18625 examples: 1.454
Loss after 21750 examples: 1.396
Loss after 24875 examples: 1.490
Loss after 28000 examples: 1.556




Epoch 9/10, Loss: 1.4923, Accuracy: 54.33%
Loss after 03000 examples: 1.484
Loss after 06125 examples: 1.448
Loss after 09250 examples: 1.388
Loss after 12375 examples: 1.551
Loss after 15500 examples: 1.509
Loss after 18625 examples: 1.417




Loss after 21750 examples: 1.547
Loss after 24875 examples: 1.527
Loss after 28000 examples: 1.494
Epoch 10/10, Loss: 1.4801, Accuracy: 55.12%
Test Accuracy: 51.99%


0,1
Test Accuracy,▁
epoch,▁▁▁▁▁▁▁▁██████████
loss,█▆▅▆▄▃▄▁▁
train/accuracy,▁▄▅▆▆▇▇▇██
train/loss,█▅▄▃▃▂▂▂▁▁

0,1
Test Accuracy,51.99
epoch,10.0
loss,1.73361
train/accuracy,55.12
train/loss,1.4801
