<a href="https://colab.research.google.com/github/gchit21/ML4/blob/main/ThreeLayerCNNBatchResnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Kaggle and wandb Setup**

In [None]:
!pip install kaggle



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [None]:
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge


Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 88% 250M/285M [00:00<00:00, 749MB/s] 
100% 285M/285M [00:00<00:00, 775MB/s]
Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


In [None]:
!pip install wandb onnx -Uq

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/17.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.8/17.6 MB[0m [31m23.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/17.6 MB[0m [31m94.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━[0m [32m12.8/17.6 MB[0m [31m169.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m17.6/17.6 MB[0m [31m179.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m17.6/17.6 MB[0m [31m179.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgchit21[0m ([33mgchit21-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# **Model**

In [None]:
import os
import random

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.auto import tqdm

from sklearn.model_selection import train_test_split

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

**Turn the pixel string into an numpy Array and then into an Image**

In [None]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import numpy as np

from PIL import Image

import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

class FERDataset(Dataset):
    def __init__(self, csv_file, usage, transform=None):
        self.data = pd.read_csv(csv_file)
        self.data = self.data[self.data[' Usage'] == usage]
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = np.fromstring(self.data.iloc[idx][' pixels'], sep=' ', dtype=np.uint8).reshape(48, 48)
        image = Image.fromarray(pixels)
        label = int(self.data.iloc[idx]['emotion'])

        if self.transform:
            image = self.transform(image)

        return image, label


**Model**

In [None]:
class ResidualBlock(nn.Module):
    def __init__(self, channels, kernel_size=3):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size, padding=1)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.LeakyReLU()
        self.conv2 = nn.Conv2d(channels, channels, kernel_size, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        identity = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity  # Residual connection
        out = self.relu(out)
        return out


In [None]:
import torch.nn.functional as F

class ThreeLayerBatchResnet(nn.Module):
    def __init__(self, kernels, kernel_size, classes=7, drop_threshold=0.2):
        super(ThreeLayerBatchResnet, self).__init__()

        assert len(kernels) == 3, "kernels list must have exactly 3 values (one for each conv layer)."

        self.conv1 = nn.Conv2d(1, kernels[0], kernel_size=kernel_size, padding=1)
        self.bn1 = nn.BatchNorm2d(kernels[0])

        self.conv2 = nn.Conv2d(kernels[0], kernels[1], kernel_size=kernel_size, padding=1)
        self.bn2 = nn.BatchNorm2d(kernels[1])

        self.res_block = ResidualBlock(kernels[1], kernel_size=kernel_size)  # New residual block

        self.conv3 = nn.Conv2d(kernels[1], kernels[2], kernel_size=kernel_size, padding=1)
        self.bn3 = nn.BatchNorm2d(kernels[2])

        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(drop_threshold)
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(kernels[2] * 6 * 6, 128)
        self.fc2 = nn.Linear(128, classes)

    def forward(self, x):
        x = self.pool(F.leaky_relu(self.bn1(self.conv1(x))))  # 48 -> 24
        x = self.pool(F.leaky_relu(self.bn2(self.conv2(x))))  # 24 -> 12

        x = self.res_block(x)  # 👈 ResNet layer added here

        x = self.pool(F.leaky_relu(self.bn3(self.conv3(x))))  # 12 -> 6

        x = self.dropout(x)
        x = self.flatten(x)
        x = F.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        return x


**Training**

In [None]:
def train_model(model, train_loader, criterion, optimizer, device, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        example_ct=0
        batch_ct=0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            loss, outputs = train_batch(images,labels,model,optimizer,criterion)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            example_ct +=  len(images)
            batch_ct += 1

            # Report metrics every 25th batch
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")
        wandb.log({
          "epoch": num_epochs,
          "train/loss": round(running_loss / len(train_loader),4),
          "train/accuracy": round(100 * correct / total,2)
        })

def train_batch(images, labels, model, optimizer, criterion):
    images, labels = images.to(device), labels.to(device)

    # Forward pass ➡
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass ⬅
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss,outputs


**Train Logging**

In [None]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

**Testing and Test Logging**

In [None]:
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    wandb.log({"Test Accuracy": round(100 * correct / total, 2)})

    print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [37]:
def make(config):

    import torchvision.transforms as transforms

    transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(5),
    transforms.RandomCrop(48, padding=4),
    transforms.ToTensor(),
    ])


    # Make the data
    train =  FERDataset("icml_face_data.csv", usage="Training", transform=transform)
    val =  FERDataset("icml_face_data.csv", usage="PublicTest", transform=transforms.ToTensor())

    train_loader = DataLoader(train, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=config.batch_size, shuffle=False)

    # Make the model
    model = ThreeLayerBatchResnet(config.kernels,config.kernel_size,config.classes,config.drop_threshold).to(device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=config.learning_rate)

    return model, train_loader, val_loader, criterion, optimizer

In [None]:
def model_pipeline(hyperparameters, run_name):

    # tell wandb to get started
    with wandb.init(project="3-Layer_BatchNorm_Resnet", name=run_name, config=hyperparameters):
      # access all HPs through wandb.config, so logging matches execution!
      config = wandb.config

      # make the model, data, and optimization problem
      model, train_loader, val_loader, criterion, optimizer = make(config)
      print(model)

      # and use them to train the model
      train_model(model, train_loader, criterion, optimizer, device, config.epochs)

      # and test its final performance
      test_model(model, val_loader, device)

    return model

In [31]:
number = 31

In [41]:
config = dict(
    epochs=10,
    classes=7,
    kernels=[30, 42 ,64],
    kernel_size = 3,
    batch_size=115,
    learning_rate=0.001,
    drop_threshold=0.25,
    dataset="icml_face_data",
    architecture="CNN_BatchNorm_Resnet")

name ="run_" + str(number)
number+=1

model = model_pipeline(config,name)

ThreeLayerBatchResnet(
  (conv1): Conv2d(1, 30, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(30, 46, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(46, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (res_block): ResidualBlock(
    (conv1): Conv2d(46, 46, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(46, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): LeakyReLU(negative_slope=0.01)
    (conv2): Conv2d(46, 46, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(46, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): Conv2d(46, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilatio



Loss after 17135 examples: 1.318
Loss after 20010 examples: 1.405
Loss after 22885 examples: 1.273
Loss after 25760 examples: 1.280
Loss after 28635 examples: 1.313
Epoch 2/10, Loss: 1.3744, Accuracy: 46.90%




Loss after 02760 examples: 1.275
Loss after 05635 examples: 1.216
Loss after 08510 examples: 1.374
Loss after 11385 examples: 1.318




Loss after 14260 examples: 1.317
Loss after 17135 examples: 1.293
Loss after 20010 examples: 1.356
Loss after 22885 examples: 1.170
Loss after 25760 examples: 1.173
Loss after 28635 examples: 1.218
Epoch 3/10, Loss: 1.2873, Accuracy: 50.88%




Loss after 02760 examples: 1.157
Loss after 05635 examples: 1.122
Loss after 08510 examples: 1.179
Loss after 11385 examples: 1.306
Loss after 14260 examples: 1.173




Loss after 17135 examples: 1.323
Loss after 20010 examples: 1.334
Loss after 22885 examples: 1.270
Loss after 25760 examples: 1.303




Loss after 28635 examples: 1.181
Epoch 4/10, Loss: 1.2311, Accuracy: 52.96%
Loss after 02760 examples: 1.226
Loss after 05635 examples: 1.079
Loss after 08510 examples: 1.183
Loss after 11385 examples: 1.147




Loss after 14260 examples: 1.179
Loss after 17135 examples: 1.273
Loss after 20010 examples: 1.182
Loss after 22885 examples: 1.174
Loss after 25760 examples: 1.093




Loss after 28635 examples: 1.133
Epoch 5/10, Loss: 1.1933, Accuracy: 54.44%
Loss after 02760 examples: 1.316
Loss after 05635 examples: 1.219
Loss after 08510 examples: 1.071
Loss after 11385 examples: 1.312




Loss after 14260 examples: 1.259
Loss after 17135 examples: 1.271
Loss after 20010 examples: 1.191
Loss after 22885 examples: 1.220
Loss after 25760 examples: 1.194




Loss after 28635 examples: 1.027
Epoch 6/10, Loss: 1.1606, Accuracy: 55.99%
Loss after 02760 examples: 1.111
Loss after 05635 examples: 1.118
Loss after 08510 examples: 1.004
Loss after 11385 examples: 1.130




Loss after 14260 examples: 1.138
Loss after 17135 examples: 1.118
Loss after 20010 examples: 1.114
Loss after 22885 examples: 1.094
Loss after 25760 examples: 1.008




Loss after 28635 examples: 1.190
Epoch 7/10, Loss: 1.1350, Accuracy: 56.70%
Loss after 02760 examples: 1.181
Loss after 05635 examples: 1.189
Loss after 08510 examples: 1.090




Loss after 11385 examples: 1.035
Loss after 14260 examples: 1.046
Loss after 17135 examples: 1.062
Loss after 20010 examples: 0.981




Loss after 22885 examples: 1.149
Loss after 25760 examples: 0.913
Loss after 28635 examples: 1.152
Epoch 8/10, Loss: 1.1175, Accuracy: 57.46%
Loss after 02760 examples: 1.103
Loss after 05635 examples: 1.108




Loss after 08510 examples: 1.157
Loss after 11385 examples: 1.204
Loss after 14260 examples: 0.975
Loss after 17135 examples: 1.128
Loss after 20010 examples: 1.079




Loss after 22885 examples: 1.124
Loss after 25760 examples: 1.358
Loss after 28635 examples: 1.025
Epoch 9/10, Loss: 1.0962, Accuracy: 58.32%
Loss after 02760 examples: 1.020
Loss after 05635 examples: 0.989




Loss after 08510 examples: 1.083
Loss after 11385 examples: 1.010
Loss after 14260 examples: 0.959
Loss after 17135 examples: 1.091
Loss after 20010 examples: 1.251




Loss after 22885 examples: 1.155
Loss after 25760 examples: 1.031
Loss after 28635 examples: 1.066
Epoch 10/10, Loss: 1.0805, Accuracy: 59.04%
Test Accuracy: 56.00%


0,1
Test Accuracy,▁
epoch,▁▁▁▁▁▁▁▁▁██████████
loss,▇█▇▆▅▃▁▂▁▅
train/accuracy,▁▄▆▆▇▇▇███
train/loss,█▅▄▃▂▂▂▁▁▁

0,1
Test Accuracy,56.0
epoch,10.0
loss,1.60234
train/accuracy,59.04
train/loss,1.0805
