# Downloading Data / Setup

In [7]:
! pip install kaggle



In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
! mkdir ~/.kaggle

In [10]:
! cp /content/drive/MyDrive/kaggle_api_credentials/kaggle.json ~/.kaggle/kaggle.json

In [11]:
! chmod 600 ~/.kaggle/kaggle.json

In [12]:
! kaggle competitions download challenges-in-representation-learning-facial-expression-recognition-challenge --force

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 77% 221M/285M [00:00<00:00, 772MB/s] 
100% 285M/285M [00:00<00:00, 375MB/s]


In [13]:
!ls -lh /content

total 286M
-rw-r--r-- 1 root root 286M Dec 11  2019 challenges-in-representation-learning-facial-expression-recognition-challenge.zip
drwx------ 6 root root 4.0K Jun  5 13:58 drive
drwxr-xr-x 1 root root 4.0K Jun  3 14:04 sample_data
drwxr-xr-x 4 root root 4.0K Jun  5 13:57 wandb


In [14]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


# Set up Wandb

In [15]:
!pip install -q wandb

In [16]:
import wandb
wandb.login()



True

In [17]:
import random

import wandb

# Start a new wandb run to track this script.
run = wandb.init(
    # Set the wandb entity where your project will be logged (generally your team name).
    entity="ashar-22-free-university-of-tbilisi-",
    # Set the wandb project where this run will be logged.
    project="setup",
    # Track hyperparameters and run metadata.
    config={
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    },
)

# Simulate training.
epochs = 10
offset = random.random() / 5
for epoch in range(2, epochs):
    acc = 1 - 2**-epoch - random.random() / epoch - offset
    loss = 2**-epoch + random.random() / epoch + offset

    # Log metrics to wandb.
    run.log({"acc": acc, "loss": loss})

# Finish the run and upload any remaining data.
run.finish()

0,1
acc,▁▁▁▆▇▄▆█
loss,█▄▂▂▂▁▁▂

0,1
acc,0.84463
loss,0.2518


In [61]:
# utils/data.py
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from torchvision import transforms
import wandb

class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    from PIL import Image

    def __getitem__(self, idx):
        pixels = np.array(self.data.iloc[idx]['pixels'].split(), dtype='uint8').reshape(48, 48)
        image = Image.fromarray(pixels)  # PIL Image
        label = int(self.data.iloc[idx]['emotion']) if 'emotion' in self.data.columns else -1

        if self.transform:
            image = self.transform(image)
        return image, label


def get_data_loaders(csv_path, batch_size=64):
    # Define transforms
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomCrop(48, padding=4),
        transforms.ToTensor()
    ])
    val_test_transform = transforms.ToTensor()

    # Load the full dataset without a transform initially
    dataset = FER2013Dataset(csv_file=csv_path, transform=None)

    # Split into train/val/test
    train_len = int(0.7 * len(dataset))
    val_len = int(0.15 * len(dataset))
    test_len = len(dataset) - train_len - val_len
    train_data, val_data, test_data = random_split(dataset, [train_len, val_len, test_len])

    # Manually assign transforms to each subset
    train_data.dataset.transform = train_transform
    val_data.dataset.transform = val_test_transform
    test_data.dataset.transform = val_test_transform

    # Create data loaders
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, val_loader, test_loader


In [62]:
# model.py
import torch.nn as nn
import torch.nn.functional as F

class SmallCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.3)
        self.fc1 = nn.Linear(128 * 12 * 12, 128)
        self.fc2 = nn.Linear(128, 7)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 24x24
        x = self.pool(F.relu(self.conv2(x)))  # 12x12
        x = x.view(-1, 128 * 12 * 12)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

In [63]:
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

def train_and_validate(csv_path, batch_size=64, lr=0.001, epochs=10):
    wandb.init(project="facial-expression", name="run_4_small_cnn", config={
        "batch_size": batch_size,
        "lr": lr,
        "epochs": epochs
    }, reinit=True)
    try:
        config = wandb.config

        train_loader, val_loader, test_loader = get_data_loaders(csv_path, config.batch_size)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = SmallCNN().to(device)
        wandb.watch(model, log="all", log_freq=10)  # optional: logs gradients and parameters

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            correct = 0
            total = 0

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            val_loss = 0
            val_correct = 0
            val_total = 0
            model.eval()
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()

            train_loss_avg = train_loss / len(train_loader)
            train_acc = correct / total
            val_loss_avg = val_loss / len(val_loader)
            val_acc = val_correct / val_total

            print(f"Logging: train_acc={train_acc}, val_acc={val_acc}, lr={scheduler.get_last_lr()[0]}")

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss_avg,
                "train_accuracy": train_acc,
                "val_loss": val_loss_avg,
                "val_accuracy": val_acc,
                "learning_rate": scheduler.get_last_lr()[0]
            })

            print(f"Epoch {epoch + 1}/{config.epochs} | "
                  f"Train Loss: {train_loss_avg:.4f}, Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss_avg:.4f}, Val Acc: {val_acc:.4f}")
            scheduler.step()

        # Evaluate on test set
        test_loss = 0
        test_correct = 0
        test_total = 0
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_loss_avg = test_loss / len(test_loader)
        test_acc = test_correct / test_total

        print(f"\n✅ Final Test Loss: {test_loss_avg:.4f}, Test Accuracy: {test_acc:.4f}")

        wandb.log({
            "test_loss": test_loss_avg,
            "test_accuracy": test_acc
        })

        # Save model
        torch.save(model.state_dict(), "model.pth")
        wandb.save("model.pth")  # optional but recommended
        artifact = wandb.Artifact('facial-expression-model', type='model')
        artifact.add_file('model.pth')
        wandb.log_artifact(artifact)
    finally:
        wandb.finish()
if __name__ == "__main__":
    train_and_validate("train.csv")

Logging: train_acc=0.29304339171974525, val_acc=0.35392475615420343, lr=0.001
Epoch 1/10 | Train Loss: 1.7449, Train Acc: 0.2930 | Val Loss: 1.6408, Val Acc: 0.3539
Logging: train_acc=0.37126791401273884, val_acc=0.410822108685555, lr=0.001
Epoch 2/10 | Train Loss: 1.6017, Train Acc: 0.3713 | Val Loss: 1.5194, Val Acc: 0.4108
Logging: train_acc=0.41655055732484075, val_acc=0.444960520204366, lr=0.001
Epoch 3/10 | Train Loss: 1.5124, Train Acc: 0.4166 | Val Loss: 1.4350, Val Acc: 0.4450
Logging: train_acc=0.4452627388535032, val_acc=0.4621458430097538, lr=0.001
Epoch 4/10 | Train Loss: 1.4378, Train Acc: 0.4453 | Val Loss: 1.4197, Val Acc: 0.4621
Logging: train_acc=0.46133558917197454, val_acc=0.47282861124013004, lr=0.001
Epoch 5/10 | Train Loss: 1.3918, Train Acc: 0.4613 | Val Loss: 1.3530, Val Acc: 0.4728
Logging: train_acc=0.4905951433121019, val_acc=0.486065954482118, lr=0.0005
Epoch 6/10 | Train Loss: 1.3226, Train Acc: 0.4906 | Val Loss: 1.3402, Val Acc: 0.4861
Logging: train_acc

0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▅▆▆▇▇███
train_loss,█▆▅▄▃▂▂▂▁▁
val_accuracy,▁▄▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
learning_rate,0.0005
test_accuracy,0.49965
test_loss,1.31117
train_accuracy,0.52603
train_loss,1.22451
val_accuracy,0.50534
val_loss,1.30689
