# Downloading Data / Setup

In [1]:
! pip install kaggle



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
! mkdir ~/.kaggle

In [4]:
! cp /content/drive/MyDrive/kaggle_api_credentials/kaggle.json ~/.kaggle/kaggle.json

In [5]:
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
! kaggle competitions download challenges-in-representation-learning-facial-expression-recognition-challenge --force

Downloading challenges-in-representation-learning-facial-expression-recognition-challenge.zip to /content
 91% 260M/285M [00:00<00:00, 365MB/s]
100% 285M/285M [00:00<00:00, 363MB/s]


In [7]:
!ls -lh /content

total 286M
-rw-r--r-- 1 root root 286M Dec 11  2019 challenges-in-representation-learning-facial-expression-recognition-challenge.zip
drwx------ 6 root root 4.0K Jun  5 19:41 drive
drwxr-xr-x 1 root root 4.0K Jun  4 21:29 sample_data


In [8]:
! unzip challenges-in-representation-learning-facial-expression-recognition-challenge

Archive:  challenges-in-representation-learning-facial-expression-recognition-challenge.zip
  inflating: example_submission.csv  
  inflating: fer2013.tar.gz          
  inflating: icml_face_data.csv      
  inflating: test.csv                
  inflating: train.csv               


# Set up Wandb

In [9]:
!pip install -q wandb

In [10]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mashar-22[0m ([33mashar-22-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
import random

import wandb

# Start a new wandb run to track this script.
run = wandb.init(
    # Set the wandb entity where your project will be logged (generally your team name).
    entity="ashar-22-free-university-of-tbilisi-",
    # Set the wandb project where this run will be logged.
    project="setup",
    # Track hyperparameters and run metadata.
    config={
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    },
)

# Simulate training.
epochs = 10
offset = random.random() / 5
for epoch in range(2, epochs):
    acc = 1 - 2**-epoch - random.random() / epoch - offset
    loss = 2**-epoch + random.random() / epoch + offset

    # Log metrics to wandb.
    run.log({"acc": acc, "loss": loss})

# Finish the run and upload any remaining data.
run.finish()

0,1
acc,▁▁▁▆▇▄▆█
loss,█▄▂▂▂▁▁▂

0,1
acc,0.84463
loss,0.2518


In [32]:
# utils/data.py
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from torchvision import transforms
import wandb
from PIL import Image

class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = np.array(self.data.iloc[idx]['pixels'].split(), dtype='uint8').reshape(48, 48)
        image = Image.fromarray(pixels)  # PIL Image
        label = int(self.data.iloc[idx]['emotion']) if 'emotion' in self.data.columns else -1

        if self.transform:
            image = self.transform(image)
        return image, label


def get_data_loaders(csv_path, batch_size=64):
    # Define transforms
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomCrop(48, padding=4),
        transforms.ToTensor()
    ])
    val_test_transform = transforms.ToTensor()

    # Load the full dataset without a transform initially
    dataset = FER2013Dataset(csv_file=csv_path, transform=None)

    # Split into train/val/test
    train_len = int(0.7 * len(dataset))
    val_len = int(0.15 * len(dataset))
    test_len = len(dataset) - train_len - val_len
    train_data, val_data, test_data = random_split(dataset, [train_len, val_len, test_len])

    # Manually assign transforms to each subset
    train_data.dataset.transform = train_transform
    val_data.dataset.transform = val_test_transform
    test_data.dataset.transform = val_test_transform

    # Create data loaders
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, val_loader, test_loader


In [31]:
# model.py
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(256 * 6 * 6, 256)
        self.fc2 = nn.Linear(256, 7)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 24x24
        x = self.pool(F.relu(self.conv2(x)))  # 12x12
        x = self.pool(F.relu(self.conv3(x)))  # 6x6
        x = x.view(-1, 256 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

In [26]:
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

def train_and_validate(csv_path, batch_size=64, lr=0.001, epochs=10):
    wandb.init(project="facial-expression", name="run_3", config={
        "batch_size": batch_size,
        "lr": lr,
        "epochs": epochs
    }, reinit=True)
    try:
        config = wandb.config

        train_loader, val_loader, test_loader = get_data_loaders(csv_path, config.batch_size)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = SimpleCNN().to(device)
        wandb.watch(model, log="all", log_freq=10)  # optional: logs gradients and parameters

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

        for epoch in range(config.epochs):
            model.train()
            train_loss = 0
            correct = 0
            total = 0

            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            val_loss = 0
            val_correct = 0
            val_total = 0
            model.eval()
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()

            train_loss_avg = train_loss / len(train_loader)
            train_acc = correct / total
            val_loss_avg = val_loss / len(val_loader)
            val_acc = val_correct / val_total

            print(f"Logging: train_acc={train_acc}, val_acc={val_acc}, lr={scheduler.get_last_lr()[0]}")

            wandb.log({
                "epoch": epoch + 1,
                "train_loss": train_loss_avg,
                "train_accuracy": train_acc,
                "val_loss": val_loss_avg,
                "val_accuracy": val_acc,
                "learning_rate": scheduler.get_last_lr()[0]
            })

            print(f"Epoch {epoch + 1}/{config.epochs} | "
                  f"Train Loss: {train_loss_avg:.4f}, Train Acc: {train_acc:.4f} | "
                  f"Val Loss: {val_loss_avg:.4f}, Val Acc: {val_acc:.4f}")
            scheduler.step()

        # Evaluate on test set
        test_loss = 0
        test_correct = 0
        test_total = 0
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_loss_avg = test_loss / len(test_loader)
        test_acc = test_correct / test_total

        print(f"\n✅ Final Test Loss: {test_loss_avg:.4f}, Test Accuracy: {test_acc:.4f}")

        wandb.log({
            "test_loss": test_loss_avg,
            "test_accuracy": test_acc
        })

        # Save model
        torch.save(model.state_dict(), "model.pth")
        wandb.save("model.pth")  # optional but recommended
        artifact = wandb.Artifact('facial-expression-model', type='model')
        artifact.add_file('model.pth')
        wandb.log_artifact(artifact)
    finally:
        wandb.finish()
if __name__ == "__main__":
    train_and_validate("train.csv")

Logging: train_acc=0.28005573248407645, val_acc=0.3615884811890385, lr=0.001
Epoch 1/10 | Train Loss: 1.7995, Train Acc: 0.2801 | Val Loss: 1.5811, Val Acc: 0.3616
Logging: train_acc=0.4053045382165605, val_acc=0.40664189503019044, lr=0.001
Epoch 2/10 | Train Loss: 1.5186, Train Acc: 0.4053 | Val Loss: 1.4965, Val Acc: 0.4066
Logging: train_acc=0.4554637738853503, val_acc=0.4951230840687413, lr=0.001
Epoch 3/10 | Train Loss: 1.3944, Train Acc: 0.4555 | Val Loss: 1.3099, Val Acc: 0.4951
Logging: train_acc=0.47830414012738853, val_acc=0.4997677659080353, lr=0.001
Epoch 4/10 | Train Loss: 1.3267, Train Acc: 0.4783 | Val Loss: 1.2787, Val Acc: 0.4998
Logging: train_acc=0.5099024681528662, val_acc=0.5044124477473293, lr=0.001
Epoch 5/10 | Train Loss: 1.2597, Train Acc: 0.5099 | Val Loss: 1.2624, Val Acc: 0.5044
Logging: train_acc=0.5482683121019108, val_acc=0.532512772875058, lr=0.0005
Epoch 6/10 | Train Loss: 1.1416, Train Acc: 0.5483 | Val Loss: 1.1956, Val Acc: 0.5325
Logging: train_acc=

0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,█████▁▁▁▁▁
test_accuracy,▁
test_loss,▁
train_accuracy,▁▃▄▅▅▆▇▇██
train_loss,█▆▅▄▄▃▃▂▂▁
val_accuracy,▁▃▆▆▆▇▇██▇
val_loss,█▆▃▃▂▁▂▁▁▃

0,1
epoch,10.0
learning_rate,0.0005
test_accuracy,0.54377
test_loss,1.30093
train_accuracy,0.65451
train_loss,0.86616
val_accuracy,0.54111
val_loss,1.31088


In [38]:
# utils/data.py
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from torchvision import transforms
import wandb
from PIL import Image

class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pixels = np.array(self.data.iloc[idx]['pixels'].split(), dtype='uint8').reshape(48, 48)
        image = Image.fromarray(pixels)  # PIL Image
        label = int(self.data.iloc[idx]['emotion']) if 'emotion' in self.data.columns else -1

        if self.transform:
            image = self.transform(image)
        return image, label


def get_data_loaders(csv_path, batch_size=64):
    # Define transforms
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomCrop(48, padding=4),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
        transforms.ToTensor(),
        transforms.RandomErasing(p=0.5, scale=(0.02, 0.25))
    ])
    val_test_transform = transforms.ToTensor()

    # Load the full dataset without a transform initially
    dataset = FER2013Dataset(csv_file=csv_path, transform=None)

    # Split into train/val/test
    train_len = int(0.7 * len(dataset))
    val_len = int(0.15 * len(dataset))
    test_len = len(dataset) - train_len - val_len
    train_data, val_data, test_data = random_split(dataset, [train_len, val_len, test_len])

    # Manually assign transforms to each subset
    train_data.dataset.transform = train_transform
    val_data.dataset.transform = val_test_transform
    test_data.dataset.transform = val_test_transform

    # Create data loaders
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, val_loader, test_loader


In [39]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.6)
        self.fc1 = nn.Linear(256 * 6 * 6, 256)
        self.fc2 = nn.Linear(256, 7)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 256 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)


In [40]:
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import wandb

def train_and_validate(csv_path, batch_size=64, lr=0.001, epochs=10):
    wandb.init(project="facial-expression", name="run_3", config={
        "batch_size": batch_size,
        "lr": lr,
        "epochs": epochs
    }, reinit=True)
    try:
        config = wandb.config

        train_loader, val_loader, test_loader = get_data_loaders(csv_path, config.batch_size)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = SimpleCNN().to(device)
        wandb.watch(model, log="all", log_freq=10)  # optional: logs gradients and parameters
        # Adam optimizer with weight decay (L2 regularization)
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

        # Criterion remains the same, e.g. CrossEntropyLoss
        criterion = nn.CrossEntropyLoss()

        # Optional: learning rate scheduler
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                        factor=0.5, patience=2)

        for epoch in range(epochs):
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

            train_loss = running_loss / total
            train_acc = correct / total

            # Validation phase
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item() * inputs.size(0)
                    _, predicted = outputs.max(1)
                    val_total += labels.size(0)
                    val_correct += predicted.eq(labels).sum().item()

            val_loss /= val_total
            val_acc = val_correct / val_total

            # print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

            # Step scheduler with validation loss
            scheduler.step(val_loss)




        # Evaluate on test set
        test_loss = 0
        test_correct = 0
        test_total = 0
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()

        test_loss_avg = test_loss / len(test_loader)
        test_acc = test_correct / test_total

        print(f"\n✅ Final Test Loss: {test_loss_avg:.4f}, Test Accuracy: {test_acc:.4f}")

        wandb.log({
            "test_loss": test_loss_avg,
            "test_accuracy": test_acc
        })

        # Save model
        torch.save(model.state_dict(), "model.pth")
        wandb.save("model.pth")  # optional but recommended
        artifact = wandb.Artifact('facial-expression-model', type='model')
        artifact.add_file('model.pth')
        wandb.log_artifact(artifact)
    finally:
        wandb.finish()
if __name__ == "__main__":
    train_and_validate("train.csv")

KeyboardInterrupt: 