# Basic CNN Experiment

A simple CNN that establishes baseline performance on the FER2013 dataset.

In [None]:
# Install the packages this notebook imports; -q keeps pip's output quiet.
!pip install -q torch torchvision wandb pandas numpy matplotlib tqdm

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import wandb
import os
from tqdm import tqdm

## Download Data from Kaggle

In [None]:
# Mount Google Drive at /content/drive so that kaggle.json, which is stored
# in MyDrive, can be copied from it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Setup Kaggle: copy the API credentials from Drive into ~/.kaggle.
# Create the directory first (-p: no error if it already exists).
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/
# Make the credentials file readable and writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download dataset: fetch the competition archive with the kaggle CLI, then
# unpack it quietly into the current working directory.
# NOTE(review): unzip asks before overwriting existing files, so re-running
# this cell may wait for input — consider `unzip -o -q`; confirm before changing.
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
!unzip -q challenges-in-representation-learning-facial-expression-recognition-challenge.zip

# Extract tar.gz if present (skipped silently when the file does not exist)
if os.path.exists('fer2013.tar.gz'):
    !tar -xzf fer2013.tar.gz
    print("Extracted fer2013.tar.gz")

In [None]:
# Check what files we have
print("Available CSV files:")
!ls -la *.csv

# Check the structure of each file
for file in ['train.csv', 'test.csv', 'icml_face_data.csv']:
    if os.path.exists(file):
        df = pd.read_csv(file)
        print(f"\n{file}: {df.shape}")
        print(f"Columns: {df.columns.tolist()}")
        print(f"First row sample: {df.iloc[0].values[:3]}...")

## Dataset Class

In [None]:
class FERDataset(Dataset):
    """FER2013 samples read from a CSV file.

    Columns are addressed by position:

    * labelled files (``is_test=False``): column 0 holds the emotion label
      and column 1 holds the pixel string;
    * unlabelled files (``is_test=True``): column 0 holds the pixel string.

    Each pixel string is parsed as 48 * 48 whitespace-separated grey values.

    Args:
        csv_file: Path of the CSV file to load.
        transform: Optional callable applied to the ``(48, 48, 3)`` float32
            image array before it is returned.
        is_test: When true, the file has no label column and every sample is
            returned with the placeholder label ``0``.
    """

    def __init__(self, csv_file, transform=None, is_test=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.is_test = is_test

        print(f"Loaded {csv_file} with shape: {self.data.shape}")
        print(f"Columns: {self.data.columns.tolist()[:5]}...")  # Show first 5 columns

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is the grey image scaled to [0, 1] and repeated over three
        channels, passed through ``transform`` when one was given. ``label``
        is the emotion id, or ``0`` for unlabelled test data.

        Raises:
            ValueError: If the pixel string does not hold exactly 48 * 48
                numeric values (raised by the NumPy conversion/reshape).
        """
        row = self.data.iloc[idx]

        # Positional access must go through .iloc: ``row`` is a Series indexed
        # by column *names*, and plain ``row[0]`` only worked through pandas'
        # deprecated integer-position fallback.
        if self.is_test:
            label = 0  # No labels in test set
            pixels = row.iloc[0]
        else:
            label = int(row.iloc[0])
            pixels = row.iloc[1]

        # split() without an argument tolerates leading, trailing or repeated
        # whitespace, which split(' ') would turn into empty tokens.
        pixels = np.array(pixels.split(), dtype='float32').reshape(48, 48)
        pixels = pixels / 255.0  # normalize to [0, 1]

        # Repeat the single grey channel three times -> (48, 48, 3)
        image = np.stack([pixels] * 3, axis=2)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

## Simple CNN Model

In [None]:
class SimpleCNN(nn.Module):
    """Three conv/pool stages followed by a two-layer classifier head.

    Every stage is ``Conv2d(kernel_size=3, padding=1) -> ReLU ->
    MaxPool2d(2, 2)``. The 3x3 convolutions with padding 1 keep the spatial
    size and each pooling step halves it, so a 48x48 input reaches the
    classifier as 128 feature maps of 6x6 — the ``128 * 6 * 6`` input size
    of ``fc1``.

    Args:
        num_classes: Number of output logits. Defaults to 7.
        in_channels: Number of input image channels. Defaults to 3.
    """

    def __init__(self, num_classes=7, in_channels=3):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(128 * 6 * 6, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the input is not 48x48, because the flattened
                feature size then no longer matches ``fc1``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))

        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 128 * 6 * 6) let a wrongly sized input silently spill into
        # the batch dimension instead of failing.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

## Setup Data

In [None]:
# Transform: ToTensor converts the dataset's (H, W, C) array into a tensor
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Use train.csv for training. Stop here with a clear error when it is missing;
# previously the cell only printed a message and then failed further down with
# a NameError on train_dataset.
if not os.path.exists('train.csv'):
    raise FileNotFoundError("train.csv not found! Run the download cell first.")

print("Using train.csv for training")
full_dataset = FERDataset('train.csv', transform=transform, is_test=False)

# Split into train/val (80/20). The seeded generator makes the split identical
# on every run, so validation numbers are comparable between experiments.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

print(f"Train: {train_size}, Val: {val_size}")

# Create loaders: shuffle only the training data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

## Train

In [None]:
# Initialize wandb: log in, then start the "basic-cnn" run in the
# "fer-challenge" project; the training loop logs its metrics with wandb.log.
wandb.login()
wandb.init(project="fer-challenge", name="basic-cnn")

In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the network and move its parameters to the chosen device.
model = SimpleCNN()
model = model.to(device)

# Adam over all model parameters, trained against cross-entropy loss.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
# Training loop
epochs = 20


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is switched to train mode and updated
    after every batch. Without one it is switched to eval mode and evaluated
    under ``torch.no_grad()``.

    The loss is averaged per sample: each batch loss is weighted by its batch
    size, so a short final batch does not count as much as a full one. This
    assumes ``criterion`` returns the batch mean, which is the default
    reduction of ``nn.CrossEntropyLoss``.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        if training:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        else:
            with torch.no_grad():
                outputs = model(images)
                loss = criterion(outputs, labels)

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        seen += batch_size

    # Guard against an empty loader so the averages below cannot divide by 0.
    seen = max(seen, 1)
    return loss_sum / seen, 100. * correct / seen


for epoch in range(epochs):
    # Train (progress bar only on the training pass), then validate
    train_loss, train_acc = _run_epoch(
        model, tqdm(train_loader), criterion, device, optimizer
    )
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{epochs}")
    print(f"Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")

    # Log to wandb
    wandb.log({
        "train_acc": train_acc,
        "val_acc": val_acc,
        "train_loss": train_loss,
        "val_loss": val_loss
    })

In [None]:
# Close the wandb run, then report that the notebook has finished.
wandb.finish()
print("Training complete!")