# Experiment 01: Basic Neural Network (Fully Connected)

**Objective**: Establish baseline using a basic neural network without convolutions.

**Hypothesis**: A fully connected network will perform poorly on image data because it doesn't capture spatial relationships.

**Expected Result**: Low accuracy (~25-30%), because flattening the image discards its 2D layout and the network has no built-in notion of which pixels are neighbours.

In [None]:
# Install the third-party packages this notebook relies on.
# The leading "!" hands the line to the shell from the notebook cell; "-q" keeps pip's output short.
!pip install -q torch torchvision wandb pandas numpy matplotlib seaborn scikit-learn tqdm

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

import wandb
import os
from tqdm import tqdm

# Seed the PyTorch and NumPy global random generators with one shared value
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

## Check Available Files

In [None]:
# Inventory of the CSV files in the working directory: name, shape and first five columns
print("Available CSV files:")
csv_names = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
for file in csv_names:
    print(f"  - {file}")
    df = pd.read_csv(file)
    leading_columns = df.columns.tolist()[:5]
    print(f"    Shape: {df.shape}, Columns: {leading_columns}...")

## Dataset Class

In [None]:
class FER2013Dataset(Dataset):
    """CSV-backed dataset that yields ``(image, label)`` pairs.

    Two pixel layouts are accepted:

    * a single ``pixels`` column holding whitespace-separated values, or
    * one column per pixel, each named with a ``pixel`` prefix (``pixel0``, ...).

    Every sample is reshaped to 48x48 and copied into three identical
    channels, so ``transform`` receives a ``(48, 48, 3)`` uint8 array.
    """

    def __init__(self, csv_file, transform=None, is_test=False):
        """
        Args:
            csv_file: path to the CSV, e.g. 'train.csv' or 'test.csv'
            transform: optional callable applied to the (48, 48, 3) array
            is_test: True if the CSV has no labels; every sample then gets
                the dummy label 0
        """
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.is_test = is_test

        # Work out the schema once here instead of re-scanning the column
        # names on every __getitem__ call.
        columns = self.data.columns
        self._has_pixel_string = 'pixels' in columns
        self._pixel_cols = [col for col in columns if str(col).startswith('pixel')]
        self._label_col = next(
            (name for name in ('emotion', 'label') if name in columns), None
        )
        print(f"Loaded {csv_file}: {len(self.data)} samples")

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(pixels, label)`` for row ``idx``.

        Raises:
            ValueError: if the CSV has no recognisable pixel data, or if
                ``is_test`` is False and neither an ``emotion`` nor a
                ``label`` column exists.
        """
        if self._has_pixel_string:
            raw = self.data['pixels'].iloc[idx]
            pixels = np.array([int(p) for p in raw.split()], dtype=np.uint8)
        elif self._pixel_cols:
            # Build the row once and convert numerically. Going through
            # str()/int() per column is slow and fails when pandas has
            # upcast the row to float ("12.0").
            row = self.data.iloc[idx]
            pixels = row[self._pixel_cols].to_numpy(dtype=np.uint8)
        else:
            raise ValueError("Cannot find pixel data in CSV")

        pixels = pixels.reshape(48, 48)

        # Replicate the single grey channel into 3 channels (H, W, C)
        pixels = np.stack([pixels] * 3, axis=-1)

        if self.is_test:
            label = 0  # Dummy label for test set
        elif self._label_col is not None:
            label = int(self.data[self._label_col].iloc[idx])
        else:
            raise ValueError("Cannot find label column")

        if self.transform:
            pixels = self.transform(pixels)

        return pixels, label

## Model: Basic Neural Network

In [None]:
class BasicNN(nn.Module):
    """Fully connected baseline: four linear layers with ReLU and dropout.

    The input is flattened to one vector per sample, so the 2D image layout
    is not available to the network.

    Args:
        num_classes: size of the output layer.
        input_dim: number of features per sample after flattening
            (defaults to 48 * 48 * 3 = 6912).
        dropout_p: dropout probability applied after the first two hidden layers.
    """

    def __init__(self, num_classes=7, input_dim=48 * 48 * 3, dropout_p=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``."""
        # Flatten everything after the batch dimension - loses spatial structure!
        # torch.flatten also accepts non-contiguous inputs (e.g. a permuted
        # HWC -> CHW batch), where Tensor.view would raise.
        x = torch.flatten(x, start_dim=1)

        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.dropout(torch.relu(self.fc2(x)))
        x = torch.relu(self.fc3(x))  # no dropout before the output layer
        return self.fc4(x)

## Training Functions

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` with the model in train mode.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sum of the per-batch loss
        values divided by the number of batches, and ``accuracy`` is the
        percentage of samples whose highest-scoring class equals the label.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0, 0, 0

    for batch_x, batch_y in tqdm(loader, desc='Training'):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        preds = logits.max(1)[1]  # index of the largest score per sample
        n_seen += batch_y.size(0)
        n_correct += (preds == batch_y).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` in eval mode without tracking gradients.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sum of the per-batch loss
        values divided by the number of batches, and ``accuracy`` is the
        percentage of samples whose highest-scoring class equals the label.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0

    with torch.no_grad():
        for batch_x, batch_y in tqdm(loader, desc='Validating'):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            loss_sum += criterion(logits, batch_y).item()

            preds = logits.max(1)[1]  # index of the largest score per sample
            n_seen += batch_y.size(0)
            n_correct += (preds == batch_y).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

## Setup Data

In [None]:
# Transforms: (48, 48, 3) uint8 array -> PIL image -> float tensor,
# then per-channel normalisation with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# A dedicated, seeded generator keeps the train/val partition identical every
# time this cell runs. Drawing from the global RNG would reshuffle the split
# on re-execution and could move validation samples into the training set.
SPLIT_SEED = 42
BATCH_SIZE = 64

# Load full training data
if os.path.exists('train.csv'):
    full_train_dataset = FER2013Dataset('train.csv', transform=transform)

    # Split into train/val (80/20)
    train_size = int(0.8 * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        full_train_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(SPLIT_SEED),
    )
    print(f"Train: {train_size}, Val: {val_size}")

    # Test dataset (unlabelled; FER2013Dataset supplies a dummy label)
    if os.path.exists('test.csv'):
        test_dataset = FER2013Dataset('test.csv', transform=transform, is_test=True)
    else:
        test_dataset = None
        print("No test.csv found")
else:
    # Fallback to icml_face_data.csv if train.csv doesn't exist
    print("Using icml_face_data.csv instead")
    from torch.utils.data import Subset

    class ICML_Dataset(Dataset):
        """Rows of icml_face_data.csv whose ``Usage`` column equals ``usage``."""

        def __init__(self, csv_file='icml_face_data.csv', usage='Training', transform=None):
            data = pd.read_csv(csv_file)
            # NOTE(review): the publicly distributed file pads its header
            # names with a leading space (' Usage', ' pixels'); stripping
            # makes both padded and clean headers work.
            data.columns = data.columns.str.strip()
            self.data = data[data['Usage'] == usage].reset_index(drop=True)
            self.transform = transform
            print(f"{usage}: {len(self.data)} samples")

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            row = self.data.iloc[idx]
            pixels = np.array([int(p) for p in row['pixels'].split()], dtype=np.uint8).reshape(48, 48)
            # Replicate the grey channel into 3 channels (H, W, C)
            pixels = np.stack([pixels] * 3, axis=-1)
            label = int(row['emotion'])

            if self.transform:
                pixels = self.transform(pixels)
            return pixels, label

    train_dataset = ICML_Dataset(usage='Training', transform=transform)
    val_dataset = ICML_Dataset(usage='PublicTest', transform=transform)
    test_dataset = ICML_Dataset(usage='PrivateTest', transform=transform)

# Dataloaders (only the training loader shuffles)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
# Compare against None explicitly: truthiness of a dataset goes through
# __len__, which would treat an empty test set as missing.
test_loader = (
    DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    if test_dataset is not None
    else None
)

## Initialize WandB

In [None]:
# Log in to Weights & Biases, then start the run that tracks this experiment
wandb.login()

# Hyperparameters and notes recorded alongside the run
run_config = {
    "model": "BasicNN",
    "epochs": 30,
    "batch_size": 64,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "architecture": "4 FC layers",
    "expected": "Poor performance due to no spatial feature extraction",
}
wandb.init(project="fer-challenge", name="01_BasicNN", config=run_config)

## Train Model

In [None]:
# Use CUDA when PyTorch reports it as available, otherwise run on the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using: {device}")

# Model, loss and optimiser for the baseline run
model = BasicNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

param_count = sum(p.numel() for p in model.parameters())
print(f"Parameters: {param_count:,}")

# Restate the hypothesis next to the training output
expectation_lines = (
    "\nExpected: Low accuracy (~25-30%) because:",
    "- Treats each pixel independently",
    "- No spatial feature extraction",
    "- Loses all 2D structure when flattening",
)
for line in expectation_lines:
    print(line)

In [None]:
# Training loop: 30 epochs, recording loss/accuracy history for the plots below
train_losses, train_accs = [], []
val_losses, val_accs = [], []

for epoch in range(30):
    print(f"\nEpoch {epoch+1}/30")

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # Append this epoch's numbers to the matching history list
    for history, value in (
        (train_losses, train_loss),
        (val_losses, val_loss),
        (train_accs, train_acc),
        (val_accs, val_acc),
    ):
        history.append(value)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
    print(f"Overfitting Gap: {train_acc - val_acc:.2f}%")

    # Same metrics go to the experiment tracker; "epoch" is the 0-based loop index
    epoch_metrics = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
        "overfitting_gap": train_acc - val_acc,
        "epoch": epoch,
    }
    wandb.log(epoch_metrics)

## Results & Analysis

In [None]:
# Plot results: loss on the left panel, accuracy on the right panel
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# (axis, train series, val series, y-axis label, title) for each panel
panels = (
    (ax1, train_losses, val_losses, 'Loss', 'Training vs Validation Loss'),
    (ax2, train_accs, val_accs, 'Accuracy (%)', 'Training vs Validation Accuracy'),
)
for axis, train_series, val_series, y_label, panel_title in panels:
    axis.plot(train_series, label='Train', linewidth=2)
    axis.plot(val_series, label='Val', linewidth=2)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.set_title(panel_title)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
# Upload the figure to the tracker before showing it
wandb.log({"training_curves": wandb.Image(plt)})
plt.show()

In [None]:
# Print a text summary of the run, then close the wandb run
rule = "=" * 60
print(rule)
print("EXPERIMENT SUMMARY")
print(rule)
print("\nModel: Basic Neural Network (Fully Connected)")

best_val_acc = max(val_accs)
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
final_train_acc = train_accs[-1]
print(f"Final Training Accuracy: {final_train_acc:.2f}%")
final_gap = final_train_acc - val_accs[-1]
print(f"Final Overfitting Gap: {final_gap:.2f}%")

print("\nKey Findings:")
print(f"1. Poor performance: {best_val_acc:.2f}% (random guessing = 14.3%)")
for finding in (
    "2. The model struggles because it treats pixels independently",
    "3. No spatial feature extraction capability",
    "4. This demonstrates why CNNs are necessary for image tasks",
):
    print(finding)

print("\nConclusion:")
for statement in (
    "Basic NNs are inadequate for image classification.",
    "We need convolutional layers to capture spatial patterns!",
):
    print(statement)

wandb.finish()