# Experiment 01: Basic Neural Network (Fully Connected)

**Objective**: Establish baseline performance using a basic neural network without convolutions.

**Hypothesis**: A fully connected network will perform poorly on image data because flattening the input discards its 2-D layout, so the network cannot exploit local spatial relationships between neighbouring pixels.

**Based on**: Lecture 14 - Neural Networks and Backpropagation

## 1. Setup and Imports

In [None]:
# Install required packages
# Runs pip through the notebook's shell escape; -q keeps pip's output quiet.
# The list covers the libraries imported below plus the kaggle CLI used for the download.
!pip install -q torch torchvision wandb kaggle pandas numpy matplotlib seaborn scikit-learn tqdm

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

import wandb
import os
from tqdm import tqdm
from google.colab import files

# Seed both global random number generators (NumPy and PyTorch) with the same
# fixed value so that repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

## 2. Download Dataset from Kaggle

In [None]:
# Setup Kaggle API
# Prompts for an upload through google.colab's `files` helper (imported above).
# NOTE(review): `uploaded` is not read anywhere in the visible notebook; the
# shell commands below instead expect a file named exactly `kaggle.json` in
# the current working directory — confirm the upload always lands there
# under that name.
print("Please upload your kaggle.json file")
uploaded = files.upload()

# Create kaggle directory and move json file
# `mkdir -p` succeeds even if ~/.kaggle already exists; `chmod 600` restricts
# the credentials file to owner read/write only.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download FER2013 dataset
# Fetches the competition archive with the kaggle CLI and extracts it into the
# current working directory (`unzip -q` suppresses the per-file listing).
# NOTE(review): later cells open 'fer2013.csv' from the working directory —
# confirm the archive extracts a file with that exact name.
# NOTE(review): the success message below is printed unconditionally, even if
# either shell command failed.
!kaggle competitions download -c challenges-in-representation-learning-facial-expression-recognition-challenge
!unzip -q challenges-in-representation-learning-facial-expression-recognition-challenge.zip
print("Dataset downloaded successfully!")

## 3. Dataset Class

In [None]:
class FER2013Dataset(Dataset):
    """FER2013 dataset backed by a single CSV file.

    The CSV must provide the columns ``Usage`` (split marker), ``pixels``
    (space-separated integer grey values, 48 * 48 per row) and ``emotion``
    (integer class label).
    """

    # Public split name -> value stored in the CSV's ``Usage`` column.
    _USAGE_BY_SPLIT = {
        'train': 'Training',
        'val': 'PublicTest',
        'test': 'PrivateTest',
    }

    def __init__(self, csv_file, split='train', transform=None):
        """
        Args:
            csv_file: Path to fer2013.csv
            split: 'train', 'val', or 'test'
            transform: Optional callable applied to the (48, 48, 3) uint8 image

        Raises:
            ValueError: If ``split`` is not one of the supported names.
        """
        # Reject unknown names explicitly: silently falling back to the test
        # rows (e.g. for a typo such as 'validation') would leak the test set.
        if split not in self._USAGE_BY_SPLIT:
            raise ValueError(
                f"Unknown split {split!r}; expected one of "
                f"{sorted(self._USAGE_BY_SPLIT)}"
            )

        frame = pd.read_csv(csv_file)

        # Keep only the rows of the requested split, re-indexed from 0 so that
        # positional and label-based indexing agree.
        usage = self._USAGE_BY_SPLIT[split]
        self.data = frame[frame['Usage'] == usage].reset_index(drop=True)

        self.transform = transform
        print(f"Loaded {split} set with {len(self.data)} samples")

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        The image is a (48, 48, 3) uint8 array (the grey plane repeated on the
        last axis), or whatever ``transform`` returns for it when a transform
        was supplied. The label is a plain ``int``.
        """
        # Fetch the row once instead of rebuilding it for every column access.
        row = self.data.iloc[idx]

        # Parse the space-separated grey values into a 48x48 plane.
        pixels = np.array([int(p) for p in row['pixels'].split()], dtype=np.uint8)
        pixels = pixels.reshape(48, 48)

        # Convert to 3-channel image by repeating the plane along a new last axis.
        pixels = np.stack([pixels] * 3, axis=-1)

        label = int(row['emotion'])

        if self.transform:
            pixels = self.transform(pixels)

        return pixels, label

# Human-readable emotion names.
# NOTE(review): presumably ordered so that position i names integer label i of
# the `emotion` column — confirm against the dataset description.
emotion_labels = [
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
]

## 4. Model Definition - Basic Neural Network

In [None]:
class BasicNN(nn.Module):
    """
    Basic fully connected neural network (no convolutions).

    Every sample is flattened into one vector before the first layer, so the
    2-D layout of the image is not available to the model.

    Layers: input_dim -> 512 -> 256 -> 128 -> num_classes, with ReLU after each
    hidden layer and dropout (p=0.2) after the first two hidden layers only.
    """
    def __init__(self, num_classes=7, input_dim=48 * 48 * 3):
        """
        Args:
            num_classes: Number of output logits.
            input_dim: Number of values per flattened sample. The default,
                48 * 48 * 3 = 6912, is a 48x48 image with 3 channels
                (2304 pixels x 3 channels).
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Map a batch ``x`` of shape (N, ...) to logits of shape (N, num_classes)."""
        # Flatten the image - loses all spatial structure!
        # reshape (unlike view) also accepts non-contiguous inputs, e.g. a
        # batch that was permuted from NHWC to NCHW, copying only when needed.
        x = x.reshape(x.size(0), -1)

        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        # Raw logits: no softmax is applied here.
        x = self.fc4(x)
        return x

## 5. Training Functions

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to optimise; switched to training mode here.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values and the accuracy over all seen samples, in percent.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for images, labels in tqdm(dataloader, desc='Training'):
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # max over dim 1 returns (values, indices); the indices are the predictions.
        predicted = logits.max(1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    return loss_sum / len(dataloader), 100. * n_correct / n_seen

def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Args:
        model: Module to evaluate; switched to evaluation mode here.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch loss
        values and the accuracy over all seen samples, in percent.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    # Gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc='Validating'):
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            loss_sum += criterion(logits, labels).item()

            # max over dim 1 returns (values, indices); the indices are the predictions.
            predicted = logits.max(1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    return loss_sum / len(dataloader), 100. * n_correct / n_seen

## 6. Setup Data Loaders

In [None]:
# Preprocessing shared by all three splits: the array returned by the dataset
# is turned into a PIL image, converted to a tensor, and normalised per channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

# One dataset object per split, all reading the same CSV file.
csv_path = 'fer2013.csv'
train_dataset = FER2013Dataset(csv_path, split='train', transform=transform)
val_dataset = FER2013Dataset(csv_path, split='val', transform=transform)
test_dataset = FER2013Dataset(csv_path, split='test', transform=transform)

# Loaders differ only in shuffling: the training data is shuffled, while
# validation and test data keep their order.
batch_size = 64
loader_options = {'batch_size': batch_size, 'num_workers': 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

## 7. Initialize WandB

In [None]:
# Initialize WandB
wandb.login()

# Hyper-parameters and metadata recorded with the run; the training cells read
# "epochs" and "learning_rate" back from this dict.
config = {
    "model": "BasicNN",
    "dataset": "FER2013",
    "epochs": 30,
    # Record the batch size the data loaders were actually built with rather
    # than repeating the literal, so the logged value cannot drift from it.
    "batch_size": batch_size,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "architecture": "Fully Connected",
    # A throwaway BasicNN is instantiated only to count its parameters.
    "parameters": sum(p.numel() for p in BasicNN().parameters())
}

wandb.init(
    project="fer-challenge",
    name="experiment_01_BasicNN",
    config=config
)

## 8. Train the Model

In [None]:
# Setup
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Model, loss and optimizer; the learning rate comes from the run config.
model = BasicNN(num_classes=7)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])

# Print model info
print(f"\nModel: {config['model']}")
print(f"Total parameters: {config['parameters']:,}")
print(
    "\nExpected outcome: Poor performance due to lack of spatial feature extraction",
    "This model treats each pixel independently, ignoring spatial relationships!\n",
    sep="\n",
)

In [None]:
# Training loop
# Per-epoch history; the plotting and summary cells read these lists.
train_losses, val_losses = [], []
train_accs, val_accs = [], []

num_epochs = config['epochs']
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{config['epochs']}")
    print("-" * 50)

    # One optimisation pass over the training data.
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Evaluation pass over the validation data.
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

    # Log to WandB. "epoch" is the 0-based loop index (the console output above
    # is 1-based); the gap is train accuracy minus validation accuracy.
    epoch_metrics = {
        "epoch": epoch,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
        "overfitting_gap": train_acc - val_acc,
    }
    wandb.log(epoch_metrics)

## 9. Visualize Results

In [None]:
# Plot training curves: loss on the left panel, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# Each entry: axis, train series + legend label, validation series + legend
# label, y-axis label, panel title.
curve_specs = [
    (ax1, train_losses, 'Train Loss', val_losses, 'Val Loss',
     'Loss', 'Training and Validation Loss'),
    (ax2, train_accs, 'Train Acc', val_accs, 'Val Acc',
     'Accuracy (%)', 'Training and Validation Accuracy'),
]
for axis, train_curve, train_label, val_curve, val_label, y_label, title in curve_specs:
    axis.plot(train_curve, label=train_label)
    axis.plot(val_curve, label=val_label)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.set_title(title)
    axis.legend()
    axis.grid(True)

plt.tight_layout()
# Log the figure to WandB before plt.show() is called.
wandb.log({"training_curves": wandb.Image(plt)})
plt.show()

## 10. Test Set Evaluation

In [None]:
# Evaluate on test set
# The validation routine is reused unchanged on the test loader, so its
# progress bar still reads 'Validating'.
test_loss, test_acc = validate_epoch(model, test_loader, criterion, device)
print("\nTest Set Performance:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")

# Record the final test metrics with the WandB run.
test_metrics = {"test_loss": test_loss, "test_acc": test_acc}
wandb.log(test_metrics)

## 11. Analysis and Conclusions

In [None]:
# Console summary of the experiment, followed by closing the WandB run.
banner = "=" * 60
print("\n" + banner)
print("EXPERIMENT SUMMARY: Basic Neural Network")
print(banner)
print("\nModel: Fully Connected Neural Network")
print(f"Parameters: {config['parameters']:,}")

# Headline numbers computed from the recorded history.
best_val_acc = max(val_accs)
final_gap = train_accs[-1] - val_accs[-1]
print("\nResults:")
print(f"  - Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"  - Test Accuracy: {test_acc:.2f}%")
print(f"  - Final Overfitting Gap: {final_gap:.2f}%")

# NOTE(review): the findings below are fixed text; the "~25-30% accuracy"
# figure is not derived from val_accs/test_acc — confirm it matches the
# numbers printed above.
key_findings = [
    "1. The model performs poorly (~25-30% accuracy) on this image classification task",
    "2. This is because fully connected networks don't capture spatial relationships",
    "3. Each pixel is treated independently, losing important spatial patterns",
    "4. The model has many parameters but low capacity for learning image features",
]
print("\nKey Findings:")
for finding in key_findings:
    print(finding)

print("\nConclusion:")
print("This experiment demonstrates why CNNs are superior for image tasks.")
print("CNNs use convolutional layers to capture local patterns and spatial hierarchies.")

# Close WandB run
wandb.finish()