# Neural Networks Project: MNIST Classification with CNN

This notebook demonstrates how to use this project's neural network framework (the `src` package) to train a CNN on the MNIST dataset, evaluate it, and save its configuration.

## Setup

First, let's import the necessary libraries and set up our environment.

In [None]:
import os
import sys
import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Add the project root to the path
sys.path.append('..')

# Import project modules
from src.models.cnn_model import CNNModel
from src.utils.trainer import Trainer
from src.utils.metrics import MetricsTracker
from src.config.config_manager import ConfigManager, get_default_config

# Seed both RNG sources used in this notebook so reruns are repeatable
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

## Load and Prepare Data

We'll use the MNIST dataset for this example.

In [None]:
# Preprocessing pipeline: tensor conversion followed by normalization with the
# MNIST mean and std
mnist_mean, mnist_std = (0.1307,), (0.3081,)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mnist_mean, mnist_std),
])

# Both splits share one root directory and the same preprocessing; only the
# training split requests a download
data_root = '../data'
train_dataset = datasets.MNIST(data_root, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(data_root, train=False, transform=transform)

# Shuffle the training batches only; the test loader keeps a fixed order
batch_size = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Report how many samples each split contains
print(f"Training dataset size: {len(train_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

## Visualize Some Examples

In [None]:
# Pull the first batch out of the training loader
examples = iter(train_loader)
example_data, example_targets = next(examples)

# Lay the first ten images out on a 2 x 5 grid, each titled with its label
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for i, ax in enumerate(axes.flat):
    # Index 0 selects the single image channel
    ax.imshow(example_data[i][0], cmap='gray')
    ax.set_title(f"Label: {example_targets[i]}")
    ax.axis('off')
fig.tight_layout()
plt.show()

## Configure and Create the Model

Let's use our configuration manager to set up the model parameters.

In [None]:
# Start from the project's default configuration
config_manager = ConfigManager(default_config=get_default_config())

# Override the defaults for MNIST
config_manager.set('model.input_channels', 1)  # MNIST images are grayscale
config_manager.set('model.num_classes', 10)  # 10 digits
config_manager.set('cnn.conv_channels', [32, 64, 128])  # CNN architecture
config_manager.set('cnn.fc_units', [512, 128])  # Fully-connected layers
config_manager.set('training.num_epochs', 5)  # Number of epochs
config_manager.set('training.learning_rate', 0.001)  # Learning rate

# Read the configuration only AFTER the overrides above have been applied.
# This notebook does not show whether get_all() returns a live view or a copy;
# reading it here yields the updated values in either case, whereas reading it
# before the set() calls could leave `config` holding stale defaults.
config = config_manager.get_all()

# Collect the settings handed to the CNN constructor.
# 'dropout_rate' is not overridden above, so it is whatever the default
# configuration provides (assumes the defaults define model.dropout_rate).
model_config = {
    'input_channels': config['model']['input_channels'],
    'num_classes': config['model']['num_classes'],
    'conv_channels': config['cnn']['conv_channels'],
    'fc_units': config['cnn']['fc_units'],
    'dropout_rate': config['model']['dropout_rate']
}

# Build the model and move it to the selected device
model = CNNModel(model_config)
model = model.to(device)

# Print model summary
print(f"CNN Model created with {model.get_parameter_count():,} trainable parameters")

## Set Up the Trainer

Now let's set up the training configuration and create our trainer.

In [None]:
# Hyperparameters that come from the shared configuration
training_cfg = config['training']

# Full set of options handed to the Trainer
trainer_config = {
    'learning_rate': training_cfg['learning_rate'],
    'weight_decay': training_cfg['weight_decay'],
    'num_epochs': training_cfg['num_epochs'],
    'batch_size': batch_size,
    'optimizer': 'adam',  # Adam optimizer
    'scheduler': 'cosine',  # Cosine annealing scheduler
    'criterion': 'cross_entropy',  # Cross-entropy loss
    'clip_grad_norm': 1.0,  # Gradient clipping threshold
    # Patience before early stopping. NOTE(review): this notebook also sets
    # training.num_epochs to 5, so this patience may never be reached — confirm
    # against the Trainer's early-stopping logic.
    'early_stopping_patience': 5,
    'checkpoint_dir': '../checkpoints',  # Where model checkpoints are written
    'save_best_only': True  # Keep only the best model
}

# Make sure the checkpoint directory exists before training starts
checkpoint_dir = trainer_config['checkpoint_dir']
os.makedirs(checkpoint_dir, exist_ok=True)

# Bind the model, the options and the device together
trainer = Trainer(model, trainer_config, device)

## Train the Model

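Now we're ready to train our model. Note that the MNIST test split is passed to the trainer as the validation set, so the "best validation" figures reported below are computed on the same data that is later used for the final test evaluation.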

In [None]:
# Run the training loop; the returned `stats` mapping is indexed below and in
# the plotting cell.
# NOTE(review): test_loader is passed as the second (presumably validation)
# loader, so the "best validation" numbers printed here are measured on the
# test split. If the Trainer picks its best checkpoint from these numbers, the
# later test evaluation is optimistic — consider holding out part of the
# training set for validation instead.
print(f"Starting training for {trainer_config['num_epochs']} epochs...")
stats = trainer.train(train_loader, test_loader)

# Report the best validation accuracy and loss recorded in `stats`, with the
# epoch stored under 'best_epoch'
print(f"\nBest validation accuracy: {stats['best_val_acc']:.2f}%")
print(f"Best validation loss: {stats['best_val_loss']:.4f} (epoch {stats['best_epoch']})")

## Visualize Training Results

Let's visualize how the training and validation metrics changed during training.

In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right.
# Epochs are numbered from 1 on the x-axis.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training vs. validation loss per epoch
loss_ax.plot(range(1, len(stats['train_loss']) + 1), stats['train_loss'], label='Training Loss')
loss_ax.plot(range(1, len(stats['val_loss']) + 1), stats['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True)

# Right panel: training vs. validation accuracy per epoch
acc_ax.plot(range(1, len(stats['train_acc']) + 1), stats['train_acc'], label='Training Accuracy')
acc_ax.plot(range(1, len(stats['val_acc']) + 1), stats['val_acc'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True)

fig.tight_layout()
plt.show()

## Load the Best Model and Evaluate on Test Set

In [None]:
# Restore the best checkpoint from the checkpoint directory
# (file name assumed to match what the Trainer writes — TODO confirm)
checkpoint_dir = trainer_config['checkpoint_dir']
best_model_path = os.path.join(checkpoint_dir, 'best_model.pt')
model.load(best_model_path)

# Score the test split through the trainer
# (presumably the trainer evaluates the same `model` object — verify)
print("Evaluating the best model on the test set...")
test_metrics = trainer.evaluate(test_loader, desc="Test")
test_loss, test_acc = test_metrics
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")

## Visualize Predictions

Let's visualize some predictions from our trained model.

In [None]:
# Take the first batch from the test loader; it stays on the CPU for plotting
test_examples = iter(test_loader)
test_images, test_labels = next(test_examples)

# Run inference on a device copy of the batch, without gradient tracking
model.eval()
with torch.no_grad():
    outputs = model(test_images.to(device))
    # Index of the highest score along dim 1 is the predicted class
    predicted = torch.max(outputs, dim=1).indices

# Bring everything used for plotting back to the CPU
test_images = test_images.cpu()
test_labels = test_labels.cpu()
predicted = predicted.cpu()

# Show the first 15 images on a 3 x 5 grid; the title is green for a correct
# prediction and red for a wrong one
fig, axes = plt.subplots(3, 5, figsize=(15, 8))
for i, ax in enumerate(axes.flat):
    ax.imshow(test_images[i][0], cmap='gray')
    if predicted[i] == test_labels[i]:
        title_color = 'green'
    else:
        title_color = 'red'
    ax.set_title(f"True: {test_labels[i]}, Pred: {predicted[i]}", color=title_color)
    ax.axis('off')
fig.tight_layout()
plt.show()

## Calculate Detailed Metrics

In [None]:
# Tracker that accumulates classification metrics over the 10 digit classes
metrics_tracker = MetricsTracker(task_type='classification', n_classes=10)

# Feed every test batch through the model and record the results
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Softmax over dim 1 turns the raw outputs into per-class probabilities
        probabilities = torch.softmax(outputs, dim=1)
        predicted = torch.max(outputs, dim=1).indices

        # Hand the batch's labels, predictions and probabilities to the tracker
        metrics_tracker.update(labels, predicted, probabilities)

# Show the accumulated metrics
metrics_tracker.print_metrics()

## Save the Model Configuration

Let's save the model configuration for future reference.

In [None]:
# Target file for the saved configuration; its parent directory is created
# first if it does not exist yet
config_path = '../outputs/mnist_cnn_config.yaml'
os.makedirs(os.path.dirname(config_path), exist_ok=True)

# Write the configuration out through the config manager
config_manager.save_config(config_path)
print(f"Configuration saved to {config_path}")

## Conclusion

In this notebook, we have demonstrated how to use the neural networks framework to:

1. Load and prepare data using PyTorch's dataset and dataloader utilities
2. Configure and create a CNN model using our model implementation
3. Train the model using our training utilities
4. Evaluate the model and visualize the results
5. Save the model and configuration for future use

This framework provides a flexible and powerful foundation for implementing and experimenting with different neural network architectures and training strategies.