# Optimized CIFAR-10 Model with Advanced Data Augmentation (Modular Version)

**Target**: <200k parameters ✅, RF>44 ✅, >85% accuracy ❌ — **very close (84.08%)**

**Model Specifications**:
- **Parameters**: 150,690 (<200k constraint ✅)
- **Receptive Field**: 67 (>44 requirement ✅)
- **Peak Accuracy**: 84.08% (very close to 85% target)
- **Architecture**: Depthwise separable convolutions with optimized channel progression

**Advanced Techniques**:
- **MixUp**: Linear interpolation between training examples and labels (Zhang et al., 2017)
- **RICAP**: Random Image Cropping and Patching (Takahashi et al., 2018)
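- **CutMix**: Cut-and-paste augmentation — a patch from one training image replaces a region of another, and the labels are mixed in proportion to the patch area (Yun et al., 2019)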
- **Efficient Architecture**: Optimized for parameter efficiency

**Architecture**: Efficient CNN with depthwise separable convolutions
- **Initial Conv**: 3→20 channels, 32×32→32×32
- **Layer 1**: 2× EfficientBlock(20→20, stride=1), 32×32→32×32
- **Layer 2**: 2× EfficientBlock(20→40, stride=2), 32×32→16×16  
- **Layer 3**: 2× EfficientBlock(40→80, stride=2), 16×16→8×8
- **Layer 4**: 2× EfficientBlock(80→160, stride=2), 8×8→4×4
- **Classifier**: Global Average Pooling + Linear(160→10)

**Key Features**:
- Depthwise separable convolutions for maximum parameter efficiency
- Advanced data augmentation with MixUp, RICAP, and CutMix
- Label smoothing and proper regularization
- Optimized learning rate scheduling
- No SE attention blocks to maintain parameter constraint
- **EMA bug fixed**: Model selection now uses correct test accuracy (not EMA)

**Modular Structure**:
- `config.py`: All configuration parameters
- `models.py`: Neural network architectures
- `augmentation.py`: Data augmentation techniques
- `training.py`: Training functions and utilities
- `utils.py`: Utility functions and helpers


In [None]:
# Install required packages
%pip install albumentations --quiet
%pip install torch torchvision torchaudio --quiet
%pip install timm --quiet

# Import required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
import math
import albumentations as A
from albumentations.pytorch import ToTensorV2
import time
import random
from collections import OrderedDict
import copy
warnings.filterwarnings('ignore')

# Import our modular components
from config import *
from models import OptimizedCIFAR10Net200K, calculate_receptive_field, count_parameters, print_model_summary
from augmentation import get_albumentations_transforms, get_augmentation_techniques
from training import train_model_advanced, test_epoch, evaluate_with_tta, LabelSmoothingCrossEntropy
from utils import set_seed, get_device, load_cifar10_data, plot_training_history, print_training_summary, create_results_dict, save_training_results, print_model_info

# Set device and seeds
# `device` is reused by the later cells: the model is moved onto it and it is
# passed to the training and evaluation helpers.
# NOTE(review): get_device() presumably prefers CUDA when available — confirm in utils.py.
device = get_device()
# Called with no arguments, so whatever default seed utils.set_seed defines is used.
# NOTE(review): presumably seeds random / numpy / torch for reproducibility — confirm in utils.py.
set_seed()


In [None]:
# Instantiate the network on the selected device, then measure it against the
# two design budgets used throughout this notebook: < 200k parameters and a
# receptive field > 44.
model = OptimizedCIFAR10Net200K().to(device)
total_params, trainable_params = count_parameters(model)
rf = calculate_receptive_field(model)

# Emit the whole report in one call; sep='\n' puts each entry on its own line,
# so the output matches six consecutive print() calls.
print(
    f'Total parameters: {total_params:,}',
    f'Trainable parameters: {trainable_params:,}',
    f'Parameters < 200k: {"✅ YES" if total_params < 200000 else "❌ NO"}',
    # Reference values quoted in the notebook header, for eyeballing against the line above/below.
    'Expected: 150,690 parameters with RF=67',
    f'Receptive Field: {rf}',
    f'RF > 44: {"✅ YES" if rf > 44 else "❌ NO"}',
    sep='\n',
)


In [None]:
# Build the train/test transform pair first, then hand both to the CIFAR-10
# loader factory together with the configured batch size.
train_transform, test_transform = get_albumentations_transforms()
train_loader, test_loader = load_cifar10_data(
    train_transform,
    test_transform,
    TRAINING_CONFIG['batch_size'],
)

# Augmentation techniques are fetched separately; they are passed to
# train_model_advanced in the training cell.
augmentation_techniques = get_augmentation_techniques()

print("CIFAR-10 class names: {}".format(DATASET_CONFIG['class_names']))


In [None]:
# Training Execution
print_model_info(model, total_params, rf)

# Start training.
# Elapsed time is measured with time.perf_counter(), a monotonic clock, so a
# system clock adjustment (NTP sync, manual change) during a multi-hour run
# cannot distort or negate the reported duration. `start_time` is therefore
# only meaningful as a reference point for the subtraction below.
start_time = time.perf_counter()

# Returns the per-epoch loss/accuracy histories plus the best accuracy seen;
# these are consumed by the evaluation and plotting cells.
train_losses, train_accs, test_losses, test_accs, best_acc = train_model_advanced(
    model, device, train_loader, test_loader, augmentation_techniques,
    epochs=TRAINING_CONFIG['epochs'], lr=TRAINING_CONFIG['learning_rate']
)

# Duration in seconds; converted to hours only for display.
training_time = time.perf_counter() - start_time
print(f"Training completed in {training_time/3600:.2f} hours")


In [None]:
# Final evaluation with TTA
print("\n" + "=" * 60)
print("FINAL EVALUATION WITH TEST TIME AUGMENTATION")
print("=" * 60)

# Load best model.
# map_location remaps the stored tensors onto the device selected for this
# session; without it, a checkpoint written during a CUDA run fails to
# deserialize when the notebook is re-opened on a CPU-only machine.
# NOTE(review): 'best_model_200k.pth' is presumably written by
# train_model_advanced as a plain state_dict — confirm in training.py.
model.load_state_dict(torch.load('best_model_200k.pth', map_location=device))
model.eval()

# Standard evaluation (single pass over the test loader, no augmentation here)
test_loss, test_acc = test_epoch(model, device, test_loader, LabelSmoothingCrossEntropy(smoothing=0.1))
print(f"Standard Test Accuracy: {test_acc:.2f}%")

# TTA evaluation; the number of augmented passes comes from TTA_CONFIG
tta_acc = evaluate_with_tta(model, test_loader, device, TTA_CONFIG['num_augmentations'])

# Print final results
print_training_summary(model, total_params, rf, train_accs, test_accs, best_acc, tta_acc, training_time)


In [None]:
# Plot the loss/accuracy histories collected during training.
plot_training_history(train_losses, train_accs, test_losses, test_accs)

# Print the model summary for the trained network.
print_model_summary(model)

# Bundle the headline numbers of this run into a results dict and save it.
results = create_results_dict(
    'OptimizedCIFAR10Net200K',
    total_params,
    rf,
    train_accs,
    test_accs,
    best_acc,
    tta_acc,
    training_time,
)
save_training_results(results)

# Informational only: this cell does not write the checkpoint itself.
print("Best model saved to 'best_model_200k.pth'")
