In [None]:
# ============================================================
# CELL 1: Install Required Libraries
# ============================================================
# Run this cell first to install all necessary packages.
# The first command installs torch/torchvision/torchaudio from the PyTorch
# wheel index passed via --index-url (the cu118 index); the second installs
# the remaining packages from the default package index.
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install scikit-learn tqdm matplotlib


In [None]:
# ============================================================
# CELL 2: Import Libraries and Setup
# ============================================================
# Import all necessary libraries and configure the environment
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import time
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Set device (GPU if available, else CPU).
# `device` is a module-level global: later cells pass it to the training
# helpers and one of them uses it as a default argument value, so this cell
# has to run before those cells are executed.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


In [None]:
# ============================================================
# CELL 3: Define LeNet-5 Architecture
# ============================================================
# LeNet-5: Classic CNN architecture with 2 conv layers
def create_lenet5(num_classes=10):
    """Build a LeNet-5 style CNN as an ``nn.Sequential``.

    Layout: two 5x5 convolutions (3->6 and 6->16 channels), each followed by
    ReLU and average pooling, then three fully connected layers
    (400 -> 120 -> 84 -> ``num_classes``).

    The first convolution keeps ``padding=2``, so a 28x28 input reaches the
    second pooling stage as 10x10 while a 32x32 input (what ``load_dataset``
    in this notebook produces) reaches it as 12x12. The second pooling stage
    is therefore an ``AdaptiveAvgPool2d((5, 5))``: for a 10x10 map it pools
    exactly the same 2x2 windows as ``AvgPool2d(2, 2)``, and for a 12x12 map
    it still yields the 5x5 map the first ``Linear`` layer expects. With a
    fixed 2x2 pool, 32x32 inputs produced 16*6*6 features and the forward
    pass failed with a shape mismatch.

    Args:
        num_classes: Size of the final ``Linear`` layer's output.

    Returns:
        nn.Sequential: The assembled model. Layer indices are the same as in
        the fixed-pool version (12 modules).
    """
    model = nn.Sequential(
        # Conv Layer 1
        nn.Conv2d(3, 6, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.AvgPool2d(kernel_size=2, stride=2),

        # Conv Layer 2
        nn.Conv2d(6, 16, kernel_size=5),
        nn.ReLU(),
        # Always emit 5x5 maps so the classifier input is 16 * 5 * 5.
        nn.AdaptiveAvgPool2d((5, 5)),

        # Flatten
        nn.Flatten(),

        # Fully Connected Layers
        nn.Linear(16 * 5 * 5, 120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.ReLU(),
        nn.Linear(84, num_classes)
    )
    return model


In [None]:
# ============================================================
# CELL 4: Define AlexNet Architecture
# ============================================================
# AlexNet: Deep CNN with 5 conv layers and dropout
def create_alexnet(num_classes=10):
    """Assemble the notebook's small-image AlexNet variant.

    Five 3x3 convolutions (64, 192, 384, 256, 256 output channels) with 2x2
    max pooling after the 1st, 2nd and 5th, followed by a classifier of
    Linear(256*4*4, 4096) -> Linear(4096, 4096) -> Linear(4096, num_classes)
    with ReLU and Dropout(0.5) after the first two linear layers.

    Args:
        num_classes: Output size of the last linear layer.

    Returns:
        nn.Sequential: A flat 21-module sequential model.
    """
    def conv3x3(c_in, c_out):
        # stride defaults to 1, which is what every convolution here uses
        return nn.Conv2d(c_in, c_out, kernel_size=3, padding=1)

    def halve():
        return nn.MaxPool2d(kernel_size=2, stride=2)

    feature_layers = [
        conv3x3(3, 64), nn.ReLU(), halve(),
        conv3x3(64, 192), nn.ReLU(), halve(),
        conv3x3(192, 384), nn.ReLU(),
        conv3x3(384, 256), nn.ReLU(),
        conv3x3(256, 256), nn.ReLU(), halve(),
    ]

    hidden = 4096
    classifier_layers = [
        nn.Flatten(),
        nn.Linear(256 * 4 * 4, hidden), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(hidden, hidden), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(hidden, num_classes),
    ]

    return nn.Sequential(*feature_layers, *classifier_layers)


In [None]:
# ============================================================
# CELL 5: Define VGGNet Architecture
# ============================================================
# VGGNet: Very deep network with small 3x3 filters
def create_vggnet(num_classes=10):
    """Assemble the notebook's simplified VGG-style network.

    Four stages with widths 64, 128, 256 and 512; each stage is two
    3x3 conv + ReLU pairs followed by a 2x2 max pool. The classifier is
    Flatten -> Linear(512*2*2, 4096) -> ReLU -> Dropout(0.5) ->
    Linear(4096, num_classes).

    Args:
        num_classes: Output size of the final linear layer.

    Returns:
        nn.Sequential: A flat 25-module sequential model.
    """
    stage_widths = (64, 128, 256, 512)
    convs_per_stage = 2

    layers = []
    channels_in = 3
    for width in stage_widths:
        for _ in range(convs_per_stage):
            layers.append(nn.Conv2d(channels_in, width, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            channels_in = width
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

    layers.extend([
        nn.Flatten(),
        nn.Linear(512 * 2 * 2, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, num_classes),
    ])
    return nn.Sequential(*layers)


In [None]:
# ============================================================
# CELL 6: Define ResNet Building Blocks
# ============================================================
# ResNet: Helper function to create residual blocks
class _ResidualBlock(nn.Module):
    """Two 3x3 conv + BatchNorm layers whose output is added to a shortcut.

    ``forward`` computes ``relu(body(x) + shortcut(x))``. The shortcut is the
    identity when the block keeps both the channel count and the spatial
    size; otherwise it is a 1x1 convolution (with the block's stride) plus
    BatchNorm so that both operands of the addition have the same shape.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Residual branch: conv-BN-ReLU-conv-BN.
        self.body = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        if stride != 1 or in_channels != out_channels:
            # Projection shortcut: match channels and resolution of `body`.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Identity()
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.body(x) + self.shortcut(x))


def create_resnet_block(in_channels, out_channels, stride=1):
    """Create a basic residual block.

    Previously this returned a plain conv-BN-ReLU-conv-BN ``nn.Sequential``
    with no skip connection, so networks stacked from it were ordinary deep
    CNNs rather than residual networks. The returned module now adds the
    (possibly projected) input back onto the convolutional branch and applies
    a ReLU afterwards. Input/output tensor shapes are unchanged, so the block
    still composes inside ``nn.Sequential``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first 3x3 convolution (and of the projection
            shortcut when one is needed).

    Returns:
        nn.Module: A module mapping ``(N, in_channels, H, W)`` feature maps to
        ``out_channels`` channels, downsampled by ``stride``.
    """
    return _ResidualBlock(in_channels, out_channels, stride)


In [None]:
# ============================================================
# CELL 7: Define ResNet-50 Architecture
# ============================================================
# ResNet-50 (simplified): stem conv + four stages of 3/4/6/3 two-conv blocks built by create_resnet_block
def create_resnet50(num_classes=10):
    """Build the notebook's simplified "ResNet-50" model.

    Structure: a 3x3 stem convolution (3 -> 64 channels) with BatchNorm and
    ReLU, four stages assembled from ``create_resnet_block`` with 3, 4, 6 and
    3 blocks at widths 64, 128, 256 and 512 (stages 2-4 start with a stride-2
    block), global average pooling, and a ``Linear(512, num_classes)`` head.

    Args:
        num_classes: Output size of the final linear layer.

    Returns:
        nn.Module: A freshly initialised ``ResNet50`` instance.
    """
    class ResNet50(nn.Module):
        def __init__(self, num_classes):
            super().__init__()
            # Stem
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu = nn.ReLU()

            # Stages (block counts 3 / 4 / 6 / 3)
            self.layer1 = self._make_layer(64, 64, 3)
            self.layer2 = self._make_layer(64, 128, 4, stride=2)
            self.layer3 = self._make_layer(128, 256, 6, stride=2)
            self.layer4 = self._make_layer(256, 512, 3, stride=2)

            # Head
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512, num_classes)

        def _make_layer(self, in_channels, out_channels, blocks, stride=1):
            """Stack `blocks` blocks; only the first changes width/stride."""
            entry = create_resnet_block(in_channels, out_channels, stride)
            followers = [
                create_resnet_block(out_channels, out_channels)
                for _ in range(blocks - 1)
            ]
            return nn.Sequential(entry, *followers)

        def forward(self, x):
            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)
            for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
                out = stage(out)
            pooled = self.avgpool(out)
            return self.fc(torch.flatten(pooled, 1))

    return ResNet50(num_classes)


In [None]:
# ============================================================
# CELL 8: Define ResNet-100 Architecture
# ============================================================
# ResNet-100 (simplified): same layout as the ResNet-50 cell with the stage depths doubled to 6/8/12/6 blocks
def create_resnet100(num_classes=10):
    """Build the notebook's simplified "ResNet-100" model.

    Structure: a 3x3 stem convolution (3 -> 64 channels) with BatchNorm and
    ReLU, four stages assembled from ``create_resnet_block`` with 6, 8, 12
    and 6 blocks at widths 64, 128, 256 and 512 (stages 2-4 start with a
    stride-2 block), global average pooling, and a
    ``Linear(512, num_classes)`` head.

    Args:
        num_classes: Output size of the final linear layer.

    Returns:
        nn.Module: A freshly initialised ``ResNet100`` instance.
    """
    class ResNet100(nn.Module):
        def __init__(self, num_classes):
            super().__init__()
            # Stem
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu = nn.ReLU()

            # Stages (block counts 6 / 8 / 12 / 6)
            self.layer1 = self._make_layer(64, 64, 6)
            self.layer2 = self._make_layer(64, 128, 8, stride=2)
            self.layer3 = self._make_layer(128, 256, 12, stride=2)
            self.layer4 = self._make_layer(256, 512, 6, stride=2)

            # Head
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512, num_classes)

        def _make_layer(self, in_channels, out_channels, blocks, stride=1):
            """Stack `blocks` blocks; only the first changes width/stride."""
            entry = create_resnet_block(in_channels, out_channels, stride)
            followers = [
                create_resnet_block(out_channels, out_channels)
                for _ in range(blocks - 1)
            ]
            return nn.Sequential(entry, *followers)

        def forward(self, x):
            out = self.conv1(x)
            out = self.bn1(out)
            out = self.relu(out)
            for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
                out = stage(out)
            pooled = self.avgpool(out)
            return self.fc(torch.flatten(pooled, 1))

    return ResNet100(num_classes)


In [None]:
# ============================================================
# CELL 9: Define EfficientNet Architecture
# ============================================================
# EfficientNet: Efficient network with balanced scaling
def create_efficientnet(num_classes=10):
    """Build the notebook's simplified EfficientNet-style model.

    The feature extractor is six conv(3x3, no bias) + BatchNorm + ReLU triples
    with channel plan 3 -> 32 -> 16 -> 24 -> 40 -> 80 -> 112 (the 3rd, 4th and
    5th convolutions use stride 2), closed by global average pooling. A single
    ``Linear(112, num_classes)`` acts as the classifier.

    Args:
        num_classes: Output size of the classifier.

    Returns:
        nn.Module: A freshly initialised ``EfficientNet`` instance with
        ``features`` and ``classifier`` submodules.
    """
    # (in_channels, out_channels, stride) for each conv/BN/ReLU triple
    stage_plan = (
        (3, 32, 1),
        (32, 16, 1),
        (16, 24, 2),
        (24, 40, 2),
        (40, 80, 2),
        (80, 112, 1),
    )

    class EfficientNet(nn.Module):
        def __init__(self, num_classes):
            super().__init__()
            stack = []
            for c_in, c_out, step in stage_plan:
                stack.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=step, padding=1, bias=False))
                stack.append(nn.BatchNorm2d(c_out))
                stack.append(nn.ReLU())
            stack.append(nn.AdaptiveAvgPool2d((1, 1)))

            self.features = nn.Sequential(*stack)
            self.classifier = nn.Linear(112, num_classes)

        def forward(self, x):
            pooled = self.features(x)
            return self.classifier(torch.flatten(pooled, 1))

    return EfficientNet(num_classes)


In [None]:
# ============================================================
# CELL 10: Define InceptionV3 Architecture
# ============================================================
# InceptionV3: Network with inception modules (parallel convolutions)
def create_inceptionv3(num_classes=10):
    """Build the notebook's simplified Inception-style model.

    Two 3x3 stem convolutions (3 -> 64 -> 192), each followed by ReLU and a
    2x2 max pool, feed two ``InceptionModule`` instances. Each module runs
    four parallel branches and concatenates them along the channel axis
    (64 + 64 + 96 + 32 = 256 channels). Global average pooling and a
    ``Linear(256, num_classes)`` layer produce the output.

    Args:
        num_classes: Output size of the final linear layer.

    Returns:
        nn.Module: A freshly initialised ``InceptionV3`` instance.
    """
    F = nn.functional

    class InceptionModule(nn.Module):
        """Four parallel convolution branches joined by channel concatenation."""

        def __init__(self, in_channels):
            super().__init__()
            # 1x1 branch
            self.branch1 = nn.Conv2d(in_channels, 64, kernel_size=1)

            # 1x1 -> 3x3 branch
            self.branch2 = nn.Sequential(
                nn.Conv2d(in_channels, 48, kernel_size=1),
                nn.Conv2d(48, 64, kernel_size=3, padding=1),
            )

            # 1x1 -> 3x3 -> 3x3 branch
            self.branch3 = nn.Sequential(
                nn.Conv2d(in_channels, 64, kernel_size=1),
                nn.Conv2d(64, 96, kernel_size=3, padding=1),
                nn.Conv2d(96, 96, kernel_size=3, padding=1),
            )

            # pooled 1x1 branch
            self.branch4 = nn.Sequential(
                nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
                nn.Conv2d(in_channels, 32, kernel_size=1),
            )

        def forward(self, x):
            branches = (self.branch1, self.branch2, self.branch3, self.branch4)
            return torch.cat([branch(x) for branch in branches], 1)

    class InceptionV3(nn.Module):
        def __init__(self, num_classes):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
            self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
            self.inception1 = InceptionModule(192)
            self.inception2 = InceptionModule(256)
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(256, num_classes)

        def forward(self, x):
            # Stem: conv -> ReLU -> 2x2 max pool, twice
            for stem_conv in (self.conv1, self.conv2):
                x = F.max_pool2d(F.relu(stem_conv(x)), 2)
            x = self.inception2(self.inception1(x))
            pooled = self.avgpool(x)
            return self.fc(torch.flatten(pooled, 1))

    return InceptionV3(num_classes)


In [None]:
# ============================================================
# CELL 11: Define MobileNet Architecture
# ============================================================
# MobileNet: Lightweight network for mobile devices
def create_mobilenet(num_classes=10):
    """Build the notebook's simplified MobileNet-style model.

    One standard 3x3 conv/BN/ReLU block (3 -> 32) is followed by six
    depthwise-separable blocks (a 3x3 convolution with ``groups`` equal to its
    input channels, then a 1x1 convolution, each with BatchNorm and ReLU)
    ending at 512 channels, then global average pooling and a
    ``Linear(512, num_classes)`` layer.

    Args:
        num_classes: Output size of the final linear layer.

    Returns:
        nn.Module: A freshly initialised ``MobileNet`` instance with ``model``
        and ``fc`` submodules.
    """
    def standard_block(c_in, c_out, stride):
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, 3, stride, 1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
        )

    def separable_block(c_in, c_out, stride):
        depthwise = [
            nn.Conv2d(c_in, c_in, 3, stride, 1, groups=c_in, bias=False),
            nn.BatchNorm2d(c_in),
            nn.ReLU(),
        ]
        pointwise = [
            nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
        ]
        return nn.Sequential(*depthwise, *pointwise)

    # (in_channels, out_channels, stride) of each depthwise-separable block
    separable_plan = (
        (32, 64, 1),
        (64, 128, 2),
        (128, 128, 1),
        (128, 256, 2),
        (256, 256, 1),
        (256, 512, 2),
    )

    class MobileNet(nn.Module):
        def __init__(self, num_classes):
            super().__init__()
            stages = [standard_block(3, 32, 1)]
            for c_in, c_out, stride in separable_plan:
                stages.append(separable_block(c_in, c_out, stride))
            stages.append(nn.AdaptiveAvgPool2d(1))

            self.model = nn.Sequential(*stages)
            self.fc = nn.Linear(512, num_classes)

        def forward(self, x):
            pooled = self.model(x)
            return self.fc(pooled.view(-1, 512))

    return MobileNet(num_classes)


In [None]:
# ============================================================
# CELL 12: Data Loading Function
# ============================================================
# Function to load and preprocess datasets (MNIST, FashionMNIST, CIFAR-10)
def load_dataset(dataset_name='CIFAR10', batch_size=128):
    """Create train/test ``DataLoader`` objects for one of the supported datasets.

    ``'MNIST'`` and ``'FashionMNIST'`` share one transform for both splits:
    convert to 3 channels, resize to 32x32, convert to tensor, normalise with
    mean 0.5 / std 0.5. Any other ``dataset_name`` is treated as CIFAR-10:
    the training split gets a random horizontal flip and a padded random
    32x32 crop before tensor conversion and normalisation, the test split only
    tensor conversion and normalisation.

    Datasets are stored under ``./data`` and downloaded if missing.

    Args:
        dataset_name: ``'MNIST'``, ``'FashionMNIST'``, or anything else for
            CIFAR-10.
        batch_size: Batch size used by both loaders.

    Returns:
        tuple: ``(train_loader, test_loader, num_classes)``; ``num_classes``
        is 10 for every dataset handled here. Only the train loader shuffles.
    """
    data_root = './data'

    if dataset_name in ('MNIST', 'FashionMNIST'):
        if dataset_name == 'MNIST':
            dataset_cls = torchvision.datasets.MNIST
        else:
            dataset_cls = torchvision.datasets.FashionMNIST
        grayscale_pipeline = transforms.Compose([
            transforms.Grayscale(3),  # Convert to 3 channels
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ])
        train_transform = grayscale_pipeline
        test_transform = grayscale_pipeline
    else:  # CIFAR10
        dataset_cls = torchvision.datasets.CIFAR10
        rgb_mean = (0.5, 0.5, 0.5)
        rgb_std = (0.5, 0.5, 0.5)
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(rgb_mean, rgb_std),
        ])
        test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(rgb_mean, rgb_std),
        ])

    train_dataset = dataset_cls(root=data_root, train=True,
                                download=True, transform=train_transform)
    test_dataset = dataset_cls(root=data_root, train=False,
                               download=True, transform=test_transform)
    num_classes = 10

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, test_loader, num_classes


In [None]:
# ============================================================
# CELL 13: Define Loss Functions - Focal Loss
# ============================================================
# Focal Loss: Handles class imbalance by focusing on hard examples
class FocalLoss(nn.Module):
    """Focal loss built on top of per-sample cross entropy.

    For each sample the cross-entropy value ``ce`` is converted to
    ``pt = exp(-ce)`` and re-weighted as ``alpha * (1 - pt) ** gamma * ce``;
    the mean over the batch is returned.

    Args:
        alpha: Constant scale applied to every sample's loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma
        # reduction='none' keeps one loss value per sample for re-weighting
        self.ce = nn.CrossEntropyLoss(reduction='none')

    def forward(self, inputs, targets):
        """Return the batch-mean focal loss for logits `inputs` and class indices `targets`."""
        per_sample_ce = self.ce(inputs, targets)
        prob_of_target = torch.exp(-per_sample_ce)
        modulator = (1 - prob_of_target) ** self.gamma
        weighted = self.alpha * modulator * per_sample_ce
        return weighted.mean()


In [None]:
# ============================================================
# CELL 14: Define Loss Functions - ArcFace Loss
# ============================================================
# ArcFace Loss: Creates better feature separation with angular margin
class ArcFaceLoss(nn.Module):
    """Additive angular margin (ArcFace-style) classification loss.

    The module owns a learnable class-centre matrix ``weight`` of shape
    ``(out_features, in_features)``. ``forward`` L2-normalises both the
    features and the centres, takes their cosine similarity, replaces the
    target-class cosine ``cos(theta)`` by ``cos(theta + m)``, scales all
    values by ``s`` and feeds them to cross entropy.

    Args:
        in_features: Dimensionality of the incoming feature vectors.
        out_features: Number of classes (rows of ``weight``).
        s: Scale applied to the cosine logits.
        m: Angular margin in radians added to the target-class angle.
        eps: Lower bound applied to ``1 - cos^2`` before the square root.
            Without it, a cosine of exactly +/-1 (or slightly beyond, from
            rounding) makes ``sqrt`` produce NaN values or infinite
            gradients that poison the whole update.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, eps=1e-7):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.eps = eps
        # torch.empty replaces the legacy torch.FloatTensor(...) constructor;
        # the values are overwritten by the Xavier initialisation below.
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.ce = nn.CrossEntropyLoss()

    def forward(self, features, labels):
        """Return the scalar margin loss.

        Args:
            features: ``(batch, in_features)`` tensor.
            labels: ``(batch,)`` tensor of class indices.
        """
        # Normalize features and weights so their product is a cosine
        features = nn.functional.normalize(features, dim=1)
        weight = nn.functional.normalize(self.weight, dim=1)

        # Compute cosine similarity to every class centre
        cosine = nn.functional.linear(features, weight)

        # sin(theta), with the radicand clamped away from zero / negatives
        sine = torch.sqrt(torch.clamp(1.0 - torch.pow(cosine, 2), min=self.eps))

        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m); the margin is
        # created on the same device/dtype as the logits
        margin = cosine.new_tensor(self.m)
        phi = cosine * torch.cos(margin) - sine * torch.sin(margin)

        # Boolean mask selecting each sample's target class
        target_mask = torch.zeros_like(cosine, dtype=torch.bool)
        target_mask.scatter_(1, labels.view(-1, 1).long(), True)

        # Margin-adjusted logit for the target class, plain cosine elsewhere
        output = torch.where(target_mask, phi, cosine) * self.s

        return self.ce(output, labels)


In [None]:
# ============================================================
# CELL 15: Loss Function Helper
# ============================================================
# Helper function to get loss function by name
def get_loss_function(loss_name, num_classes=10, feature_dim=512):
    """Return a loss module selected by ``loss_name``.

    Args:
        loss_name: ``'Focal Loss'`` selects ``FocalLoss()``; ``'ArcFace'``
            selects ``ArcFaceLoss(feature_dim, num_classes)``. ``'BCE'``,
            ``'CrossEntropy'`` and every unrecognised value select
            ``nn.CrossEntropyLoss()``.
        num_classes: Forwarded to ``ArcFaceLoss`` as its output size.
        feature_dim: Forwarded to ``ArcFaceLoss`` as its input size.

    Returns:
        A freshly constructed loss object.
    """
    if loss_name == 'Focal Loss':
        return FocalLoss()

    if loss_name == 'ArcFace':
        return ArcFaceLoss(feature_dim, num_classes)

    # 'BCE', 'CrossEntropy' and the fallback all map to plain cross entropy.
    return nn.CrossEntropyLoss()


In [None]:
# ============================================================
# CELL 16: Training Function
# ============================================================
# Function to train model for one epoch
def train_one_epoch(model, train_loader, criterion, optimizer, device, use_arcface=False):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the model output is passed to ``criterion`` together with
    the labels, the loss is back-propagated and ``optimizer`` takes one step.

    Accuracy is computed from class scores. Normally the model output already
    is the score matrix. When ``use_arcface`` is true and ``criterion``
    exposes a ``weight`` matrix, the model output is treated as an embedding
    and the scores are its cosine similarities to the rows of
    ``criterion.weight`` (the same quantity the margin loss is built on).
    Taking the argmax of the raw embedding, as was done before, does not
    correspond to a class prediction in that setting.

    Args:
        model: Network to train (switched to training mode here).
        train_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer whose gradients are reset before each batch.
        device: Device the batches are moved to.
        use_arcface: Whether ``criterion`` is an ArcFace-style head (see above).

    Returns:
        tuple: ``(epoch_loss, epoch_acc)`` - the mean of the per-batch losses
        and the accuracy in percent. ``(0.0, 0.0)`` if the loader yields no
        batches (previously a ZeroDivisionError).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for inputs, labels in tqdm(train_loader, desc='Training', leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Score the batch before the parameters are updated
        with torch.no_grad():
            if use_arcface and hasattr(criterion, 'weight'):
                scores = nn.functional.linear(
                    nn.functional.normalize(outputs, dim=1),
                    nn.functional.normalize(criterion.weight, dim=1),
                )
            else:
                scores = outputs
            _, predicted = scores.max(1)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    if num_batches == 0 or total == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / num_batches
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc


In [None]:
# ============================================================
# CELL 17: Evaluation Function
# ============================================================
# Function to evaluate model on test set
def evaluate_model(model, test_loader, criterion, device, use_arcface=False):
    """Evaluate ``model`` on ``test_loader`` without computing gradients.

    Accuracy is computed from class scores. Normally the model output already
    is the score matrix. When ``use_arcface`` is true and ``criterion``
    exposes a ``weight`` matrix, the model output is treated as an embedding
    and the scores are its cosine similarities to the rows of
    ``criterion.weight``; the argmax of the raw embedding, used before, is not
    a class prediction in that setting.

    Args:
        model: Network to evaluate (switched to eval mode here).
        test_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        device: Device the batches are moved to.
        use_arcface: Whether ``criterion`` is an ArcFace-style head (see above).

    Returns:
        tuple: ``(test_loss, test_acc)`` - the mean of the per-batch losses
        and the accuracy in percent. ``(0.0, 0.0)`` if the loader yields no
        batches (previously a ZeroDivisionError).
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing', leave=False):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if use_arcface and hasattr(criterion, 'weight'):
                scores = nn.functional.linear(
                    nn.functional.normalize(outputs, dim=1),
                    nn.functional.normalize(criterion.weight, dim=1),
                )
            else:
                scores = outputs

            running_loss += loss.item()
            num_batches += 1
            _, predicted = scores.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    if num_batches == 0 or total == 0:
        return 0.0, 0.0

    test_loss = running_loss / num_batches
    test_acc = 100. * correct / total

    return test_loss, test_acc


In [None]:
# ============================================================
# CELL 18: Complete Training Pipeline
# ============================================================
# Main function to train and evaluate a model
def train_and_evaluate(model_name, model, train_loader, test_loader, 
                      epochs=10, lr=0.001, optimizer_name='Adam', 
                      loss_name='CrossEntropy', device=device):
    """Train ``model`` for ``epochs`` epochs, evaluating after each one.

    Changes relative to the previous version:

    * ``loss_name='ArcFace'``: the criterion used to be created with the
      default ``feature_dim=512`` although the models in this notebook output
      one value per class, which made the first loss call fail with a shape
      mismatch. The head is now sized from the ``out_features`` of the last
      ``nn.Linear`` found in the model, i.e. the model output is used as the
      embedding. NOTE(review): this assumes the last registered ``nn.Linear``
      produces the model's output - true for the builders in this notebook.
    * Parameters owned by the criterion (the ArcFace class centres) are now
      handed to the optimizer; before, they stayed at their initial values.
    * ``epochs < 1`` raises ``ValueError`` up front instead of failing with an
      ``IndexError`` when the final accuracies are read.

    Args:
        model_name: Label used in log output and in the result record.
        model: ``nn.Module`` to train; it is moved to ``device``.
        train_loader: Loader passed to ``train_one_epoch``.
        test_loader: Loader passed to ``evaluate_model``.
        epochs: Number of train/evaluate rounds (must be >= 1).
        lr: Learning rate.
        optimizer_name: ``'SGD'`` selects SGD with momentum 0.9; ``'Adam'``
            and any other value select Adam.
        loss_name: Name understood by ``get_loss_function``.
        device: Device used for the model, the criterion and the batches.

    Returns:
        tuple: ``(model, results)`` where ``results`` is a dict holding the
        configuration, the final train/test accuracy, the wall-clock training
        time in seconds and the per-epoch loss/accuracy histories.

    Raises:
        ValueError: If ``epochs < 1``, or if ArcFace is requested for a model
            that contains no ``nn.Linear`` layer.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    print(f"\n{'='*60}")
    print(f"Training {model_name}")
    print(f"Optimizer: {optimizer_name}, Loss: {loss_name}, Epochs: {epochs}")
    print(f"{'='*60}")

    model = model.to(device)

    # Setup loss function (before the optimizer, which may need its parameters)
    use_arcface = (loss_name == 'ArcFace')
    if use_arcface:
        linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
        if not linear_layers:
            raise ValueError("ArcFace loss needs a model that ends in an nn.Linear layer")
        output_dim = linear_layers[-1].out_features
        # Model output doubles as the embedding: one dimension per class.
        criterion = get_loss_function(loss_name, num_classes=output_dim,
                                      feature_dim=output_dim)
    else:
        criterion = get_loss_function(loss_name)
    criterion = criterion.to(device)

    # Setup optimizer over the model and any learnable criterion parameters
    trainable_params = list(model.parameters()) + list(criterion.parameters())
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(trainable_params, lr=lr, momentum=0.9)
    else:
        # 'Adam' and every unrecognised name
        optimizer = optim.Adam(trainable_params, lr=lr)

    # Training history
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []

    start_time = time.time()

    for epoch in range(epochs):
        print(f"\nEpoch [{epoch+1}/{epochs}]")

        # Train
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, 
                                                optimizer, device, use_arcface)

        # Evaluate
        test_loss, test_acc = evaluate_model(model, test_loader, criterion, 
                                            device, use_arcface)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

    training_time = time.time() - start_time

    results = {
        'model_name': model_name,
        'optimizer': optimizer_name,
        'loss_function': loss_name,
        'epochs': epochs,
        'final_train_acc': train_accs[-1],
        'final_test_acc': test_accs[-1],
        'training_time': training_time,
        'train_losses': train_losses,
        'train_accs': train_accs,
        'test_losses': test_losses,
        'test_accs': test_accs
    }

    print(f"\nTraining completed in {training_time:.2f} seconds")
    print(f"Final Test Accuracy: {test_accs[-1]:.2f}%")

    return model, results


In [None]:
# ============================================================
# CELL 19: Plotting Function for Model Comparison
# ============================================================
# Function to plot comparison of all models
def plot_model_comparison(results):
    """Plot accuracy and training time for a list of training results.

    The left panel shows train and test accuracy as grouped bars, the right
    panel shows the training time. Both panels place their bars at the
    numeric positions ``0..len(results)-1`` and label those positions with
    the model names. The right panel used to pass the names straight to
    ``bar`` and call ``set_xticklabels`` without fixing the tick positions
    first; with a categorical axis, two results sharing a name were drawn on
    top of each other.

    The figure is saved to ``part1_comparison.png`` and then shown.

    Args:
        results: Iterable of dicts with the keys ``'model_name'``,
            ``'final_train_acc'``, ``'final_test_acc'`` and
            ``'training_time'``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    model_names = [r['model_name'] for r in results]
    train_accs = [r['final_train_acc'] for r in results]
    test_accs = [r['final_test_acc'] for r in results]
    times = [r['training_time'] for r in results]

    # One numeric slot per result, shared by both panels
    x = np.arange(len(model_names))

    # Accuracy comparison
    width = 0.35
    axes[0].bar(x - width/2, train_accs, width, label='Train Accuracy')
    axes[0].bar(x + width/2, test_accs, width, label='Test Accuracy')
    axes[0].set_xlabel('Models')
    axes[0].set_ylabel('Accuracy (%)')
    axes[0].set_title('Model Accuracy Comparison')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Training time comparison
    axes[1].bar(x, times, color='coral')
    axes[1].set_xlabel('Models')
    axes[1].set_ylabel('Training Time (seconds)')
    axes[1].set_title('Training Time Comparison')
    # Fix the tick positions before assigning labels to them
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(model_names, rotation=45, ha='right')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('part1_comparison.png', dpi=300, bbox_inches='tight')
    print("\nComparison plot saved as 'part1_comparison.png'")
    plt.show()


In [None]:
# ============================================================
# CELL 20: PART 1 - Compare All Models Function
# ============================================================
# Part 1: Compare all CNN architectures on chosen dataset
def part1_compare_all_models(dataset_name='CIFAR10', epochs=10):
    """Part 1: train every CNN architecture on one dataset and compare them.

    Each architecture is trained with Adam (lr 0.001) and cross-entropy loss
    for ``epochs`` epochs, a summary table is printed and
    ``plot_model_comparison`` is called with the collected results.

    Models are now built one at a time, right before they are trained, and
    dropped again afterwards. Previously all eight networks were instantiated
    up front and stayed referenced (and, once trained, resident on the
    training device) until the function returned.

    Args:
        dataset_name: Name forwarded to ``load_dataset``.
        epochs: Number of epochs each model is trained for.

    Returns:
        list: One result dict per model, in training order, as produced by
        ``train_and_evaluate``.
    """

    print("\n" + "="*70)
    print("PART 1: COMPARATIVE ANALYSIS OF CNN ARCHITECTURES")
    print("="*70)

    # Load dataset
    train_loader, test_loader, num_classes = load_dataset(dataset_name)

    # Model builders, called lazily inside the training loop
    model_factories = {
        'LeNet-5': create_lenet5,
        'AlexNet': create_alexnet,
        'VGGNet': create_vggnet,
        'ResNet-50': create_resnet50,
        'ResNet-100': create_resnet100,
        'EfficientNet': create_efficientnet,
        'InceptionV3': create_inceptionv3,
        'MobileNet': create_mobilenet
    }

    all_results = []

    # Train each model
    for model_name, build_model in model_factories.items():
        model = build_model(num_classes)
        trained_model, results = train_and_evaluate(
            model_name=model_name,
            model=model,
            train_loader=train_loader,
            test_loader=test_loader,
            epochs=epochs,
            lr=0.001,
            optimizer_name='Adam',
            loss_name='CrossEntropy',
            device=device
        )
        all_results.append(results)

        # Drop the finished model before the next one is built
        del model, trained_model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Display comparison table
    print("\n" + "="*70)
    print("COMPARISON RESULTS")
    print("="*70)
    print(f"{'Model':<15} {'Train Acc':<12} {'Test Acc':<12} {'Time (s)':<12}")
    print("-"*70)
    for result in all_results:
        print(f"{result['model_name']:<15} {result['final_train_acc']:>10.2f}% "
              f"{result['final_test_acc']:>10.2f}% {result['training_time']:>10.2f}")

    # Plot comparison
    plot_model_comparison(all_results)

    return all_results


In [None]:
# ============================================================
# CELL 21: Plotting Function for Part 2
# ============================================================
# Function to plot Part 2 results
def plot_part2_comparison(results):
    """Plot Part 2 train/test accuracies, one panel per dataset.

    The left panel covers results whose ``'dataset'`` is ``'MNIST'``, the
    right panel those with ``'CIFAR10'``. Each result becomes a pair of bars
    (train, test) labelled with its model and loss name. The figure is saved
    to ``part2_comparison.png`` and then shown.

    Args:
        results: Iterable of dicts with the keys ``'dataset'``,
            ``'final_train_acc'``, ``'final_test_acc'`` and ``'config'``
            (itself a dict with ``'model'`` and ``'loss'``).
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    bar_width = 0.35

    # (dataset key in the result records, panel title), left to right
    panels = (
        ('MNIST', 'MNIST Dataset Results'),
        ('CIFAR10', 'CIFAR-10 Dataset Results'),
    )

    for ax, (dataset_key, panel_title) in zip(axes, panels):
        # Group by dataset
        subset = [r for r in results if r['dataset'] == dataset_key]

        tick_labels = [f"{r['config']['model']}\n{r['config']['loss']}" for r in subset]
        train_scores = [r['final_train_acc'] for r in subset]
        test_scores = [r['final_test_acc'] for r in subset]

        positions = np.arange(len(tick_labels))
        ax.bar(positions - bar_width/2, train_scores, bar_width, label='Train')
        ax.bar(positions + bar_width/2, test_scores, bar_width, label='Test')
        ax.set_xlabel('Model + Loss Function')
        ax.set_ylabel('Accuracy (%)')
        ax.set_title(panel_title)
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels, fontsize=8)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('part2_comparison.png', dpi=300, bbox_inches='tight')
    print("\nPart 2 comparison plot saved as 'part2_comparison.png'")
    plt.show()


In [None]:
# ============================================================
# CELL 22: PART 2 - Loss Function Comparison
# ============================================================
# Part 2: Compare different loss functions and optimizers
def part2_loss_function_comparison():
    """Part 2: train each model/optimizer/loss configuration on MNIST and CIFAR10.

    For every dataset, each configuration's model is built, trained and
    evaluated through ``train_and_evaluate``. The result record returned for
    each run is tagged with ``'dataset'`` and ``'config'`` entries, a summary
    table is printed, and the records are handed to ``plot_part2_comparison``.

    Returns:
        list: One tagged result record per (dataset, configuration) pair, in
        the order the runs were executed.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("PART 2: LOSS FUNCTION AND OPTIMIZER COMPARISON")
    print(rule)

    # Configurations as per table
    configurations = [
        {'model': 'VGGNet', 'optimizer': 'Adam', 'epochs': 10, 'loss': 'BCE'},
        {'model': 'AlexNet', 'optimizer': 'SGD', 'epochs': 20, 'loss': 'Focal Loss'},
        {'model': 'ResNet', 'optimizer': 'Adam', 'epochs': 15, 'loss': 'ArcFace'}
    ]

    # Any model name not listed here falls back to ResNet-50.
    model_builders = {'VGGNet': create_vggnet, 'AlexNet': create_alexnet}

    all_part2_results = []

    for dataset_name in ('MNIST', 'CIFAR10'):
        print(f"\n{'*'*70}")
        print(f"Testing on {dataset_name} Dataset")
        print(f"{'*'*70}")

        train_loader, test_loader, num_classes = load_dataset(dataset_name)

        for config in configurations:
            build_model = model_builders.get(config['model'], create_resnet50)
            model = build_model(num_classes)

            # Adam runs use a smaller learning rate than every other optimizer.
            learning_rate = 0.001 if config['optimizer'] == 'Adam' else 0.01

            _, results = train_and_evaluate(
                model_name=f"{config['model']} ({dataset_name})",
                model=model,
                train_loader=train_loader,
                test_loader=test_loader,
                epochs=config['epochs'],
                lr=learning_rate,
                optimizer_name=config['optimizer'],
                loss_name=config['loss'],
                device=device
            )

            # Tag the record so the table and plot can group it later.
            results['dataset'] = dataset_name
            results['config'] = config
            all_part2_results.append(results)

    # Summary table
    print("\n" + rule)
    print("PART 2 RESULTS TABLE")
    print(rule)
    header = (f"{'Model':<10} {'Optimizer':<10} {'Epochs':<8} {'Loss Fn':<15} "
              f"{'Dataset':<10} {'Train Acc':<12} {'Test Acc':<12}")
    print(header)
    print("-"*100)
    for record in all_part2_results:
        cfg = record['config']
        row = (f"{cfg['model']:<10} {cfg['optimizer']:<10} {cfg['epochs']:<8} "
               f"{cfg['loss']:<15} {record['dataset']:<10} "
               f"{record['final_train_acc']:>10.2f}% {record['final_test_acc']:>10.2f}%")
        print(row)

    # Bar-chart comparison of train/test accuracy
    plot_part2_comparison(all_part2_results)

    return all_part2_results


In [None]:
# ============================================================
# CELL 23: Feature Extraction for t-SNE
# ============================================================
# Function to extract features from model for visualization
def extract_features(model, data_loader, device, max_samples=1000):
    """Collect model outputs and labels from a data loader for visualization.

    The model is switched to eval mode and run under ``torch.no_grad()``.
    The returned "features" are whatever ``model(inputs)`` returns (the
    model's final outputs), not activations of an intermediate layer.

    Args:
        model: Module to run; must provide ``eval()`` and be callable on a batch.
        data_loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Device the inputs are moved to before the forward pass.
        max_samples: Upper bound on the number of samples returned. Iteration
            stops as soon as at least this many samples have been collected.

    Returns:
        tuple: ``(features, labels)`` NumPy arrays, each truncated to at most
        ``max_samples`` entries along the first axis.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()
    features_list = []
    labels_list = []
    num_collected = 0

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)

            # Features are the model's forward outputs for this batch.
            outputs = model(inputs)

            features_list.append(outputs.cpu().numpy())
            # .cpu() is a no-op for CPU tensors and keeps this safe if the
            # loader ever yields labels that already live on the device.
            labels_list.append(labels.cpu().numpy())

            # Count the samples actually seen; batches may differ in size, so
            # (number of batches) * (current batch size) is not reliable.
            num_collected += inputs.size(0)
            if num_collected >= max_samples:
                break

    if not features_list:
        raise ValueError("data_loader yielded no batches; cannot extract features")

    features = np.concatenate(features_list, axis=0)[:max_samples]
    labels = np.concatenate(labels_list, axis=0)[:max_samples]

    return features, labels


In [None]:
# ============================================================
# CELL 24: t-SNE Plotting Function
# ============================================================
# Function to plot t-SNE visualization
def plot_tsne_comparison(models_features):
    """Plot one 2-D t-SNE scatter panel per loss function and save the figure.

    Each entry's features are embedded with t-SNE (fixed ``random_state=42``)
    and drawn as a scatter plot coloured by class index 0-9. The figure is
    saved to 'part3_tsne_visualization.png' and then shown.

    Args:
        models_features: Mapping of loss-function name to a
            ``(features, labels)`` pair. ``features`` is passed to
            ``TSNE.fit_transform``; ``labels`` is compared element-wise with
            each class index to build a boolean mask, so it is expected to be
            a NumPy array aligned with ``features``.

    Raises:
        ValueError: If ``models_features`` is empty.
    """
    if not models_features:
        raise ValueError("models_features is empty; nothing to plot")

    # squeeze=False always returns a 2-D axes array, so the single-panel case
    # needs no special handling.
    fig, axes = plt.subplots(1, len(models_features), figsize=(15, 6), squeeze=False)
    axes = axes[0]

    colors = plt.cm.tab10(np.linspace(0, 1, 10))

    for idx, (loss_name, (features, labels)) in enumerate(models_features.items()):
        print(f"Computing t-SNE for {loss_name}...")

        # scikit-learn requires perplexity < n_samples; keep the usual 30
        # whenever there are enough samples and shrink it otherwise.
        perplexity = min(30, max(1, len(features) - 1))

        # The iteration count is left at the library default (1000). Newer
        # scikit-learn renamed the `n_iter` keyword to `max_iter`, so passing
        # either name explicitly breaks on some installed versions.
        tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
        features_2d = tsne.fit_transform(features)

        # Plot
        ax = axes[idx]
        for class_idx in range(10):
            mask = labels == class_idx
            ax.scatter(features_2d[mask, 0], features_2d[mask, 1],
                       c=[colors[class_idx]], label=f'Class {class_idx}',
                       alpha=0.6, s=20)

        ax.set_title(f't-SNE Visualization - {loss_name} Loss', fontsize=12)
        ax.set_xlabel('t-SNE Dimension 1')
        ax.set_ylabel('t-SNE Dimension 2')
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('part3_tsne_visualization.png', dpi=300, bbox_inches='tight')
    print("\nt-SNE visualization saved as 'part3_tsne_visualization.png'")
    plt.show()


In [None]:
# ============================================================
# CELL 25: PART 3 - t-SNE Visualization
# ============================================================
# Part 3: Visualize feature clustering using t-SNE
def part3_tsne_visualization():
    """Part 3: compare BCE and ArcFace feature clustering on CIFAR-10 via t-SNE.

    A fresh VGGNet is trained for 5 epochs with each loss function, up to
    1000 test-set samples are passed through ``extract_features``, and the
    collected features are handed to ``plot_tsne_comparison``.

    Returns:
        dict: Loss-function name mapped to its ``(features, labels)`` pair.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("PART 3: t-SNE VISUALIZATION OF LOSS FUNCTIONS")
    print(rule)

    # All runs share the same CIFAR-10 loaders.
    train_loader, test_loader, num_classes = load_dataset('CIFAR10')

    models_features = {}

    for loss_fn in ('BCE', 'ArcFace'):
        print(f"\nTraining model with {loss_fn} loss...")

        untrained = create_vggnet(num_classes)
        trained, _ = train_and_evaluate(
            model_name=f'VGGNet-{loss_fn}',
            model=untrained,
            train_loader=train_loader,
            test_loader=test_loader,
            epochs=5,  # Reduced epochs for faster visualization
            lr=0.001,
            optimizer_name='Adam',
            loss_name=loss_fn,
            device=device
        )

        print(f"Extracting features for {loss_fn}...")
        models_features[loss_fn] = extract_features(
            trained, test_loader, device, max_samples=1000
        )

    # One t-SNE panel per loss function
    plot_tsne_comparison(models_features)

    return models_features


In [None]:
# ============================================================
# CELL 26: Main Function to Run All Parts
# ============================================================
# Main function - Run all parts of the practical
def main():
    """Run Parts 1-3 of the practical in order and print a closing summary.

    Part 1 compares all models on the chosen dataset for 10 epochs, Part 2
    compares loss functions and optimizers, and Part 3 produces the t-SNE
    visualization. The results returned by each part are not kept.
    """
    rule = "=" * 70

    print(rule)
    print("DEEP LEARNING PRACTICAL - 3")
    print("Comparative Analysis of CNN Architectures")
    print(rule)

    # Dataset used by Part 1. Options: 'MNIST', 'FashionMNIST', 'CIFAR10'
    dataset_choice = 'CIFAR10'

    # PART 1: Compare all models
    print("\nStarting Part 1...")
    part1_compare_all_models(dataset_name=dataset_choice, epochs=10)

    # PART 2: Loss function and optimizer comparison
    print("\nStarting Part 2...")
    part2_loss_function_comparison()

    # PART 3: t-SNE visualization
    print("\nStarting Part 3...")
    part3_tsne_visualization()

    closing_lines = (
        "\n" + rule,
        "ALL TASKS COMPLETED SUCCESSFULLY!",
        rule,
        "\nGenerated files:",
        "1. part1_comparison.png - Model comparison plots",
        "2. part2_comparison.png - Loss function comparison",
        "3. part3_tsne_visualization.png - t-SNE feature visualization",
        "\nCheck the output for detailed results and accuracy metrics.",
    )
    for line in closing_lines:
        print(line)


In [None]:
# ============================================================
# CELL 27: RUN THE PRACTICAL
# ============================================================
# Execute the main function to run all parts
# WARNING: This will take 1.5-2 hours to complete on GPU
# For quick testing, lower the epoch counts used by each part:
#   - Part 1: the `epochs=10` argument that main() passes to part1_compare_all_models()
#   - Part 2: the per-model 'epochs' entries in part2_loss_function_comparison()
#   - Part 3: the `epochs=5` argument inside part3_tsne_visualization()

main()


In [None]:
# ============================================================
# CELL 28 (OPTIONAL): Quick Test - Part 1 Only (3 Models)
# ============================================================
# Quick test of Part 1 with just 3 models (LeNet-5, VGGNet, ResNet-50), 3 epochs each.
# The code below is disabled by being wrapped in a triple-quoted string, not by
# `#` comments: remove the surrounding triple quotes, then run this cell.
# This will complete in about 15-20 minutes

"""
print("QUICK TEST - Running Part 1 with 3 models only")
train_loader, test_loader, num_classes = load_dataset('CIFAR10')

models_dict = {
    'LeNet-5': create_lenet5(num_classes),
    'VGGNet': create_vggnet(num_classes),
    'ResNet-50': create_resnet50(num_classes)
}

all_results = []

for model_name, model in models_dict.items():
    _, results = train_and_evaluate(
        model_name=model_name,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        epochs=3,  # Reduced epochs
        lr=0.001,
        optimizer_name='Adam',
        loss_name='CrossEntropy',
        device=device
    )
    all_results.append(results)

plot_model_comparison(all_results)
"""


In [None]:
# ============================================================
# CELL 29 (OPTIONAL): Quick Test - Part 2 Only (MNIST)
# ============================================================
# Quick test of Part 2 on MNIST only (VGGNet + BCE and AlexNet + Focal Loss, 5 epochs each).
# The code below is disabled by being wrapped in a triple-quoted string, not by
# `#` comments: remove the surrounding triple quotes, then run this cell.
# Note: the snippet only collects the runs in `all_results`; it does not print
# a results table or call a plotting function.

"""
print("QUICK TEST - Running Part 2 on MNIST only")

configurations = [
    {'model': 'VGGNet', 'optimizer': 'Adam', 'epochs': 5, 'loss': 'BCE'},
    {'model': 'AlexNet', 'optimizer': 'SGD', 'epochs': 5, 'loss': 'Focal Loss'},
]

train_loader, test_loader, num_classes = load_dataset('MNIST')
all_results = []

for config in configurations:
    if config['model'] == 'VGGNet':
        model = create_vggnet(num_classes)
    else:
        model = create_alexnet(num_classes)
    
    _, results = train_and_evaluate(
        model_name=f"{config['model']} (MNIST)",
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        epochs=config['epochs'],
        lr=0.001 if config['optimizer'] == 'Adam' else 0.01,
        optimizer_name=config['optimizer'],
        loss_name=config['loss'],
        device=device
    )
    results['dataset'] = 'MNIST'
    results['config'] = config
    all_results.append(results)
"""


In [None]:
# ============================================================
# CELL 30 (OPTIONAL): Quick Test - Part 3 Only
# ============================================================
# Quick test of Part 3: VGGNet trained for 2 epochs per loss function (BCE, ArcFace),
# with t-SNE computed on up to 500 test samples.
# The code below is disabled by being wrapped in a triple-quoted string, not by
# `#` comments: remove the surrounding triple quotes, then run this cell.

"""
print("QUICK TEST - Running Part 3 with 2 epochs")

train_loader, test_loader, num_classes = load_dataset('CIFAR10')
loss_functions = ['BCE', 'ArcFace']
models_features = {}

for loss_fn in loss_functions:
    model = create_vggnet(num_classes)
    model, _ = train_and_evaluate(
        model_name=f'VGGNet-{loss_fn}',
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        epochs=2,  # Reduced epochs
        lr=0.001,
        optimizer_name='Adam',
        loss_name=loss_fn,
        device=device
    )
    
    features, labels = extract_features(model, test_loader, device, max_samples=500)
    models_features[loss_fn] = (features, labels)

plot_tsne_comparison(models_features)
"""
