In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Reproducibility: seed the global RNGs before any model or DataLoader is
# created, so weight initialisation, dropout masks and batch shuffling repeat
# across "Restart Kernel -> Run All" (some GPU kernels may still be
# non-deterministic).
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Set device: use CUDA when PyTorch can see a GPU, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
print("Libraries imported successfully!")

In [None]:
# Download and load MNIST dataset
# Tunable knobs for this cell, gathered in one place.
DATA_ROOT = './data'
BATCH_SIZE = 128
NORMALIZE_MEAN = (0.1307,)  # presumably the MNIST pixel mean -- TODO confirm
NORMALIZE_STD = (0.3081,)   # presumably the MNIST pixel std -- TODO confirm

# Convert images to tensors, then standardise the single channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
])

# Both splits share everything except the `train` flag
mnist_kwargs = dict(root=DATA_ROOT, download=True, transform=transform)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

# Create data loaders: only the training split is shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Input shape: {train_dataset[0][0].shape}")

In [None]:
# Get some random training images (one shuffled batch from the loader)
images, labels = next(iter(train_loader))

# Show the first ten samples of that batch on a 2 x 5 grid, titled by label
n_rows, n_cols = 2, 5
plt.figure(figsize=(10, 4))
for idx in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, idx + 1)
    # squeeze() drops the size-1 dimensions so imshow receives a 2-D array
    plt.imshow(images[idx].squeeze().numpy(), cmap='gray')
    plt.title(f'Label: {labels[idx].item()}')
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
class CNN(nn.Module):
    """Two-convolution image classifier producing 10 raw class scores.

    Data flow in ``forward``:
    conv1 -> act -> conv2 -> act -> 2x2 max-pool -> dropout -> flatten
    -> fc1 -> act -> fc2.

    Both convolutions use 3x3 kernels with padding 1 and the flatten step
    expects 64 * 14 * 14 features per sample, i.e. 28x28 single-channel
    inputs.

    Args:
        activation: 'sigmoid', 'tanh' or 'relu'. Any other value falls back
            to ReLU.
        fc_units: Width of the hidden fully connected layer.
        dropout_rate: Drop probability of the dropout applied after pooling.
    """

    def __init__(self, activation='relu', fc_units=128, dropout_rate=0.25):
        super().__init__()

        # Feature extractor: 1 -> 32 -> 64 channels
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Parameter-free layers: 2x2 max pooling and dropout
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(dropout_rate)

        # Classifier head
        self.fc1 = nn.Linear(64 * 14 * 14, fc_units)
        self.fc2 = nn.Linear(fc_units, 10)

        # Resolve the activation name; unrecognised names use ReLU
        self.activation = (
            torch.sigmoid if activation == 'sigmoid'
            else torch.tanh if activation == 'tanh'
            else F.relu
        )

    def forward(self, x):
        """Return the class scores (logits) for a batch of images ``x``."""
        act = self.activation

        # Two activated convolutions, then pooling and dropout
        features = act(self.conv2(act(self.conv1(x))))
        features = self.dropout1(self.pool(features))

        # Flatten to one row of 64 * 14 * 14 features per sample
        flat = features.view(-1, 64 * 14 * 14)

        # Hidden FC layer with activation, then the output layer
        return self.fc2(act(self.fc1(flat)))

In [None]:
class MLP(nn.Module):
    """Fully connected classifier for flattened 784-feature inputs.

    Each hidden block is applied as
    ``Linear -> [BatchNorm1d] -> activation -> [Dropout]`` and is followed by
    a final ``Linear`` layer producing 10 raw class scores.

    Batch normalisation is applied to the linear pre-activations, before the
    non-linearity and before dropout. Normalising dropout-masked activations
    makes the statistics seen in training differ from those seen in eval mode
    (where dropout is disabled), and would leave the last BatchNorm output
    without an activation.

    The modules are kept in one flat ``nn.ModuleList`` (``Linear`` optionally
    followed by its ``BatchNorm1d``), so parameter names in ``state_dict``
    are ``hidden_layers.<index>.*`` and ``output_layer.*``.

    Args:
        activation: 'sigmoid', 'tanh' or 'relu'. Any other value falls back
            to ReLU.
        hidden_units: Iterable with the width of each hidden layer, in order.
            The default is a tuple so no mutable object is shared between
            instances.
        use_batch_norm: Insert a ``BatchNorm1d`` after every hidden Linear.
        dropout_rate: Drop probability after each hidden activation; values
            <= 0 disable dropout entirely.
    """

    def __init__(self, activation='relu', hidden_units=(256, 128),
                 use_batch_norm=True, dropout_rate=0.25):
        super().__init__()

        self.use_batch_norm = use_batch_norm
        self.dropout_rate = dropout_rate

        # Create layers dynamically: Linear, optionally followed by BatchNorm
        layers = []
        input_size = 784
        for units in hidden_units:
            layers.append(nn.Linear(input_size, units))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(units))
            input_size = units

        self.hidden_layers = nn.ModuleList(layers)
        self.output_layer = nn.Linear(input_size, 10)

        # Resolve the activation name; unrecognised names use ReLU
        if activation == 'sigmoid':
            self.activation = torch.sigmoid
        elif activation == 'tanh':
            self.activation = torch.tanh
        else:
            self.activation = F.relu

        # Dropout is only instantiated when it would actually drop something
        self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None

    def forward(self, x):
        """Return the class scores (logits) for a batch of inputs ``x``."""
        # Flatten input to one row of 784 features per sample
        x = x.view(-1, 784)

        layers = list(self.hidden_layers)
        for idx, layer in enumerate(layers):
            x = layer(x)

            # When a BatchNorm directly follows this Linear, defer the
            # activation and dropout until after the normalisation.
            followed_by_bn = (
                idx + 1 < len(layers)
                and isinstance(layers[idx + 1], nn.BatchNorm1d)
            )
            if followed_by_bn:
                continue

            x = self.activation(x)
            if self.dropout is not None:
                x = self.dropout(x)

        # Output layer (no activation: callers receive logits)
        return self.output_layer(x)

In [None]:
def train_epoch(model, device, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    The model is put in training mode and updated once per batch.

    The reported loss is the mean over *samples*: each batch loss is weighted
    by the batch size, so a shorter final batch does not skew the epoch
    average. This assumes ``criterion`` returns the mean loss of the batch,
    as ``nn.CrossEntropyLoss()`` does with its default reduction.

    Args:
        model: Network to train; called as ``model(data)``.
        device: Device that every batch is moved to.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimiser holding the model parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.

    Returns:
        Tuple ``(avg_loss, accuracy)``: mean loss per sample and accuracy in
        percent, both measured during the pass (i.e. while the weights are
        still being updated).

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        batch_size = target.size(0)
        # Undo the per-batch mean so that every sample counts equally
        loss_sum += loss.item() * batch_size
        total += batch_size

        # Predicted class = index of the largest score
        _, predicted = output.max(1)
        correct += predicted.eq(target).sum().item()

    avg_loss = loss_sum / total
    accuracy = 100. * correct / total

    return avg_loss, accuracy

def evaluate(model, device, test_loader, criterion):
    """Measure loss and accuracy of ``model`` on ``test_loader``.

    The model is put in eval mode and gradients are disabled.

    The reported loss is the mean over *samples*: each batch loss is weighted
    by the batch size, so a shorter final batch does not skew the average.
    This assumes ``criterion`` returns the mean loss of the batch, as
    ``nn.CrossEntropyLoss()`` does with its default reduction.

    Args:
        model: Network to evaluate; called as ``model(data)``.
        device: Device that every batch is moved to.
        test_loader: Iterable of ``(data, target)`` batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.

    Returns:
        Tuple ``(avg_loss, accuracy)``: mean loss per sample and accuracy in
        percent.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            batch_size = target.size(0)
            # Undo the per-batch mean so that every sample counts equally
            loss_sum += loss.item() * batch_size
            total += batch_size

            # Predicted class = index of the largest score
            _, predicted = output.max(1)
            correct += predicted.eq(target).sum().item()

    avg_loss = loss_sum / total
    accuracy = 100. * correct / total

    return avg_loss, accuracy

def train_model(model, device, train_loader, test_loader, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` epochs with cross-entropy loss.

    After every epoch the model is evaluated on ``test_loader``; those
    numbers are stored under the ``val_*`` keys and a one-line progress
    summary is printed.

    Args:
        model: Network to optimise.
        device: Device passed through to ``train_epoch`` / ``evaluate``.
        train_loader: Batches used for the weight updates.
        test_loader: Batches used for the per-epoch evaluation.
        optimizer: Optimiser already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Dict with the keys 'train_loss', 'train_acc', 'val_loss' and
        'val_acc', each mapping to a list with one entry per epoch.
    """
    criterion = nn.CrossEntropyLoss()
    metric_names = ('train_loss', 'train_acc', 'val_loss', 'val_acc')
    history = {name: [] for name in metric_names}

    for epoch in range(epochs):
        train_loss, train_acc = train_epoch(model, device, train_loader, optimizer, criterion)
        val_loss, val_acc = evaluate(model, device, test_loader, criterion)

        # Record this epoch's four metrics in the same order as metric_names
        epoch_metrics = (train_loss, train_acc, val_loss, val_acc)
        for name, value in zip(metric_names, epoch_metrics):
            history[name].append(value)

        print(f'Epoch {epoch+1}/{epochs} - '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% - '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

    return history

In [None]:
print("=" * 60)
print("TASK 1: ACTIVATION FUNCTION CHALLENGE")
print("=" * 60)

# Same CNN, optimiser and epoch budget for every run; only the activation varies
activations = ['sigmoid', 'tanh', 'relu']
activation_histories = {}
activation_results = []

for act in activations:
    print(f"\n{'='*50}")
    print(f"Training CNN with {act.upper()} activation")
    print(f"{'='*50}")

    # A fresh model and optimiser per run so no state carries over
    model = CNN(activation=act, fc_units=128, dropout_rate=0.25).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    history = train_model(model, device, train_loader, test_loader, optimizer, epochs=10)

    # Keep the full curves for plotting and the last-epoch numbers for the table
    activation_histories[act] = history
    last_train_acc = history['train_acc'][-1]
    last_test_acc = history['val_acc'][-1]
    activation_results.append({
        'Activation': act,
        'Optimizer': 'Adam',
        'Epochs': 10,
        'Final Train Accuracy': f"{last_train_acc:.2f}%",
        'Final Test Accuracy': f"{last_test_acc:.2f}%"
    })

print("\n" + "="*60)
print("TASK 1 RESULTS")
print("="*60)
df_activation = pd.DataFrame(activation_results)
print(df_activation.to_string(index=False))

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel: (axes, history key, title, y-axis label).
# Top row shows accuracy, bottom row shows loss; left = training, right = validation.
panels = [
    (axes[0, 0], 'train_acc', 'Training Accuracy Comparison', 'Accuracy (%)'),
    (axes[0, 1], 'val_acc', 'Validation Accuracy Comparison', 'Accuracy (%)'),
    (axes[1, 0], 'train_loss', 'Training Loss Comparison', 'Loss'),
    (axes[1, 1], 'val_loss', 'Validation Loss Comparison', 'Loss'),
]

for ax, metric, title, ylabel in panels:
    # One curve per activation function
    for act in activations:
        ax.plot(activation_histories[act][metric], label=act.upper(), marker='o')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
print("\n" + "=" * 60)
print("TASK 2: OPTIMIZER SHOWDOWN (Using ReLU)")
print("=" * 60)

# Define optimizer configurations: each factory receives the parameters of a
# freshly created model and returns a ready-to-use optimiser.
optimizer_configs = {
    'SGD': lambda params: optim.SGD(params, lr=0.01),
    'SGD_Momentum': lambda params: optim.SGD(params, lr=0.01, momentum=0.9),
    'Adam': lambda params: optim.Adam(params, lr=0.001)
}

optimizer_histories = {}
optimizer_results = []

for opt_name, opt_func in optimizer_configs.items():
    print(f"\n{'='*50}")
    print(f"Training CNN with {opt_name}")
    print(f"{'='*50}")

    # Identical ReLU CNN for every optimiser; only the update rule changes
    model = CNN(activation='relu', fc_units=128, dropout_rate=0.25).to(device)
    optimizer = opt_func(model.parameters())
    history = train_model(model, device, train_loader, test_loader, optimizer, epochs=10)

    # Keep the full curves for plotting and the last-epoch numbers for the table
    optimizer_histories[opt_name] = history
    last_train_acc = history['train_acc'][-1]
    last_test_acc = history['val_acc'][-1]
    optimizer_results.append({
        'Activation': 'ReLU',
        'Optimizer': opt_name,
        'Epochs': 10,
        'Final Train Accuracy': f"{last_train_acc:.2f}%",
        'Final Test Accuracy': f"{last_test_acc:.2f}%"
    })

print("\n" + "="*60)
print("TASK 2 RESULTS")
print("="*60)
df_optimizer = pd.DataFrame(optimizer_results)
print(df_optimizer.to_string(index=False))

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel: (axes, history key, title, y-axis label).
# Top row shows accuracy, bottom row shows loss; left = training, right = validation.
panels = [
    (axes[0, 0], 'train_acc', 'Training Accuracy - Optimizer Comparison', 'Accuracy (%)'),
    (axes[0, 1], 'val_acc', 'Validation Accuracy - Optimizer Comparison', 'Accuracy (%)'),
    (axes[1, 0], 'train_loss', 'Training Loss - Optimizer Comparison', 'Loss'),
    (axes[1, 1], 'val_loss', 'Validation Loss - Optimizer Comparison', 'Loss'),
]

for ax, metric, title, ylabel in panels:
    # One curve per optimiser, in the order the configurations were declared
    for opt_name in optimizer_configs:
        ax.plot(optimizer_histories[opt_name][metric], label=opt_name, marker='o')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
print("\n" + "=" * 60)
print("TASK 3: BATCH NORMALIZATION & DROPOUT IMPACT")
print("=" * 60)

# Scenario configurations: each entry toggles batch norm and sets the dropout rate
scenarios = [
    {'name': 'Without BN, Dropout=0', 'use_bn': False, 'dropout': 0.0},
    {'name': 'Without BN, Dropout=0.1', 'use_bn': False, 'dropout': 0.1},
    {'name': 'With BN, Dropout=0.25', 'use_bn': True, 'dropout': 0.25}
]

task3_histories = {}
task3_results = []

for scenario in scenarios:
    label = scenario['name']
    with_bn = scenario['use_bn']
    drop_p = scenario['dropout']

    print(f"\n{'='*50}")
    print(f"Training MLP: {scenario['name']}")
    print(f"{'='*50}")

    # Same 256-128 ReLU MLP and Adam settings for every scenario
    model = MLP(
        activation='relu',
        hidden_units=[256, 128],
        use_batch_norm=with_bn,
        dropout_rate=drop_p
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    history = train_model(model, device, train_loader, test_loader, optimizer, epochs=15)

    # Keep the full curves for plotting and the last-epoch numbers for the table
    task3_histories[label] = history
    last_train_acc = history['train_acc'][-1]
    last_test_acc = history['val_acc'][-1]
    task3_results.append({
        'Scenario': label,
        'Batch Norm': 'Yes' if with_bn else 'No',
        'Dropout': drop_p,
        'Final Train Accuracy': f"{last_train_acc:.2f}%",
        'Final Test Accuracy': f"{last_test_acc:.2f}%"
    })

print("\n" + "="*60)
print("TASK 3 RESULTS")
print("="*60)
df_task3 = pd.DataFrame(task3_results)
print(df_task3.to_string(index=False))

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel: (axes, history key, title, y-axis label).
# Top row shows accuracy, bottom row shows loss; left = training, right = validation.
panels = [
    (axes[0, 0], 'train_acc', 'Training Accuracy - BN & Dropout Impact', 'Accuracy (%)'),
    (axes[0, 1], 'val_acc', 'Validation Accuracy - BN & Dropout Impact', 'Accuracy (%)'),
    (axes[1, 0], 'train_loss', 'Training Loss - BN & Dropout Impact', 'Loss'),
    (axes[1, 1], 'val_loss', 'Validation Loss - BN & Dropout Impact', 'Loss'),
]

for ax, metric, title, ylabel in panels:
    # One curve per scenario, in the order the scenarios were trained
    for scenario_name, scenario_history in task3_histories.items():
        ax.plot(scenario_history[metric], label=scenario_name, marker='o')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    # Scenario names are long, so the legend uses a smaller font
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
print("\n" + "=" * 60)
print("TRAINING REQUIRED MODELS FROM TABLE")
print("=" * 60)

required_models_results = []
required_models_histories = {}


def _train_required(label, fc_layout, optimizer_name, model, optimizer, epochs):
    """Train one table entry, record its last-epoch validation accuracy, return its history."""
    print(f"\n--- Training {label} ---")
    run_history = train_model(model, device, train_loader, test_loader, optimizer, epochs=epochs)
    required_models_results.append({
        'Model': label,
        'FC Layer': fc_layout,
        'Optimizer': optimizer_name,
        'Epochs': epochs,
        'Accuracy': f"{run_history['val_acc'][-1]:.2f}%"
    })
    required_models_histories[label] = run_history
    return run_history


# Each model is created immediately before its own training run, one after the other.

# CNN-1: 128 FC, Adam, 10 epochs
cnn1 = CNN(activation='relu', fc_units=128, dropout_rate=0.25).to(device)
optimizer_cnn1 = optim.Adam(cnn1.parameters(), lr=0.001)
history_cnn1 = _train_required('CNN-1', '128', 'Adam', cnn1, optimizer_cnn1, epochs=10)

# MLP-1: 512-256-128, SGD, 20 epochs
mlp1 = MLP(activation='relu', hidden_units=[512, 256, 128],
           use_batch_norm=True, dropout_rate=0.25).to(device)
optimizer_mlp1 = optim.SGD(mlp1.parameters(), lr=0.01)
history_mlp1 = _train_required('MLP-1', '512-256-128', 'SGD', mlp1, optimizer_mlp1, epochs=20)

# MLP-2: 256, Adam, 15 epochs
mlp2 = MLP(activation='relu', hidden_units=[256],
           use_batch_norm=True, dropout_rate=0.25).to(device)
optimizer_mlp2 = optim.Adam(mlp2.parameters(), lr=0.001)
history_mlp2 = _train_required('MLP-2', '256', 'Adam', mlp2, optimizer_mlp2, epochs=15)

print("\n" + "="*60)
print("REQUIRED MODELS RESULTS")
print("="*60)
df_required = pd.DataFrame(required_models_results)
print(df_required.to_string(index=False))

In [None]:
print("\n" + "=" * 60)
print("COMPREHENSIVE EXPERIMENT RESULTS TABLE")
print("=" * 60)

# Merge the Task 1 and Task 2 result rows into one table. Rows are numbered
# from 1 within each task and only the final test accuracy is carried over.
all_results = [
    {
        'Experiment': f'{task_tag}-{position}',
        'Activation': result['Activation'],
        'Optimizer': result['Optimizer'],
        'Epochs': result['Epochs'],
        'Final Accuracy': result['Final Test Accuracy']
    }
    for task_tag, task_rows in (('Task1', activation_results), ('Task2', optimizer_results))
    for position, result in enumerate(task_rows, 1)
]

df_all_results = pd.DataFrame(all_results)
print(df_all_results.to_string(index=False))

In [None]:
# Fixed summary text: each section is a heading followed by its bullet lines.
# These lines are static strings; they are not computed from the results above.
summary_sections = [
    ("\n1. ACTIVATION FUNCTION ANALYSIS:", [
        "   - Sigmoid: Prone to vanishing gradients, slower convergence",
        "   - Tanh: Better than sigmoid, zero-centered, but still slower than ReLU",
        "   - ReLU: Fastest convergence, no vanishing gradient for positive values",
    ]),
    ("\n2. OPTIMIZER ANALYSIS:", [
        "   - SGD: Stable but slower convergence",
        "   - SGD with Momentum: Smoother convergence, handles local minima better",
        "   - Adam: Fastest convergence with adaptive learning rates",
    ]),
    ("\n3. BATCH NORMALIZATION & DROPOUT:", [
        "   - Without BN/Dropout: May overfit, unstable training",
        "   - Dropout helps prevent overfitting",
        "   - Batch Normalization speeds up training and improves stability",
    ]),
]

print("\n" + "=" * 70)
print("SUMMARY AND OBSERVATIONS")
print("=" * 70)

for heading, bullets in summary_sections:
    print(heading)
    for bullet in bullets:
        print(bullet)

print("\n" + "=" * 70)
print("PRACTICAL COMPLETED SUCCESSFULLY!")
print("=" * 70)

In [None]:
# Save the weights of `cnn1` (state_dict only, not the whole module object).
# NOTE(review): no model comparison happens here; `cnn1` is saved unconditionally.
checkpoint_path = 'best_cnn_model.pth'
torch.save(cnn1.state_dict(), checkpoint_path)
print(f"\nBest CNN model saved as '{checkpoint_path}'")