In [1]:
# importing the packages 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader


import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau

Baseline models

In [2]:
# Step 1: Data Access and Preprocessing
# Load MNIST from OpenML. `as_frame=False` pins the return type to NumPy arrays,
# so the positional slicing below does not depend on scikit-learn's
# version-dependent `as_frame='auto'` default (which may return a DataFrame).
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
y = y.astype(int)  # Ensure labels are integers (they may arrive as strings)

# Hold out the last N_TEST samples as the test set; everything before them is
# the training pool used for cross-validation.
N_TEST = 10000
X_test = X[-N_TEST:]
y_test = y[-N_TEST:]
X_train_full = X[:-N_TEST]
y_train_full = y[:-N_TEST]

# Normalize pixel values to the range [0, 1].
# The scaler is fitted on the training pool only and then re-used on the test
# set, so no test-set statistics leak into preprocessing.
scaler = MinMaxScaler()
X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

# Step 2: Cross-Validation Function
def cross_validate_model(model, X, y, model_name, cv=5):
    """Score `model` with shuffled, stratified k-fold cross-validation.

    Args:
        model: Estimator handed to `cross_val_score`.
        X: Feature matrix.
        y: Target labels, also used to stratify the folds.
        model_name: Label used in the printed report.
        cv: Number of folds (default 5).

    Returns:
        The mean accuracy over all folds.
    """
    # Fixed random_state keeps the fold assignment identical across models.
    folds = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    fold_scores = cross_val_score(model, X, y, cv=folds, scoring='accuracy')
    mean_accuracy = fold_scores.mean()

    print(f"\nCross-Validation Scores for {model_name}: {fold_scores}")
    print(f"Mean Accuracy for {model_name}: {mean_accuracy:.4f}")
    return mean_accuracy

# Steps 3-7: cross-validate each baseline classifier on the training pool.
# Every model goes through the same `cross_validate_model` helper, so the
# resulting mean accuracies are directly comparable.

# Step 3: Logistic Regression
print("\nCross-validating Logistic Regression...")
lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_accuracy = cross_validate_model(
    model=lr_model, X=X_train_full, y=y_train_full, model_name="Logistic Regression"
)

# Step 4: Linear Discriminant Analysis
print("\nCross-validating Linear Discriminant Analysis...")
lda_model = LinearDiscriminantAnalysis()
lda_accuracy = cross_validate_model(
    model=lda_model, X=X_train_full, y=y_train_full, model_name="Linear Discriminant Analysis"
)

# Step 5: Decision Tree
print("\nCross-validating Decision Tree...")
dt_model = DecisionTreeClassifier(random_state=42)
dt_accuracy = cross_validate_model(
    model=dt_model, X=X_train_full, y=y_train_full, model_name="Decision Tree"
)

# Step 6: Random Forest
print("\nCross-validating Random Forest...")
rf_model = RandomForestClassifier(random_state=42)
rf_accuracy = cross_validate_model(
    model=rf_model, X=X_train_full, y=y_train_full, model_name="Random Forest"
)

# Step 7: Neural Network (multi-layer perceptron, capped at 50 iterations)
print("\nCross-validating Neural Network...")
mlp_model = MLPClassifier(random_state=42, max_iter=50)
mlp_accuracy = cross_validate_model(
    model=mlp_model, X=X_train_full, y=y_train_full, model_name="Neural Network"
)

# Step 8: Final Visualization of All Models
# Collect the mean cross-validation accuracy of every baseline under its
# display name.
cv_results = {
    'Logistic Regression': lr_accuracy,
    'Linear Discriminant Analysis': lda_accuracy,
    'Decision Tree': dt_accuracy,
    'Random Forest': rf_accuracy,
    'Neural Network': mlp_accuracy
}

# Build the comparison table, best model first. Sorting is chained instead of
# done in place so re-running the cell always starts from a fresh frame.
cv_results_df = pd.DataFrame(
    list(cv_results.items()),
    columns=['Model', 'Mean Cross-Validation Accuracy'],
).sort_values(by='Mean Cross-Validation Accuracy', ascending=False)

# Visualize cross-validation performance
plt.figure(figsize=(10, 6))
sns.barplot(x='Mean Cross-Validation Accuracy', y='Model', data=cv_results_df, palette='viridis')
plt.title('Model Performance Comparison (Cross-Validation)')
plt.xlabel('Mean Cross-Validation Accuracy')
plt.ylabel('Model')
# Zoom in on the interesting range, but never cut off a bar: if any model
# scores below 0.8 the lower limit follows the weakest model instead.
lowest_accuracy = float(cv_results_df['Mean Cross-Validation Accuracy'].min())
plt.xlim(min(0.8, lowest_accuracy - 0.02), 1.0)
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.show()

# Print final comparison table
print("\nFinal Model Performance (Mean Cross-Validation Accuracy):")
print(cv_results_df)

  warn(


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)>

Getting a better score with an Enhanced LeNet-5

In [None]:
# Data augmentation and normalization for the training set
train_transform = transforms.Compose([
    transforms.RandomRotation(10),  # Rotate by up to +/-10 degrees
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # Random shifts (10%)
    # Random zoom in/out by up to 10%. RandomAffine's `scale` is a true zoom
    # factor. RandomResizedCrop was not: its `scale` is a fraction of the image
    # area (values above 1 cannot fit), and its default aspect-ratio jitter
    # would additionally stretch the digits.
    transforms.RandomAffine(0, scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
])

# Normalization for the test set (no augmentation)
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Download and load MNIST dataset with augmentation on training set
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)

# Create DataLoader for batching. The test loader is not shuffled, so the
# order of predictions matches the order of samples in `test_dataset`.
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [None]:
# Define the Enhanced LeNet5 class
class EnhancedLeNet5(nn.Module):
    """LeNet-5 style CNN with four conv blocks, batch norm, tanh and dropout.

    Each conv block is Conv2d -> BatchNorm2d -> tanh -> 2x2 max pooling. All
    convolutions are padded to preserve the spatial size, so only the pooling
    shrinks the feature map. `fc1` expects 256 * 1 * 1 input features, which
    is what a 1x28x28 image yields after four poolings (28 -> 14 -> 7 -> 3 -> 1).

    Args:
        l2_lambda: L2 regularization strength associated with this model. The
            network does not apply it itself; it is stored as `self.l2_lambda`
            so training code can read it (e.g. as the optimizer's
            `weight_decay`).
    """

    def __init__(self, l2_lambda=0.005):
        super().__init__()

        # Previously this argument was accepted and silently discarded.
        self.l2_lambda = l2_lambda

        # First two convolutional blocks (filters increased from 6 to 32, then 64)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        # Additional convolutional blocks
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)

        # Fully connected classifier head (increased hidden units)
        self.fc1 = nn.Linear(256 * 1 * 1, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

        # Dropout after each hidden fully connected layer
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.25)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Image batch; must flatten to 256 features per sample after the
                four conv/pool blocks (true for 1x28x28 inputs).

        Returns:
            Tensor with 10 logits per sample (no softmax applied).
        """
        # Convolutional blocks: conv -> batch norm -> tanh -> 2x2 max pool
        x = torch.tanh(self.bn1(self.conv1(x)))
        x = torch.max_pool2d(x, 2)
        x = torch.tanh(self.bn2(self.conv2(x)))
        x = torch.max_pool2d(x, 2)

        x = torch.tanh(self.bn3(self.conv3(x)))
        x = torch.max_pool2d(x, 2)
        x = torch.tanh(self.bn4(self.conv4(x)))
        x = torch.max_pool2d(x, 2)

        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        # Fully connected layers with dropout
        x = torch.tanh(self.fc1(x))
        x = self.dropout1(x)
        x = torch.tanh(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)  # Output layer: logits

        return x

# Seed PyTorch so weight initialisation and batch shuffling are reproducible.
torch.manual_seed(42)

# Single source of truth for the L2 regularization strength (used for both
# the model argument and the optimizer's weight decay).
L2_LAMBDA = 0.005

# Plain evaluation transform: tensor conversion + normalization to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# The training set uses `train_transform` (the augmentation pipeline defined in
# the data-preparation cell). Rebuilding it with the plain `transform` would
# silently discard that augmentation.
train_dataset = datasets.MNIST(root='./data', train=True, transform=train_transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# DataLoaders. The test loader must stay unshuffled so prediction order matches
# the sample order of `test_dataset`.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model = EnhancedLeNet5(l2_lambda=L2_LAMBDA)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=L2_LAMBDA)

# Cut the learning rate by 10x after 3 epochs without improvement of the
# monitored loss. `verbose=True` is omitted: it is deprecated (PyTorch >= 2.2)
# and may be rejected by newer releases; read the current rate from
# `optimizer.param_groups[0]['lr']` if needed.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)


In [None]:
def train_model(model, train_loader, criterion, optimizer, scheduler, epochs=10, device=None):
    """Train `model` and report the loss and accuracy of every epoch.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Iterable yielding `(images, labels)` batches.
        criterion: Loss function called as `criterion(outputs, labels)`.
        optimizer: Optimizer bound to the model's parameters.
        scheduler: Scheduler stepped once per epoch with the mean epoch loss
            (e.g. `ReduceLROnPlateau`).
        epochs: Number of passes over `train_loader`.
        device: Device to train on. When omitted, the device the model's
            parameters already live on is used, so the function does not
            depend on a global `device` variable.

    Returns:
        `(train_losses, train_accuracies)`: per-epoch mean batch loss and
        per-epoch accuracy in percent.

    Raises:
        ValueError: If `train_loader` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    else:
        model.to(device)

    train_losses = []
    train_accuracies = []
    for epoch in range(epochs):
        # Re-enter train mode every epoch in case an evaluation ran in between.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if num_batches == 0 or total == 0:
            raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

        # Mean of the per-batch losses, and accuracy over all seen samples.
        epoch_loss = running_loss / num_batches
        epoch_accuracy = 100 * correct / total

        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_accuracy)

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

        # The scheduler monitors the training loss of the epoch just finished.
        scheduler.step(epoch_loss)

    return train_losses, train_accuracies

In [None]:
def evaluate_model(model, test_loader, device=None):
    """Measure classification accuracy of `model` on `test_loader`.

    Args:
        model: Trained network; switched to eval mode by this call.
        test_loader: Iterable yielding `(images, labels)` batches.
        device: Device to evaluate on. When omitted, the device the model's
            parameters already live on is used, so the function does not
            depend on a global `device` variable.

    Returns:
        `(accuracy, all_labels, all_preds)`: accuracy in percent, followed by
        the true labels and the predicted labels in loader order.

    Raises:
        ValueError: If `test_loader` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    else:
        model.to(device)

    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Move results to the CPU so they can be used with NumPy/sklearn.
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy, all_labels, all_preds

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# Train for 100 epochs, then measure accuracy on the held-out test loader.
train_losses, train_accuracies = train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=100,
)
test_accuracy, all_labels, all_predictions = evaluate_model(model=model, test_loader=test_loader)

In [None]:
# Identify misclassified samples for the best model
# Positions where the prediction differs from the true label. Because the test
# loader is not shuffled, each position is also an index into `test_dataset`.
misclassified_idx = np.flatnonzero(np.asarray(all_predictions) != np.asarray(all_labels))

num_samples = min(10, len(misclassified_idx))
plt.figure(figsize=(15, 8))
for i, idx in enumerate(misclassified_idx[:num_samples]):
    plt.subplot(2, 5, i + 1)
    # Take the image from the dataset the model was actually evaluated on,
    # not from the separately downloaded OpenML copy used by the baselines.
    image, _ = test_dataset[int(idx)]
    plt.imshow(image.squeeze().numpy(), cmap='gray')
    plt.title(f"True: {all_labels[idx]}, Pred: {all_predictions[idx]}")
    plt.axis('off')
plt.suptitle('Examples of Misclassified Digits by Neural Network', fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Confusion Matrix for Test Set
# Derive the class list from the evaluated labels/predictions themselves, so
# this cell does not depend on `y_test` from the baseline cell. Passing it to
# `confusion_matrix` guarantees the matrix rows/columns match the tick labels.
class_labels = np.unique(np.concatenate([all_labels, all_predictions]))
cm = confusion_matrix(all_labels, all_predictions, labels=class_labels)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.title('Confusion Matrix - Neural Network on Test Set')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()