In [1]:
# import necessary libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import Compose, Rotate, RandomResizedCrop, Normalize
from sklearn.metrics import roc_auc_score, confusion_matrix
import gradio as gr
from PIL import Image
import torchvision
import time
from itertools import product

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

  check_for_updates()


cuda


# 1. Data Processing and Augmentation

In [2]:
# Transformations for the training and test sets
def get_transforms():
    train_transform = A.Compose([
        A.RandomResizedCrop(height=32, width=32, scale=(0.8, 1.0)),  # Random Resized Crop
        A.Rotate(limit=30),  # Random Rotate (±30 degrees)
        A.OneOf([  # Random Horizontal Flip or Vertical Flip
            A.MotionBlur(p=0.2),  # Motion Blur
            A.MedianBlur(blur_limit=3, p=0.1),  # Median Blur
            A.GaussianBlur(blur_limit=3, p=0.1),  # Gaussian Blur
        ], p=0.3),
        A.OneOf([  # Random Brightness or Contrast
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
        ], p=0.3),
        A.CoarseDropout(max_holes=1, max_height=16, max_width=16, fill_value=0, p=0.5),  # Random Coarse Dropout
        A.Resize(32, 32),  # Resize to 32x32
        A.Normalize(mean=[0.4377, 0.4438, 0.4728], std=[0.1980, 0.2010, 0.1970]),  # Normalize
        ToTensorV2()  # Convert to Tensor
    ])
    
    test_transform = A.Compose([
        A.Resize(32, 32),  # Resize to 32x32
        A.Normalize(mean=[0.4377, 0.4438, 0.4728], std=[0.1980, 0.2010, 0.1970]),  # Normalize
        ToTensorV2()  # Convert to Tensor
    ])
    
    return train_transform, test_transform

def load_svhn_data(batch_size, train_transform, test_transform):
    # Load the SVHN dataset
    train_dataset = datasets.SVHN('./data', split='train', download=True, transform=lambda img: train_transform(image=np.array(img))['image'])
    test_dataset = datasets.SVHN('./data', split='test', download=True, transform=lambda img: test_transform(image=np.array(img))['image'])
    
    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, drop_last=True)
    
    return train_loader, test_loader

# 2. Neural Network Setup

In [3]:
# Define the VGG model
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1), nn.GroupNorm(2, 8), nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1), nn.GroupNorm(4, 16), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), nn.Dropout(0.25),  # 14x14

            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.GroupNorm(8, 32), nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.GroupNorm(8, 32), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), nn.Dropout(0.25),  # 7x7

            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.GroupNorm(8, 32), nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.GroupNorm(8, 32), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), nn.Dropout(0.25)   # 3x3
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 4 * 4, 256), nn.GroupNorm(32, 256), nn.ReLU(),
            nn.Dropout(0.5),  # Add Dropout here
            nn.Linear(256, 10)  # Output layer (10 classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc_layers(x)
        return x

# Initialize model, loss function, and optimizer with L2 regularization
model = VGG().to(device)
criterion = nn.CrossEntropyLoss()

# L2 regularization is applied via the 'weight_decay' parameter in the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# 3. Training and Evaluation Functions

In [4]:
# Training, evaluation, and helper functions
def _train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    epoch_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * len(images)
    return epoch_loss / len(train_loader.dataset)

def _evaluate(model, test_loader, criterion, device):
    model.eval()
    epoch_loss = 0.0
    correct = 0
    total = 0
    all_labels = []
    all_outputs = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_loss += loss.item() * len(images)
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_labels.extend(labels.cpu().numpy())
            all_outputs.extend(outputs.cpu().numpy())
    
    accuracy = correct / total
    epoch_loss = epoch_loss / len(test_loader.dataset)

    # Calculate ROC AUC
    all_labels = np.array(all_labels)
    all_outputs = np.array(all_outputs)
    all_labels_one_hot = np.eye(10)[all_labels]

    macro_roc_auc = roc_auc_score(all_labels_one_hot, all_outputs, average='macro', multi_class='ovr')
    micro_roc_auc = roc_auc_score(all_labels_one_hot, all_outputs, average='micro', multi_class='ovr')

    return epoch_loss, accuracy, macro_roc_auc, micro_roc_auc, all_labels, all_outputs

# 4. Analysis of Training and Evaluation(Experiments)

## 4.1 Define the ExperimentRunner Class

In [5]:
# Train the model
class ExperimentRunner:
    def __init__(self, config):
        self.config = config
        self.model = VGG().to(device)
        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = self._select_optimizer(config['optimizer'])
        
        train_transform, test_transform = get_transforms()
        self.train_loader, self.test_loader = load_svhn_data(config['batch_size'], train_transform, test_transform)
        
        self.train_losses = []
        self.test_losses = []
        self.accuracies = []
        self.macro_roc_aucs = []
        self.micro_roc_aucs = []
        self.class_roc_aucs = []

    def _select_optimizer(self, optimizer):
        if optimizer == 'adam':
            return optim.Adam(self.model.parameters(), lr=self.config['learning_rate'])
        elif optimizer == 'sgd':
            return optim.SGD(self.model.parameters(), lr=self.config['learning_rate'], momentum=0.9)
        else:
            raise ValueError(f"Unknown optimizer: {optimizer}")

    def train(self):
        for epoch in range(self.config['num_epochs']):
            start_time = time.time()
            # Train using the _train_epoch function from the first cell
            train_loss = _train_epoch(self.model, self.train_loader, self.loss_function, self.optimizer, device)
            self.train_losses.append(train_loss)

            # Test using the _evaluate function from the first cell
            test_loss, accuracy, macro_roc_auc, micro_roc_auc, all_labels, all_outputs = _evaluate(
                self.model, self.test_loader, self.loss_function, device
            )
            self.test_losses.append(test_loss)
            self.accuracies.append(accuracy)
            self.macro_roc_aucs.append(macro_roc_auc)
            self.micro_roc_aucs.append(micro_roc_auc)

            print(f"Epoch {epoch + 1}/{self.config['num_epochs']}, Train Loss: {train_loss:.4f}, "
                  f"Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.4f}, Macro ROC AUC: {macro_roc_auc:.4f}, "
                  f"Micro ROC AUC: {micro_roc_auc:.4f}")
            
            # Calculate confusion matrix
            predicted_labels = np.argmax(all_outputs, axis=1)
            cm = confusion_matrix(all_labels, predicted_labels)
            print("Confusion Matrix:", cm)
            
            # Save the model
            torch.save(self.model.state_dict(), 'trained_vgg.pth')

            # Calculate class-wise ROC AUC
            all_labels_one_hot = np.eye(10)[all_labels]
            class_roc_auc = roc_auc_score(all_labels_one_hot, all_outputs, average=None, multi_class='ovr')
            self.class_roc_aucs.append(class_roc_auc)
            
            end_time = time.time()
            epoch_time = end_time - start_time
            print(f"Epoch {epoch + 1} / {self.config['num_epochs']}, Time: {epoch_time:.2f} s")

## 4.2 Run Experiments

In [None]:
# Define the experiment configurations
learning_rates = [0.0001, 0.001]    # Learning rates from 0.0001 to 0.001
batch_sizes = [32, 64]          # Batch sizes of 32 and 64
num_epochs_list = [5, 10, 20]         # Number of epochs
optimizers = ['adam', 'sgd']              # Adam and SGD optimizers
augmentations = [
    {'rotate': 30, 'scale_min': 0.8, 'scale_max': 1.0, 'min_ratio': 0.75, 'max_ratio': 1.33},
    {'rotate': 15, 'scale_min': 0.9, 'scale_max': 1.0, 'min_ratio': 0.85, 'max_ratio': 1.23}
]

# Generate all possible experiment configurations
experiment_configs = [
    {
        'learning_rate': lr,
        'batch_size': bs,
        'num_epochs': ne,
        'optimizer': opt,
        'augmentation': aug
    }
    for lr, bs, ne, opt, aug in product(learning_rates, batch_sizes, num_epochs_list, optimizers, augmentations)
]

# Run the experiments
results = []
for config in experiment_configs:
    print(f"Running experiment with config: {config}")
    runner = ExperimentRunner(config)
    runner.train()
    results.append(runner)

# Visualize the results
for result in results:
    config = result.config
    plt.figure(figsize=(18, 6))
    plt.suptitle(f"Experiment Config: {config}")

    # Plot the training and test losses
    plt.subplot(1, 3, 1)
    plt.plot(result.train_losses, label="Train Loss")
    plt.plot(result.test_losses, label="Test Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title("Training and Test Loss")
    plt.legend()

    # Plot the accuracy and ROC AUC
    plt.subplot(1, 3, 2)
    plt.plot(result.accuracies, label="Accuracy")
    plt.plot(result.macro_roc_aucs, label="Macro ROC AUC")
    plt.plot(result.micro_roc_aucs, label="Micro ROC AUC")
    plt.xlabel("Epochs")
    plt.ylabel("Metric")
    plt.title("Accuracy and ROC AUC")
    plt.legend()

    # Plot the class-wise ROC AUC
    plt.subplot(1, 3, 3)
    plt.bar(range(10), result.class_roc_aucs[-1])
    plt.xlabel("Class")
    plt.ylabel("ROC AUC")
    plt.title("Class-wise ROC AUC")

    plt.show()

# Generate a summary of the experiments
summary_data = []
for result in results:
    config = result.config
    final_test_loss = result.test_losses[-1]
    final_accuracy = result.accuracies[-1]
    final_macro_roc_auc = result.macro_roc_aucs[-1]
    final_micro_roc_auc = result.micro_roc_aucs[-1]
    summary_data.append({
        'Learning Rate': config['learning_rate'],
        'Batch Size': config['batch_size'],
        'Num Epochs': config['num_epochs'],
        'Optimizer': config['optimizer'],
        'Rotate': config['augmentation']['rotate'],
        'Scale Min': config['augmentation']['scale_min'],
        'Scale Max': config['augmentation']['scale_max'],
        'Min Ratio': config['augmentation']['min_ratio'],
        'Max Ratio': config['augmentation']['max_ratio'],
        'Final Train Loss': result.train_losses[-1],
        'Final Test Loss': final_test_loss,
        'Final Accuracy': final_accuracy,
        'Final Macro ROC AUC': final_macro_roc_auc,
        'Final Micro ROC AUC': final_micro_roc_auc
    })

# Save the summary to a CSV file
summary_df = pd.DataFrame(summary_data)
summary_df.to_csv('experiment_summary_with_class.csv', index=False)

## 5. Best Model Training(with the Best Configuration)

In [6]:
# Define the best configuration
best_config = {
    'learning_rate': 0.001,
    'batch_size': 64,
    'num_epochs': 20,
    'optimizer': 'adam',
    'augmentation': {
        'rotate': 15,
        'scale_min': 0.9,
        'scale_max': 1.0,
        'min_ratio': 0.85,
        'max_ratio': 1.23
    }
}

# Train the best model
runner = ExperimentRunner(best_config)
runner.train()

  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


Using downloaded and verified file: ./data\train_32x32.mat
Using downloaded and verified file: ./data\test_32x32.mat
Epoch 1/20, Train Loss: 1.9316, Test Loss: 0.9375, Accuracy: 0.7084, Macro ROC AUC: 0.9330, Micro ROC AUC: 0.9382
Confusion Matrix: [[1431   49   39   28   21   20   35   47    9   62]
 [ 371 3614  498   42   94   26   15  392    7   30]
 [  68   79 3616   89   58   69   11   93    7   51]
 [ 110   63  275 1560   34  285   40   86   68  355]
 [ 120  117  136   30 1924   41   11   12    4  124]
 [  23   15   37  133   40 1955  100   10   19   49]
 [  99   49   35   69   84  496  966    9  119   44]
 [  43   98  126   27    4  106    5 1599    3    5]
 [ 206   32   74  220   43   95  102   20  654  211]
 [ 137   31   73   94   32   82   17   23   17 1088]]
Epoch 1 / 20, Time: 57.38 s
Epoch 2/20, Train Loss: 1.2936, Test Loss: 0.5198, Accuracy: 0.8486, Macro ROC AUC: 0.9716, Micro ROC AUC: 0.9719
Confusion Matrix: [[1476   13   34   27    8   15   80    3   11   74]
 [ 156 

# 6. Model Deployment with Gradio

In [7]:
# Load the best model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG().to(device)
state_dict = torch.load('best_trained_vgg.pth', map_location=device)
model.load_state_dict(state_dict, strict=False)
model.eval()

# Define the preprocessing function
def preprocess_image(image):
    try:
        # Print the type and keys of the received image
        print(f"Received image data type: {type(image)}")
        if isinstance(image, dict):
            print(f"Received image keys: {image.keys()}")
            image = image.get('composite')  # Extract the image from the dictionary
            print(f"Extracted image data type: {type(image)}")
            print(f"Extracted image shape: {image.shape if isinstance(image, np.ndarray) else 'Not a numpy array'}")

        if not isinstance(image, np.ndarray):
            print("Image is not a numpy array after extraction.")
            return None

        # Convert the image to a PIL Image
        image = Image.fromarray(np.uint8(image)).convert("RGBA")

        # Create a white background
        white_background = Image.new("RGB", image.size, (255, 255, 255))

        # Paste the image on the white background
        white_background.paste(image, mask=image.split()[3])  # Use the alpha channel for masking

        # Convert the image to a numpy array
        image = np.array(white_background)

        # Save the original input image with a white background
        Image.fromarray(np.uint8(image)).save('original_input_image_with_white_background.png')

        # Resize and transform the image to a tensor
        resized_image = Image.fromarray(image).resize((32, 32))  # Resize to 32x32
        resized_image = np.array(resized_image)
        transformed = torch.tensor(resized_image).permute(2, 0, 1).unsqueeze(0).float().to(device)

        # Save the preprocessed image
        torchvision.utils.save_image(transformed / 255.0, 'preprocessed_image_without_norm.png')  

        return transformed
    except Exception as e:
        print(f"Error during preprocessing: {e}")
        return None

# Define the preprocessing function
def classify_digit(sketchpad_image):
    # Choose non null image
    image = sketchpad_image
    if image is None:
        return "No image provided."

    img_tensor = preprocess_image(image)
    if img_tensor is None:
        return "Error during preprocessing."

    try:
        with torch.no_grad():
            output = model(img_tensor)  # Run the model
            probabilities = torch.softmax(output, dim=1).cpu().numpy()[0]
        return {str(i): float(probabilities[i]) for i in range(10)}
    except Exception as e:
        print(f"Error during classification: {e}")
        return "Error during classification."

# Create a Gradio interface
sketchpad = gr.Sketchpad()
interface = gr.Interface(
    fn=classify_digit,
    inputs=sketchpad,
    outputs="label",
    live=True
)

# Launch the interface
interface.launch(share=True)

  state_dict = torch.load('best_trained_vgg.pth', map_location=device)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://8d1157014dd33dc1e0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


