<a href="https://colab.research.google.com/github/ambermerina43-sketch/MLOPs-products/blob/main/Textrecognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

"""
COMPLETE MNIST TRAINING GUIDE
==============================
This script does EVERYTHING you need:
1. Install dependencies
2. Load MNIST dataset
3. Build a CNN model
4. Train the model
5. Visualize results with Weights & Biases

Each section is clearly explained!
"""

# =============================================================================
# PART 1: INSTALL ALL REQUIRED LIBRARIES
# =============================================================================
print("=" * 80)
print("PART 1: Installing Required Libraries")
print("=" * 80)

# What this does: Installs all the Python packages we need
import subprocess
import sys

def install_package(package):
    """Install one package by running ``pip install -q`` under this interpreter.

    ``subprocess.check_call`` raises ``subprocess.CalledProcessError`` when
    pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-q"]
    pip_command.append(package)
    subprocess.check_call(pip_command)

# List of required packages
packages = [
    "torch",           # PyTorch - for building neural networks
    "torchvision",     # Contains MNIST dataset
    "matplotlib",      # For creating plots and visualizations
    "numpy",           # For numerical operations
    "wandb",           # Weights & Biases - for experiment tracking
    "tqdm",            # Progress bars
]

print("Installing packages...")
failed_packages = []  # Names pip could not install; reported after the loop
for package in packages:
    try:
        install_package(package)
    except (subprocess.CalledProcessError, OSError):
        # Best-effort install: keep going so one bad package does not block
        # the rest. Only pip/launch failures are caught, so Ctrl-C and
        # programming errors still propagate instead of being swallowed.
        failed_packages.append(package)
        print(f"‚úó Failed to install {package}")
    else:
        print(f"‚úì Installed {package}")

# Only claim success when every install actually succeeded.
if failed_packages:
    print(f"\n! Some packages failed to install: {', '.join(failed_packages)}\n")
else:
    print("\n‚úì All packages installed!\n")


# =============================================================================
# PART 2: IMPORT LIBRARIES
# =============================================================================
print("=" * 80)
print("PART 2: Importing Libraries")
print("=" * 80)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import wandb
from tqdm import tqdm
import os # Import os module for directory operations

# Report the PyTorch build and whether a CUDA device is visible.
cuda_ready = torch.cuda.is_available()
print(f"‚úì PyTorch version: {torch.__version__}")
print(f"‚úì CUDA available: {cuda_ready}")
if not cuda_ready:
    print("! Running on CPU (will be slower)")
else:
    print(f"‚úì GPU: {torch.cuda.get_device_name(0)}")
print()


# =============================================================================
# PART 3: SET UP DEVICE (GPU or CPU)
# =============================================================================
print("=" * 80)
print("PART 3: Setting Up Computing Device")
print("=" * 80)

# What this does: Checks if GPU is available, otherwise uses CPU
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"‚úì Using device: {device}\n")

# =============================================================================
# PART 3.5: CREATE OUTPUTS DIRECTORY
# =============================================================================
print("=" * 80)
print("PART 3.5: Creating Output Directory")
print("=" * 80)

# Create the directory for saving outputs if it doesn't exist
# Folder that receives the plots and checkpoints written later in the script.
output_dir = '/mnt/user-data/outputs'
# exist_ok=True makes this a no-op when the folder is already present.
os.makedirs(name=output_dir, exist_ok=True)
created_message = f"‚úì Created output directory: {output_dir}\n"
print(created_message)

# =============================================================================
# PART 4: DOWNLOAD AND PREPARE MNIST DATASET
# =============================================================================
print("=" * 80)
print("PART 4: Loading MNIST Dataset")
print("=" * 80)

# What is MNIST?
# - Collection of 70,000 handwritten digits (0-9)
# - 60,000 for training, 10,000 for testing
# - Each image is 28x28 pixels, grayscale

# Transform: Converts images to tensors and normalizes them
# Normalization makes training more stable
# Preprocessing applied to every image: convert to a tensor, then shift and
# scale with mean 0.5 / std 0.5 so pixel values land in [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Options shared by both splits: where to store the files, download them if
# they are missing, and which transform to apply.
mnist_options = dict(root='./data', download=True, transform=transform)

# Training split
print("Downloading training data...")
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_options)

# Test split
print("Downloading test data...")
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_options)

print(f"‚úì Training samples: {len(train_dataset)}")
print(f"‚úì Test samples: {len(test_dataset)}")
print()


# =============================================================================
# PART 5: CREATE DATA LOADERS
# =============================================================================
print("=" * 80)
print("PART 5: Creating Data Loaders")
print("=" * 80)

# What is a DataLoader?
# - Batches data into groups (e.g., 64 images at a time)
# - Shuffles data for better training
# - Handles loading data efficiently

batch_size = 64  # Number of images per batch

# Settings common to both loaders: batch size and two worker processes.
loader_options = {"batch_size": batch_size, "num_workers": 2}

# Training data is reshuffled every epoch; test data keeps its order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

loader_summary = (
    ("Batch size", batch_size),
    ("Training batches", len(train_loader)),
    ("Test batches", len(test_loader)),
)
for caption, value in loader_summary:
    print(f"‚úì {caption}: {value}")
print()


# =============================================================================
# PART 6: VISUALIZE SAMPLE DATA
# =============================================================================
print("=" * 80)
print("PART 6: Visualizing Sample Images")
print("=" * 80)

def show_sample_images(dataset, num_samples=10):
    """Save a grid of the first ``num_samples`` images of ``dataset`` as a PNG.

    Args:
        dataset: Indexable dataset whose items are ``(image, label)`` pairs;
            ``image`` must support ``.squeeze().numpy()``.
        num_samples: How many leading samples to draw. The grid grows in rows
            of five, so any count works (the default of 10 gives the original
            2x5 layout).

    The figure is written to
    ``/mnt/user-data/outputs/sample_mnist_images.png``.
    """
    num_samples = max(0, num_samples)
    cols = 5
    # Ceiling division; keep at least one row so subplots() always gets a
    # valid grid even when nothing is drawn.
    rows = max(1, -(-num_samples // cols))

    # squeeze=False guarantees a 2-D axes array, even for a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(12, 2.5 * rows), squeeze=False)
    axes = axes.flatten()

    # Hide every frame up front so unused cells in the last row stay blank.
    for ax in axes:
        ax.axis('off')

    for i in range(num_samples):
        image, label = dataset[i]

        # Drop the channel dimension and convert to numpy for imshow
        image = image.squeeze().numpy()

        axes[i].imshow(image, cmap='gray')
        axes[i].set_title(f'Label: {label}')

    plt.tight_layout()
    plt.savefig('/mnt/user-data/outputs/sample_mnist_images.png', dpi=150, bbox_inches='tight')
    print("‚úì Sample images saved to: sample_mnist_images.png")
    plt.close(fig)

show_sample_images(train_dataset)
print()


# =============================================================================
# PART 7: BUILD THE CNN MODEL
# =============================================================================
print("=" * 80)
print("PART 7: Building CNN Model")
print("=" * 80)

class CNN(nn.Module):
    """
    Convolutional Neural Network for MNIST

    Architecture:
    - Conv Layer 1: 1 -> 32 channels, 3x3 kernel, ReLU, 2x2 max-pool
    - Conv Layer 2: 32 -> 64 channels, 3x3 kernel, ReLU, 2x2 max-pool
    - Fully Connected Layers: 3136 -> 128 (ReLU, dropout 0.5) -> 10 logits
    """

    def __init__(self):
        super().__init__()

        # CONVOLUTIONAL LAYERS
        # Layer 1: 1 input channel (grayscale), 32 output channels
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)  # Halves height and width

        # Layer 2: 32 input channels, 64 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)  # Halves height and width again

        # FULLY CONNECTED LAYERS
        # After 2 pooling layers: 28x28 -> 14x14 -> 7x7
        # 64 channels x 7 x 7 = 3136 features
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Hidden layer
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)  # Active only in train() mode
        self.fc2 = nn.Linear(128, 10)  # Output layer (10 digits: 0-9)

    def forward(self, x):
        """
        Forward pass: how data flows through the network

        Input: x is a batch of images [batch_size, 1, 28, 28]
        Output: raw class scores (logits) of shape [batch_size, 10]

        Raises:
            RuntimeError: if the spatial size is not 28x28, because the
                flattened feature count no longer matches ``fc1``.
        """
        # Convolutional layers
        x = self.pool1(self.relu1(self.conv1(x)))  # -> [batch, 32, 14, 14]
        x = self.pool2(self.relu2(self.conv2(x)))  # -> [batch, 64, 7, 7]

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 3136), this never folds a wrong-sized image into extra
        # batch rows; a size mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)  # -> [batch, 3136]

        # Fully connected layers
        x = self.relu3(self.fc1(x))  # -> [batch, 128]
        x = self.dropout(x)
        x = self.fc2(x)  # -> [batch, 10]

        return x

# Create the model and move it to GPU/CPU
# Instantiate the network and place its weights on the selected device.
model = CNN().to(device)

# Gather (element count, trainable?) once per parameter tensor, then total.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_sizes)
trainable_params = sum(count for count, needs_grad in param_sizes if needs_grad)

print("‚úì Model architecture:")
print(model)
print(f"\n‚úì Total parameters: {total_params:,}")
print(f"‚úì Trainable parameters: {trainable_params:,}")
print()


# =============================================================================
# PART 8: SET UP TRAINING COMPONENTS
# =============================================================================
print("=" * 80)
print("PART 8: Setting Up Training")
print("=" * 80)

# Loss function: Measures how wrong the predictions are
# Loss: cross-entropy between the model's logits and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, starting at lr = 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Scheduler: multiply the learning rate by 0.1 after every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

for summary_line in (
    "‚úì Loss function: CrossEntropyLoss",
    "‚úì Optimizer: Adam (learning rate = 0.001)",
    "‚úì Scheduler: StepLR (reduces LR every 5 epochs)",
):
    print(summary_line)
print()


# =============================================================================
# PART 9: INITIALIZE WEIGHTS & BIASES (W&B)
# =============================================================================
print("=" * 80)
print("PART 9: Setting Up Weights & Biases")
print("=" * 80)

# What is Weights & Biases?
# - Tool for tracking experiments
# - Visualizes loss, accuracy, and more
# - Creates beautiful dashboards

print("Initializing W&B...")
print("You may be asked to log in. Follow the prompts.")

# Hyperparameters recorded with the run. "epochs" and "learning_rate" are
# literal copies of values set elsewhere in this script - keep them in sync.
run_config = {
    "architecture": "CNN",
    "dataset": "MNIST",
    "batch_size": batch_size,
    "epochs": 10,
    "learning_rate": 0.001,
    "optimizer": "Adam",
}
wandb.init(project="mnist-cnn-tutorial", config=run_config)

# Ask W&B to record the model's gradients and parameters every 100 steps.
wandb.watch(model, criterion, log="all", log_freq=100)

print("‚úì W&B initialized!")
print(f"‚úì View your results at: {wandb.run.get_url()}")
print()


# =============================================================================
# PART 10: TRAINING FUNCTION
# =============================================================================
print("=" * 80)
print("PART 10: Defining Training Function")
print("=" * 80)

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch):
    """
    Train the model for one pass over ``train_loader``.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. Assumed to
            return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``) - the epoch average relies on that.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.
        epoch: Epoch number, used only for the progress-bar label.

    Returns:
        avg_loss: Average loss per sample for this epoch
        accuracy: Training accuracy in percent

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    model.train()  # Enable training behavior (e.g. dropout)

    loss_sum = 0.0  # Sum of per-sample losses seen so far
    correct = 0
    total = 0

    # Progress bar
    pbar = tqdm(train_loader, desc=f'Epoch {epoch}')

    for batch_idx, (images, labels) in enumerate(pbar):
        # Move data to device (GPU/CPU)
        images, labels = images.to(device), labels.to(device)

        # 1. Zero the gradients from the previous iteration
        optimizer.zero_grad()

        # 2. Forward pass: get predictions
        outputs = model(images)

        # 3. Calculate loss
        loss = criterion(outputs, labels)

        # 4. Backward pass: calculate gradients
        loss.backward()

        # 5. Update weights
        optimizer.step()

        # Read the scalar once; it is reused for stats, the bar and W&B.
        batch_loss = loss.item()
        batch_count = labels.size(0)

        # Weight the batch mean by its size so a short final batch does not
        # count as much as a full one in the epoch average.
        loss_sum += batch_loss * batch_count
        _, predicted = outputs.max(1)
        total += batch_count
        correct += predicted.eq(labels).sum().item()

        # Running accuracy over the samples seen so far
        accuracy = 100. * correct / total
        pbar.set_postfix({
            'loss': f'{batch_loss:.4f}',
            'acc': f'{accuracy:.2f}%'
        })

        # Log to W&B every 100 batches
        if batch_idx % 100 == 0:
            wandb.log({
                "batch_loss": batch_loss,
                "batch_accuracy": accuracy
            })

    if total == 0:
        # Clear message instead of a bare ZeroDivisionError below.
        raise ValueError("train_loader yielded no samples; cannot compute epoch metrics")

    avg_loss = loss_sum / total
    final_accuracy = 100. * correct / total

    return avg_loss, final_accuracy

print("‚úì Training function defined")
print()


# =============================================================================
# PART 11: TESTING/EVALUATION FUNCTION
# =============================================================================
print("=" * 80)
print("PART 11: Defining Evaluation Function")
print("=" * 80)

def evaluate(model, test_loader, criterion, device):
    """
    Evaluate the model on test data without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. Assumed to
            return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``) - the overall average relies on that.
        device: Device the batches are moved to before the forward pass.

    Returns:
        avg_loss: Average loss per sample on the test set
        accuracy: Test accuracy in percent

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()  # Disable training-only behavior (e.g. dropout)

    loss_sum = 0.0  # Sum of per-sample losses
    correct = 0
    total = 0

    # Don't track gradients during evaluation (saves memory)
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc='Testing'):
            images, labels = images.to(device), labels.to(device)

            # Forward pass only
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight the batch mean by its size so the short final batch is
            # not over-represented in the average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            _, predicted = outputs.max(1)
            total += batch_count
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        # Clear message instead of a bare ZeroDivisionError below.
        raise ValueError("test_loader yielded no samples; cannot compute metrics")

    avg_loss = loss_sum / total
    accuracy = 100. * correct / total

    return avg_loss, accuracy

print("‚úì Evaluation function defined")
print()


# =============================================================================
# PART 12: MAIN TRAINING LOOP
# =============================================================================
print("=" * 80)
print("PART 12: Starting Training!")
print("=" * 80)

num_epochs = 10

print(f"Training for {num_epochs} epochs...")
print("This will take about 5-10 minutes with GPU, 20-30 minutes with CPU")
print()

# Per-epoch history, consumed later by the plotting code.
best_accuracy = 0.0
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

for epoch in range(1, num_epochs + 1):
    print(f"\n{'='*60}")
    print(f"EPOCH {epoch}/{num_epochs}")
    print('='*60)

    # Learning rate this epoch actually trains with. It must be read BEFORE
    # scheduler.step(); reading it afterwards logs the next epoch's rate, so
    # every decay appears one epoch early in the dashboard.
    epoch_lr = optimizer.param_groups[0]['lr']

    # Train for one epoch
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer, device, epoch
    )

    # Evaluate on test set
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)

    # Advance the learning-rate schedule for the next epoch
    scheduler.step()

    # Store metrics
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    # Log to W&B
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "learning_rate": epoch_lr
    })

    # Print epoch summary
    print(f"\nüìä Epoch {epoch} Summary:")
    print(f"   Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"   Test Loss:  {test_loss:.4f} | Test Acc:  {test_acc:.2f}%")

    # Save best model.
    # NOTE(review): the checkpoint is chosen using test-set accuracy, so the
    # reported best score is optimistic; a held-out validation split would
    # give an unbiased number.
    if test_acc > best_accuracy:
        best_accuracy = test_acc
        torch.save(model.state_dict(), '/mnt/user-data/outputs/best_model.pth')
        print(f"   ‚úì New best accuracy! Model saved.")

print("\n" + "="*80)
print("TRAINING COMPLETE!")
print("="*80)
print(f"‚úì Best test accuracy: {best_accuracy:.2f}%")
print()


# =============================================================================
# PART 13: VISUALIZE TRAINING RESULTS
# =============================================================================
print("=" * 80)
print("PART 13: Creating Visualizations")
print("=" * 80)

def plot_training_history(train_losses, test_losses, train_accs, test_accs):
    """Draw loss and accuracy curves side by side and save them as a PNG.

    Each argument is a per-epoch sequence; the x-axis runs from 1 to
    ``len(train_losses)``. The figure is written to
    ``/mnt/user-data/outputs/training_history.png``.
    """
    figure, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))
    epoch_axis = range(1, len(train_losses) + 1)

    # One row per panel:
    # (axes, train series, test series, train label, test label, y label, title)
    panels = (
        (loss_ax, train_losses, test_losses,
         'Training Loss', 'Test Loss', 'Loss', 'Training and Test Loss'),
        (acc_ax, train_accs, test_accs,
         'Training Accuracy', 'Test Accuracy', 'Accuracy (%)',
         'Training and Test Accuracy'),
    )

    for axis, train_series, test_series, train_label, test_label, y_label, title in panels:
        axis.plot(epoch_axis, train_series, 'b-', label=train_label, linewidth=2)
        axis.plot(epoch_axis, test_series, 'r-', label=test_label, linewidth=2)
        axis.set_xlabel('Epoch', fontsize=12)
        axis.set_ylabel(y_label, fontsize=12)
        axis.set_title(title, fontsize=14, fontweight='bold')
        axis.legend(fontsize=10)
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('/mnt/user-data/outputs/training_history.png', dpi=150, bbox_inches='tight')
    print("‚úì Training history plot saved")
    plt.close()

plot_training_history(train_losses, test_losses, train_accuracies, test_accuracies)


# =============================================================================
# PART 14: VISUALIZE PREDICTIONS
# =============================================================================
print("=" * 80)
print("PART 14: Visualizing Model Predictions")
print("=" * 80)

def visualize_predictions(model, test_loader, device, num_images=20):
    """Save a grid of test images titled with true and predicted labels.

    Args:
        model: Trained network; switched to evaluation mode here.
        test_loader: Loader whose first batch supplies the images.
        device: Device used for the forward pass.
        num_images: Maximum number of images to draw. It is capped at the
            size of the first batch, and the grid grows in rows of five
            (the default of 20 gives the original 4x5 layout).

    The figure is written to ``/mnt/user-data/outputs/predictions.png``.
    """
    model.eval()

    # Get a batch of test images
    images, labels = next(iter(test_loader))
    images, labels = images.to(device), labels.to(device)

    # Get predictions
    with torch.no_grad():
        outputs = model(images)
        _, predictions = outputs.max(1)

    # Move to CPU for plotting
    images = images.cpu()
    labels = labels.cpu()
    predictions = predictions.cpu()

    # Never index past the batch, and size the grid from the request
    # instead of assuming exactly 20 images.
    num_images = max(0, min(num_images, images.size(0)))
    cols = 5
    rows = max(1, -(-num_images // cols))  # ceiling division, at least 1 row

    # squeeze=False guarantees a 2-D axes array, even for a single row.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 3 * rows), squeeze=False)
    axes = axes.flatten()

    # Hide every frame up front so unused cells in the last row stay blank.
    for ax in axes:
        ax.axis('off')

    for i in range(num_images):
        image = images[i].squeeze().numpy()
        true_label = labels[i].item()
        pred_label = predictions[i].item()

        axes[i].imshow(image, cmap='gray')

        # Color: green if correct, red if wrong
        color = 'green' if true_label == pred_label else 'red'
        axes[i].set_title(f'True: {true_label}\nPred: {pred_label}',
                         color=color, fontweight='bold')

    plt.tight_layout()
    plt.savefig('/mnt/user-data/outputs/predictions.png', dpi=150, bbox_inches='tight')
    print("‚úì Predictions visualization saved")
    plt.close(fig)

visualize_predictions(model, test_loader, device)


# =============================================================================
# PART 15: CONFUSION MATRIX
# =============================================================================
print("=" * 80)
print("PART 15: Creating Confusion Matrix")
print("=" * 80)

def create_confusion_matrix(model, test_loader, device):
    """Save a heatmap showing which digits the model confuses.

    Args:
        model: Trained network; switched to evaluation mode here.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device used for the forward pass.

    Rows are true digits and columns are predicted digits. The figure is
    written to ``/mnt/user-data/outputs/confusion_matrix.png``.
    """
    # Imported here so the rest of the script runs without these packages.
    from sklearn.metrics import confusion_matrix
    import seaborn as sns

    model.eval()

    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            _, predictions = outputs.max(1)

            all_predictions.extend(predictions.cpu().tolist())
            all_labels.extend(labels.tolist())

    # Fix the class list explicitly. Without it, a digit that appears in
    # neither the labels nor the predictions is dropped, and the matrix no
    # longer lines up with the 0-9 tick labels below.
    class_ids = list(range(10))
    cm = confusion_matrix(all_labels, all_predictions, labels=class_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_ids, yticklabels=class_ids)
    plt.xlabel('Predicted', fontsize=12)
    plt.ylabel('True', fontsize=12)
    plt.title('Confusion Matrix', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig('/mnt/user-data/outputs/confusion_matrix.png', dpi=150, bbox_inches='tight')
    print("‚úì Confusion matrix saved")
    plt.close()

# Install sklearn if needed
try:
    from sklearn.metrics import confusion_matrix
    import seaborn as sns
except ImportError:
    # Only a missing package should trigger an install; a bare except would
    # also hide Ctrl-C and unrelated errors raised while importing.
    install_package("scikit-learn")
    install_package("seaborn")
    from sklearn.metrics import confusion_matrix
    import seaborn as sns

create_confusion_matrix(model, test_loader, device)


# =============================================================================
# PART 16: SAVE FINAL MODEL
# =============================================================================
print("=" * 80)
print("PART 16: Saving Model")
print("=" * 80)

# Save complete model
# Bundle the last-epoch weights with the optimizer state and the best test
# accuracy seen during training.
final_checkpoint = {
    'epoch': num_epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'best_accuracy': best_accuracy,
}
torch.save(final_checkpoint, '/mnt/user-data/outputs/final_model_checkpoint.pth')

print("‚úì Model checkpoint saved to: final_model_checkpoint.pth")
print()


# =============================================================================
# PART 17: FINISH W&B
# =============================================================================
print("=" * 80)
print("PART 17: Finalizing Weights & Biases")
print("=" * 80)

# Log final images to W&B
# Capture the dashboard URL while the run is still active. After
# wandb.finish() there is no active run, so reading it in the final summary
# always printed 'N/A'.
dashboard_url = wandb.run.get_url() if wandb.run else 'N/A'

wandb.log({
    "sample_images": wandb.Image('/mnt/user-data/outputs/sample_mnist_images.png'),
    "training_history": wandb.Image('/mnt/user-data/outputs/training_history.png'),
    "predictions": wandb.Image('/mnt/user-data/outputs/predictions.png'),
    "confusion_matrix": wandb.Image('/mnt/user-data/outputs/confusion_matrix.png')
})

wandb.finish()
print("‚úì W&B session closed")
print()


# =============================================================================
# FINAL SUMMARY
# =============================================================================
print("\n" + "="*80)
print("üéâ ALL DONE! üéâ")
print("="*80)
print("\nüìä RESULTS SUMMARY:")
# The value shown is the best test accuracy reached during training.
print(f"   ‚Ä¢ Final Test Accuracy: {best_accuracy:.2f}%")
print(f"   ‚Ä¢ Total Epochs: {num_epochs}")
print(f"   ‚Ä¢ Total Parameters: {total_params:,}")
print("\nüìÅ FILES CREATED:")
print("   ‚Ä¢ sample_mnist_images.png - Sample training images")
print("   ‚Ä¢ training_history.png - Loss and accuracy curves")
print("   ‚Ä¢ predictions.png - Model predictions visualization")
print("   ‚Ä¢ confusion_matrix.png - Confusion matrix")
print("   ‚Ä¢ best_model.pth - Best model weights")
print("   ‚Ä¢ final_model_checkpoint.pth - Complete checkpoint")
print("\nüåê VIEW YOUR RESULTS:")
print(f"   ‚Ä¢ Weights & Biases Dashboard: {dashboard_url}")
print("\n" + "="*80)
print("Thank you for training with this guide!")
print("="*80)


PART 1: Installing Required Libraries
Installing packages...
‚úì Installed torch
‚úì Installed torchvision
‚úì Installed matplotlib
‚úì Installed numpy
‚úì Installed wandb
‚úì Installed tqdm

‚úì All packages installed!

PART 2: Importing Libraries
‚úì PyTorch version: 2.9.0+cu126
‚úì CUDA available: True
‚úì GPU: Tesla T4

PART 3: Setting Up Computing Device
‚úì Using device: cuda

PART 3.5: Creating Output Directory
‚úì Created output directory: /mnt/user-data/outputs

PART 4: Loading MNIST Dataset
Downloading training data...
Downloading test data...
‚úì Training samples: 60000
‚úì Test samples: 10000

PART 5: Creating Data Loaders
‚úì Batch size: 64
‚úì Training batches: 938
‚úì Test batches: 157

PART 6: Visualizing Sample Images
‚úì Sample images saved to: sample_mnist_images.png

PART 7: Building CNN Model
‚úì Model architecture:
CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, paddin

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 1


[34m[1mwandb[0m: You chose 'Create a W&B account'
[34m[1mwandb[0m: Create an account here: https://wandb.ai/authorize?signup=true&ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mambermerina43[0m ([33mambermerina43-rmit-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




‚úì W&B initialized!
‚úì View your results at: https://wandb.ai/ambermerina43-rmit-university/mnist-cnn-tutorial/runs/lvpjaz9c

PART 10: Defining Training Function
‚úì Training function defined

PART 11: Defining Evaluation Function
‚úì Evaluation function defined

PART 12: Starting Training!
Training for 10 epochs...
This will take about 5-10 minutes with GPU, 20-30 minutes with CPU


EPOCH 1/10


Epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:18<00:00, 50.22it/s, loss=0.2165, acc=91.63%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:03<00:00, 52.24it/s]



üìä Epoch 1 Summary:
   Train Loss: 0.2653 | Train Acc: 91.63%
   Test Loss:  0.0521 | Test Acc:  98.23%
   ‚úì New best accuracy! Model saved.

EPOCH 2/10


Epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 56.64it/s, loss=0.0097, acc=97.22%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 81.66it/s]



üìä Epoch 2 Summary:
   Train Loss: 0.0960 | Train Acc: 97.22%
   Test Loss:  0.0316 | Test Acc:  98.98%
   ‚úì New best accuracy! Model saved.

EPOCH 3/10


Epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 57.64it/s, loss=0.2153, acc=97.90%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 81.13it/s]



üìä Epoch 3 Summary:
   Train Loss: 0.0700 | Train Acc: 97.90%
   Test Loss:  0.0277 | Test Acc:  99.10%
   ‚úì New best accuracy! Model saved.

EPOCH 4/10


Epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:17<00:00, 54.27it/s, loss=0.1168, acc=98.26%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:02<00:00, 71.31it/s]



üìä Epoch 4 Summary:
   Train Loss: 0.0579 | Train Acc: 98.26%
   Test Loss:  0.0243 | Test Acc:  99.14%
   ‚úì New best accuracy! Model saved.

EPOCH 5/10


Epoch 5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 57.60it/s, loss=0.0039, acc=98.56%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 82.62it/s]



üìä Epoch 5 Summary:
   Train Loss: 0.0477 | Train Acc: 98.56%
   Test Loss:  0.0316 | Test Acc:  99.03%

EPOCH 6/10


Epoch 6: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 56.43it/s, loss=0.0879, acc=99.03%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:02<00:00, 67.40it/s]



üìä Epoch 6 Summary:
   Train Loss: 0.0318 | Train Acc: 99.03%
   Test Loss:  0.0215 | Test Acc:  99.38%
   ‚úì New best accuracy! Model saved.

EPOCH 7/10


Epoch 7: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:17<00:00, 54.39it/s, loss=0.0195, acc=99.23%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 80.68it/s]



üìä Epoch 7 Summary:
   Train Loss: 0.0264 | Train Acc: 99.23%
   Test Loss:  0.0206 | Test Acc:  99.39%
   ‚úì New best accuracy! Model saved.

EPOCH 8/10


Epoch 8: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 57.45it/s, loss=0.0006, acc=99.27%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 81.47it/s]



üìä Epoch 8 Summary:
   Train Loss: 0.0230 | Train Acc: 99.27%
   Test Loss:  0.0205 | Test Acc:  99.39%

EPOCH 9/10


Epoch 9: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:17<00:00, 55.14it/s, loss=0.0020, acc=99.34%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:02<00:00, 59.45it/s]



üìä Epoch 9 Summary:
   Train Loss: 0.0218 | Train Acc: 99.34%
   Test Loss:  0.0211 | Test Acc:  99.35%

EPOCH 10/10


Epoch 10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 938/938 [00:16<00:00, 57.03it/s, loss=0.0779, acc=99.36%]
Testing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 157/157 [00:01<00:00, 79.24it/s]



üìä Epoch 10 Summary:
   Train Loss: 0.0213 | Train Acc: 99.36%
   Test Loss:  0.0204 | Test Acc:  99.43%
   ‚úì New best accuracy! Model saved.

TRAINING COMPLETE!
‚úì Best test accuracy: 99.43%

PART 13: Creating Visualizations
‚úì Training history plot saved
PART 14: Visualizing Model Predictions
‚úì Predictions visualization saved
PART 15: Creating Confusion Matrix
‚úì Confusion matrix saved
PART 16: Saving Model
‚úì Model checkpoint saved to: final_model_checkpoint.pth

PART 17: Finalizing Weights & Biases


0,1
batch_accuracy,‚ñÅ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
batch_loss,‚ñà‚ñÑ‚ñÜ‚ñÖ‚ñá‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÉ‚ñÜ‚ñÉ‚ñÇ‚ñÇ‚ñÜ‚ñÇ‚ñÅ‚ñÑ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÜ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ
epoch,‚ñÅ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÖ‚ñÜ‚ñÜ‚ñá‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ
test_accuracy,‚ñÅ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñà‚ñà‚ñà‚ñà‚ñà
test_loss,‚ñà‚ñÉ‚ñÉ‚ñÇ‚ñÉ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train_accuracy,‚ñÅ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ

0,1
batch_accuracy,99.36355
batch_loss,0.0107
epoch,10.0
learning_rate,1e-05
test_accuracy,99.43
test_loss,0.02044
train_accuracy,99.35833
train_loss,0.02129


‚úì W&B session closed


üéâ ALL DONE! üéâ

üìä RESULTS SUMMARY:
   ‚Ä¢ Final Test Accuracy: 99.43%
   ‚Ä¢ Total Epochs: 10
   ‚Ä¢ Total Parameters: 421,642

üìÅ FILES CREATED:
   ‚Ä¢ sample_mnist_images.png - Sample training images
   ‚Ä¢ training_history.png - Loss and accuracy curves
   ‚Ä¢ predictions.png - Model predictions visualization
   ‚Ä¢ confusion_matrix.png - Confusion matrix
   ‚Ä¢ best_model.pth - Best model weights
   ‚Ä¢ final_model_checkpoint.pth - Complete checkpoint

üåê VIEW YOUR RESULTS:
   ‚Ä¢ Weights & Biases Dashboard: N/A

Thank you for training with this guide!


In [None]:
"""
INTERACTIVE HANDWRITTEN DIGIT RECOGNIZER APP
============================================
Draw a digit and watch the AI recognize it in real-time!

This app uses Gradio to create a web interface where you can:
1. Draw digits with your mouse/finger
2. See what the model predicts
3. View confidence scores for all digits
"""

# =============================================================================
# PART 1: INSTALL AND IMPORT LIBRARIES
# =============================================================================
print("Installing Gradio for the web interface...")

import subprocess
import sys

def install_package(package):
    """Install one package by running ``pip install -q`` under this interpreter.

    ``subprocess.check_call`` raises ``subprocess.CalledProcessError`` when
    pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-q"]
    pip_command.append(package)
    subprocess.check_call(pip_command)

# Install required packages
try:
    import gradio as gr
except ImportError:
    # Install only when the package is really missing; a bare except would
    # also hide Ctrl-C and unrelated errors raised while importing.
    install_package("gradio")
    import gradio as gr

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

print("‚úì All libraries loaded!")


# =============================================================================
# PART 2: DEFINE THE CNN MODEL (SAME AS TRAINING)
# =============================================================================
print("Setting up the model...")

class CNN(nn.Module):
    """
    Convolutional digit classifier (same architecture as used for training)

    Two blocks of Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2) are
    followed by Linear(64 * 7 * 7, 128) -> ReLU -> Dropout(0.5) ->
    Linear(128, 10). The output holds one raw score per digit class.
    """
    def __init__(self):
        super().__init__()

        # Feature extractor, block 1: 1 -> 32 channels
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)

        # Feature extractor, block 2: 32 -> 64 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)

        # Classifier head
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of class scores."""
        # Block 1
        features = self.conv1(x)
        features = self.relu1(features)
        features = self.pool1(features)

        # Block 2
        features = self.conv2(features)
        features = self.relu2(features)
        features = self.pool2(features)

        # Flatten each sample to a 64 * 7 * 7 vector for the linear layers
        flat = features.view(-1, 64 * 7 * 7)

        hidden = self.relu3(self.fc1(flat))
        hidden = self.dropout(hidden)
        return self.fc2(hidden)


# =============================================================================
# PART 3: LOAD THE TRAINED MODEL
# =============================================================================
print("Loading trained model...")

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)

# Try to load the trained model. If the file is missing the app still starts,
# but the network keeps its random initial weights.
try:
    # The loaded object is passed straight to load_state_dict, so only tensor
    # data is needed: weights_only=True stops torch.load from unpickling
    # arbitrary Python objects. map_location places the tensors on `device`.
    model.load_state_dict(
        torch.load('best_model.pth', map_location=device, weights_only=True)
    )
    print("‚úì Loaded trained model from 'best_model.pth'")
except FileNotFoundError:
    print("‚ö† No trained model found!")
    print("Please run the training script first to create 'best_model.pth'")
    print("Or the model will use random weights (won't work well)")

model.eval()  # Set to evaluation mode (turns the Dropout layer off)


# =============================================================================
# PART 4: IMAGE PREPROCESSING FUNCTION
# =============================================================================

def preprocess_image(image):
    """
    Convert drawn image to format expected by model

    Args:
        image: PIL Image, numpy array, or the editor dict that Gradio's
            Sketchpad/ImageEditor passes to callbacks (keys "background",
            "layers", "composite"). For a dict, the "composite" image is used.

    Returns:
        tensor: Preprocessed image tensor of shape (1, 1, 28, 28)

    Raises:
        ValueError: If there is no image data (None, or an editor dict whose
            "composite" entry is missing or None)
    """
    # The Sketchpad component delivers a dict, not a bare image; the drawing
    # as the user sees it is stored under "composite"
    if isinstance(image, dict):
        image = image.get("composite")

    if image is None:
        raise ValueError("preprocess_image received no image data")

    # Convert to PIL Image if numpy array
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'))

    # Flatten any alpha channel onto white first. convert('L') ignores alpha,
    # so dark strokes on a transparent background would otherwise blend into
    # the (black) colour stored under the transparent pixels.
    if image.mode in ("RGBA", "LA"):
        rgba = image.convert("RGBA")
        white = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
        image = Image.alpha_composite(white, rgba)

    # Convert to grayscale
    image = image.convert('L')

    # Resize to 28x28 (MNIST size)
    image = image.resize((28, 28), Image.LANCZOS)

    # Invert colors (MNIST has white digits on black background)
    # Drawing apps usually have black on white
    image = Image.eval(image, lambda x: 255 - x)

    # Convert to a [0, 1] tensor, then normalize to [-1, 1]
    # NOTE(review): assumes training used the same Normalize((0.5,), (0.5,));
    # confirm against the training transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    tensor = transform(image)

    # Add batch dimension: (1, 28, 28) -> (1, 1, 28, 28)
    return tensor.unsqueeze(0)


# =============================================================================
# PART 5: PREDICTION FUNCTION
# =============================================================================

def predict_digit(image):
    """
    Predict the digit from a drawn image

    Args:
        image: Value from the Gradio drawing canvas: a PIL Image or numpy
            array, the editor dict produced by Sketchpad/ImageEditor (the
            "composite" entry is used), or None for an empty canvas

    Returns:
        dict: Confidence scores for each digit (0-9), keyed by the digit as a
            string. All scores are 0.0 when there is nothing to classify.
    """
    # Sketchpad hands callbacks a dict; the visible drawing is "composite".
    # Unwrap it here so an editor dict without a drawing is treated exactly
    # like an empty canvas instead of failing during preprocessing.
    if isinstance(image, dict):
        image = image.get("composite")

    if image is None:
        return {str(i): 0.0 for i in range(10)}

    # Preprocess the image
    tensor = preprocess_image(image).to(device)

    # Get prediction (no gradients are needed for inference)
    with torch.no_grad():
        output = model(tensor)
        # Convert the raw scores to probabilities using softmax
        probabilities = torch.nn.functional.softmax(output, dim=1)
        # Single-image batch: take row 0 as plain Python floats
        probabilities = probabilities[0].cpu().tolist()

    # Create dictionary of digit: confidence
    return {str(digit): prob for digit, prob in enumerate(probabilities)}


# =============================================================================
# PART 6: CREATE GRADIO INTERFACE
# =============================================================================
print("Creating web interface...")

# Custom CSS for better styling.
# Each selector targets a component through the elem_id it is given in the
# Blocks layout below: "title" and "description" on the two header Markdown
# components, "draw-canvas" on the Sketchpad.
custom_css = """
#title {
    text-align: center;
    font-size: 2.5em;
    font-weight: bold;
    color: #2563eb;
    margin-bottom: 10px;
}
#description {
    text-align: center;
    font-size: 1.2em;
    color: #64748b;
    margin-bottom: 20px;
}
#draw-canvas {
    border: 3px solid #2563eb;
    border-radius: 10px;
}
"""

# Create the interface: a drawing canvas on the left, prediction results on
# the right, wired together by the event handlers at the bottom of the block
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    # Title and description
    gr.Markdown("# üé® Handwritten Digit Recognizer", elem_id="title")
    gr.Markdown(
        "Draw a digit (0-9) in the canvas below and watch the AI predict it in real-time!",
        elem_id="description"
    )

    with gr.Row():
        # Left column: Drawing canvas
        with gr.Column(scale=1):
            gr.Markdown("### ‚úèÔ∏è Draw Here")
            canvas = gr.Sketchpad(
                label="Draw a digit",
                type="pil",
                image_mode="RGB",
                canvas_size=(280, 280),
                brush=gr.Brush(
                    default_size=20,
                    colors=["#000000"],
                    default_color="#000000"
                ),
                elem_id="draw-canvas"
            )

            with gr.Row():
                clear_btn = gr.Button("üóëÔ∏è Clear", variant="secondary")
                predict_btn = gr.Button("üîÆ Predict", variant="primary")

        # Right column: Results
        with gr.Column(scale=1):
            gr.Markdown("### üìä Prediction Results")

            # Confidence scores for all ten digits
            confidence_plot = gr.Label(
                label="Confidence Scores",
                num_top_classes=10
            )

            # Show preprocessed image (what the model sees)
            with gr.Accordion("üîç What the Model Sees", open=False):
                processed_image = gr.Image(
                    label="Preprocessed Image (28√ó28)",
                    type="pil"
                )

    # Instructions
    with gr.Accordion("üìñ How to Use", open=False):
        gr.Markdown("""
        **Instructions:**
        1. Draw a digit (0-9) in the canvas on the left
        2. Click "Predict" or just wait (auto-predicts)
        3. See the results on the right
        4. Click "Clear" to try another digit

        **Tips for Best Results:**
        - Draw digits large and centered
        - Use clear, simple strokes
        - Try to match MNIST style (like handwritten on paper)
        - If prediction is wrong, try drawing clearer

        **What You'll See:**
        - **Confidence Scores**: How sure the model is for each digit
        - **Preprocessed Image**: The 28√ó28 image the model actually sees
        """)

    # Function to show preprocessed image
    def show_preprocessing(image):
        """Show what the model actually sees

        Args:
            image: Drawing to preprocess, or None

        Returns:
            28x28 grayscale PIL Image, or None when there is no drawing
        """
        if image is None:
            return None

        tensor = preprocess_image(image)

        # Undo Normalize((0.5,), (0.5,)) to get back to the [0, 1] range
        img_array = tensor.squeeze().cpu().numpy() * 0.5 + 0.5
        # Scale to 8-bit. Round rather than truncate so float error cannot
        # shift a pixel down by one level; clip to stay inside uint8.
        img_array = np.clip(np.rint(img_array * 255), 0, 255).astype(np.uint8)

        # A 2-D uint8 array is read as a grayscale ('L') image
        return Image.fromarray(img_array)

    # Prediction function that returns both results and preprocessed image
    def predict_and_show(image):
        """Predict and show preprocessed image

        Args:
            image: Value Gradio passes for the canvas: None, a plain image,
                or the Sketchpad editor dict ("background", "layers",
                "composite")

        Returns:
            tuple: (confidence scores for the Label, preview image or None)
        """
        # Sketchpad delivers a dict rather than an image; the drawing as the
        # user sees it is the "composite" entry. Unwrap it once so both
        # helpers receive a plain image (or None for an empty canvas).
        if isinstance(image, dict):
            image = image.get("composite")

        predictions = predict_digit(image)
        preprocessed = show_preprocessing(image)
        return predictions, preprocessed

    # Event handlers
    predict_btn.click(
        fn=predict_and_show,
        inputs=canvas,
        outputs=[confidence_plot, processed_image]
    )

    # Clear resets the canvas, the preview and the scores in one step
    clear_btn.click(
        fn=lambda: (None, None, {str(i): 0.0 for i in range(10)}),
        inputs=None,
        outputs=[canvas, processed_image, confidence_plot]
    )

    # Auto-predict whenever the canvas content changes. No debounce is
    # configured here, so the handler runs on every change event.
    canvas.change(
        fn=predict_and_show,
        inputs=canvas,
        outputs=[confidence_plot, processed_image]
    )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### üí° Example Digits")
    gr.Markdown("Try drawing digits that look like these MNIST samples:")

    # Could add example images here if you have them


# =============================================================================
# PART 7: LAUNCH THE APP
# =============================================================================
# Announce the launch. One print call with a newline separator emits the
# same text as printing each line separately.
print(
    "\n" + "=" * 80,
    "üöÄ LAUNCHING APP!",
    "=" * 80,
    "\nThe app will open in a new browser window/tab.",
    "You can also access it via the public URL shown below.",
    "\nTo stop the app, press Ctrl+C or interrupt the cell.",
    "=" * 80 + "\n",
    sep="\n",
)

# Start the Gradio server for the interface built above
demo.launch(
    show_error=True,   # Show errors in the interface
    debug=False,       # Set to True for debugging
    share=True,        # Creates a public URL you can share
)

Installing Gradio for the web interface...
✓ All libraries loaded!
Setting up the model...
Loading trained model...
⚠ No trained model found!
Please run the training script first to create 'best_model.pth'
Or the model will use random weights (won't work well)
Creating web interface...


  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:



🚀 LAUNCHING APP!

The app will open in a new browser window/tab.
You can also access it via the public URL shown below.

To stop the app, press Ctrl+C or interrupt the cell.

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a4e631f28e7a81f849.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


