### Define Model Architecture

In [1]:
import os
import torch

# Define the MNIST CNN model architecture (as used during training)
class Net(torch.nn.Module):
    """Two-stage convolutional classifier for 1x28x28 digit images.

    The attribute names ``conv1``, ``conv2``, ``fc1`` and ``fc2`` are the
    keys used in the model's state dict, so they must not be renamed if
    previously saved weights are to be loaded.
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: in_channels, out_channels, kernel_size, stride.
        self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
        self.conv2 = torch.nn.Conv2d(20, 50, 5, 1)
        # For a 28x28 input the spatial size shrinks 28 -> 24 -> 12 -> 8 -> 4
        # (5x5 conv without padding, then 2x2 max-pool, twice), leaving
        # 50 feature maps of 4x4 per sample.
        self.fc1 = torch.nn.Linear(4 * 4 * 50, 500)
        self.fc2 = torch.nn.Linear(500, 10)

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        Args:
            x: Image tensor whose trailing dimensions are (1, 28, 28),
                normally shaped (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) containing log-softmax scores.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to
                the number of features ``fc1`` expects per sample.
        """
        # torch.nn.functional is available through the top-level torch
        # import, so no per-call import statement is needed here.
        F = torch.nn.functional
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)

        # Check the per-sample feature count before flattening. Flattening
        # with an inferred batch dimension alone would accept any input whose
        # total element count is a multiple of fc1.in_features and silently
        # return the wrong number of rows (e.g. 16 images of 32x32 -> 25 rows).
        expected = self.fc1.in_features
        per_sample = x.shape[-3] * x.shape[-2] * x.shape[-1]
        if per_sample != expected:
            raise RuntimeError(
                f"Net expects 1x28x28 inputs: got a feature map of shape "
                f"{tuple(x.shape[-3:])} ({per_sample} features per sample, "
                f"expected {expected})"
            )
        # reshape (rather than view) also accepts non-contiguous activations.
        x = x.reshape(-1, expected)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### Load the Trained Model for Inference

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Instantiate the network and place its parameters on the chosen device.
model = Net()
model = model.to(device)
model_path = "/opt/app-root/src/shared/checkpoints/snapshot_mnist.pt"

# The snapshot file holds a dictionary (weights plus training metadata),
# not a bare state dict. Tensors are mapped onto `device` while loading.
checkpoint = torch.load(
    model_path,
    map_location=device,
    weights_only=True,
)

# Restore only the weights, then switch the model to evaluation mode.
model.load_state_dict(checkpoint["MODEL_STATE"])
model.eval()
print("Model loaded and set to evaluation mode.")

# Training metadata stored next to the weights in the same snapshot.
print(f"Model was trained for {checkpoint['EPOCHS_RUN']} epochs")

Model loaded and set to evaluation mode.
Model was trained for 4 epochs


### Test Sample Handwritten Images to Predict Digit

In [7]:
import torchvision.transforms as transforms
from PIL import Image

image_path = "test_images/three.png"

# Turn an arbitrary image into a single-channel 28x28 float tensor.
# NOTE(review): no Normalize step is applied here; confirm that this matches
# the preprocessing used when the model was trained.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

# Load the image and apply the transformations
try:
    # Image.open reads lazily and keeps the file open; the context manager
    # releases the handle as soon as the pixels have been converted.
    with Image.open(image_path) as pil_image:
        # unsqueeze(0) adds the batch dimension expected by the model.
        image = transform(pil_image).unsqueeze(0).to(device)
except FileNotFoundError:
    print(f"Image not found: {image_path}")
else:
    # Inference runs outside the try block so that only a missing image file
    # is reported as "Image not found"; any other error surfaces normally.
    with torch.no_grad():
        output = model(image)
        probabilities = torch.exp(output)  # Convert log_softmax to probabilities
        predicted = output.argmax(dim=1, keepdim=True).item()
        confidence = probabilities[0][predicted].item()
    print(f"Predicted handwritten digit: {predicted}")
    print(f"Confidence: {confidence:.4f}")

Predicted handwritten digit: 3
Confidence: 1.0000


## What We Achieved

### 1. Model Training
- **Dataset**: MNIST handwritten digits
- **Model Architecture**: Convolutional Neural Network with:
  - Convolutional layers for feature extraction
  - Pooling layers for dimensionality reduction
  - Dense layers for classification
- **Training**: Distributed training using Kubeflow Training Operator
- **Training Framework**: PyTorch
- **Storage**: Persistent volumes for model checkpoint artifacts and data
- **Scalability**: 
    - Distributed training capabilities for larger datasets
    - Scales efficiently in a Kubernetes environment

## Key Features Demonstrated
- ✅ End-to-end ML pipeline from training to inference
- ✅ Python API for interacting with Kubeflow Trainer V1 through the Kubeflow Training SDK
- ✅ Scalable distributed training

## Results
- **Output**: Trained model checkpoint saved; the model classifies digits with high accuracy
- **Prediction**: Real-time inference on custom handwritten digits
- **Results**: Accurate digit classification with confidence scores