
# PyTorch + MLflow (MNIST) — Simple Demo

This notebook trains a tiny CNN on MNIST, logs it to **MLflow** with a **model signature**, and then loads it back using the **pyfunc** flavor for inference.

**Prereqs**
- Make sure an MLflow server is running at `http://127.0.0.1:8080`.
- If any packages are missing, uncomment and run the install command in the first code cell.


In [None]:

# If packages are missing, uncomment and run (%pip installs into this kernel's environment):
# %pip install torch torchvision mlflow numpy pandas


In [None]:

import numpy as np
import pandas as pd
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import mlflow, mlflow.pytorch
from mlflow.models import infer_signature

# ——— MLflow points to your running server ———
TRACKING_URI = "http://127.0.0.1:8080"
EXPERIMENT   = "PyTorch_MNIST_Image_Demo_Simple"
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT)

# ——— Reproducibility ———
# Weight initialisation and DataLoader shuffling are random; seeding here, before
# any model or loader is created, makes "Restart & Run All" repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# ——— Run configuration (everything a reader might tune) ———
DEVICE    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH     = 128
EPOCHS    = 1  # small for demo
print("Using device:", DEVICE)


In [None]:

# Only convert images to [0,1] tensors here; normalization lives inside the model
tfm = transforms.ToTensor()

# Arguments shared by the train and test splits
_mnist_args = dict(root="./data", download=True, transform=tfm)
train_ds = datasets.MNIST(train=True,  **_mnist_args)
test_ds  = datasets.MNIST(train=False, **_mnist_args)

# Shuffle only the training split; keep test order fixed
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH, shuffle=True)
test_loader  = DataLoader(dataset=test_ds,  batch_size=BATCH, shuffle=False)

# Display the split sizes as the cell output
tuple(len(split) for split in (train_ds, test_ds))


In [None]:

class SmallCNN(nn.Module):
    """Compact two-conv-layer classifier with input standardisation built in.

    The forward pass subtracts the stored mean and divides by the stored std
    before the first convolution, so callers feed un-normalized image tensors.
    Two conv → ReLU → 2×2 max-pool stages are followed by two linear layers;
    the output is one row of raw logits (no softmax) per input sample.

    Args:
        num_classes: width of the final linear layer (number of logits).
    """
    def __init__(self, num_classes=10):
        super().__init__()
        # Dataset statistics are stored as buffers so they follow the module
        # across devices and are saved with it, without being trained.
        for buf_name, value in (("mean", 0.1307), ("std", 0.3081)):
            self.register_buffer(buf_name, torch.tensor([value]).view(1, 1, 1, 1))

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        # 32 channels × 7 × 7 spatial positions remain after two 2× poolings of 28×28
        self.fc1   = nn.Linear(in_features=32 * 7 * 7, out_features=64)
        self.fc2   = nn.Linear(in_features=64, out_features=num_classes)

    def forward(self, x):
        """Return class logits for a batch of single-channel images."""
        feats = (x - self.mean) / self.std
        # Each stage halves the spatial size: 28→14, then 14→7
        for conv in (self.conv1, self.conv2):
            feats = F.max_pool2d(F.relu(conv(feats)), 2)
        flat = feats.view(feats.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Build the network on the selected device, then attach the Adam optimizer
# and the cross-entropy loss used by the training loop.
model = SmallCNN()
model = model.to(DEVICE)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
model  # last expression: show the architecture as the cell output


In [None]:

def evaluate(net, loader, device=None):
    """Compute top-1 accuracy of ``net`` over every batch in ``loader``.

    The network is switched to eval mode for the duration of the call and is
    returned to whatever mode (train/eval) it was in beforehand, so calling
    this between training epochs does not silently leave the model in eval mode.

    Args:
        net: an ``nn.Module`` mapping a batch of inputs to per-class scores.
        loader: iterable yielding ``(inputs, labels)`` tensor pairs.
        device: where to move each batch. Defaults to the device of the
            network's first parameter; falls back to the global ``DEVICE``
            when the network has no parameters.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1];
        0.0 when the loader yields no samples.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = DEVICE if first_param is None else first_param.device

    was_training = net.training
    net.eval()
    correct = total = 0
    try:
        with torch.no_grad():
            for xb, yb in loader:
                xb, yb = xb.to(device), yb.to(device)
                pred = net(xb).argmax(1)
                correct += (pred == yb).sum().item()
                total   += yb.numel()
    finally:
        # Restore the caller's mode even if a batch raised
        net.train(was_training)
    # max(1, total) avoids division by zero on an empty loader
    return correct / max(1, total)

# Train the model inside a single MLflow run, then log it with a signature.
with mlflow.start_run(run_name="mnist-smallcnn") as run:
    mlflow.log_params({
        "epochs": EPOCHS,
        "batch_size": BATCH,
        "model": "SmallCNN",
        # Record the optimizer settings and RNG seed so the run can be reproduced
        "optimizer": type(optimizer).__name__,
        "lr": optimizer.param_groups[0]["lr"],
        "torch_seed": torch.initial_seed(),
    })
    mlflow.set_tags({"task": "image-classification", "dataset": "MNIST"})

    # — Train (simple loop)
    steps_per_epoch = len(train_loader)
    for epoch in range(EPOCHS):
        # evaluate() puts the model in eval mode, so training mode must be
        # re-enabled at the start of every epoch, not just once before the loop.
        model.train()
        for i, (xb, yb) in enumerate(train_loader):
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

            # log a few losses so you see activity in the UI
            if (i + 1) % 100 == 0:
                mlflow.log_metric("train_loss", float(loss.item()),
                                  step=epoch * steps_per_epoch + (i + 1))

        val_acc = evaluate(model, test_loader)
        mlflow.log_metric("val_accuracy", float(val_acc), step=epoch)
        print(f"Epoch {epoch+1}/{EPOCHS} | Val Acc: {val_acc:.3f}")

    # — Build a small example to create a clean model signature.
    #   The example output must be what the logged model actually returns:
    #   SmallCNN.forward yields raw logits, so no softmax is applied here.
    model.eval()
    xb_example, _ = next(iter(test_loader))     # [B,1,28,28] in [0,1]
    xb_example = xb_example[:4].to(DEVICE)      # small batch
    with torch.no_grad():
        logits_example = model(xb_example).cpu().numpy()  # [4,10] logits

    input_example = xb_example.cpu().numpy()    # [4,1,28,28]
    signature = infer_signature(input_example, logits_example)

    # — Log the model; keep the return value (model_info)
    model_info = mlflow.pytorch.log_model(
        pytorch_model=model,
        artifact_path="model",
        signature=signature,
        input_example=input_example,
    )

    print("Run ID     :", run.info.run_id)
    print("Model  URI :", model_info.model_uri)


In [None]:

# Load the just-logged model via PyFunc and predict
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

# Prepare 8 test images as numpy [N,1,28,28]
test_xb, test_yb = next(iter(test_loader))
x_np = test_xb[:8].numpy()

# The logged network returns raw logits (SmallCNN.forward applies no softmax),
# so convert them to probabilities explicitly before calling them "probs".
logits = np.asarray(loaded_model.predict(x_np))          # -> numpy [8,10] logits
probs  = torch.as_tensor(logits).softmax(dim=1).numpy()  # -> numpy [8,10], rows sum to 1
preds  = probs.argmax(axis=1)

# Sanity check: each row is a valid probability distribution
np.testing.assert_allclose(probs.sum(axis=1), 1.0, rtol=1e-4)

print("probs shape:", probs.shape)
print("preds      :", preds.tolist())
print("true       :", test_yb[:8].numpy().tolist())
