# 📦 MobileNetV2 Training + MLflow Logging

In [20]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import mlflow
import mlflow.pytorch
import mlflow.pyfunc
import numpy as np

In [21]:
# Training hyperparameters.
EPOCHS = 10
BATCH_SIZE = 16

# Number of output classes produced by the classifier head.
NUM_CLASSES = 2

# Dataset root; it is expected to contain "train" and "test" subfolders.
DATA_DIR = "data"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [22]:
# Preprocessing shared by both splits: resize every image to 128x128, convert
# it to a tensor, then normalize each channel with a fixed mean and std.
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# ImageFolder derives the class labels from the subfolder names of each split.
train_dataset = datasets.ImageFolder(
    root=os.path.join(DATA_DIR, "train"),
    transform=transform,
)
test_dataset = datasets.ImageFolder(
    root=os.path.join(DATA_DIR, "test"),
    transform=transform,
)

# Only the training split is shuffled; the test split keeps its default order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [23]:
# Start from torchvision's MobileNetV2 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm the installed version before migrating.
base_model = torchvision.models.mobilenet_v2(pretrained=True)

# Freeze the feature extractor so that only the classifier receives gradients
# (optional: remove this loop to fine-tune the whole network).
for frozen_param in base_model.features.parameters():
    frozen_param.requires_grad = False

# Replace the final classifier layer with a fresh linear head that emits
# NUM_CLASSES scores, then move the whole model to the target device.
base_model.classifier[1] = nn.Linear(
    in_features=base_model.last_channel,
    out_features=NUM_CLASSES,
)
base_model = base_model.to(DEVICE)

In [24]:
# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(params=base_model.parameters(), lr=1e-3)

# Cross-entropy loss applied to the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()

def evaluate(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and is left in eval mode on return;
    callers that keep training must call ``model.train()`` again.

    Args:
        model: A ``torch.nn.Module`` that maps an input batch to per-class
            scores of shape ``(batch, num_classes)``.
        dataloader: An iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        The fraction of correctly classified samples as a float, or ``0.0``
        when ``dataloader`` yields no samples (instead of dividing by zero).
    """
    model.eval()

    # Move batches to the device the model actually lives on, so inputs always
    # match the weights. Parameter-free models fall back to the global DEVICE.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    correct, total = 0, 0
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            # Index of the highest score along the class dimension.
            _, preds = torch.max(outputs, 1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        return 0.0
    return correct / total

In [26]:
# Point MLflow at the local tracking server and select the experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("hotdog-classifier-mobilenet-v2")


with mlflow.start_run() as run:
    # Record the run configuration as MLflow params.
    for param_name, param_value in (
        ("model", "MobileNetV2"),
        ("epochs", EPOCHS),
        ("batch_size", BATCH_SIZE),
    ):
        mlflow.log_param(param_name, param_value)

    num_train_samples = len(train_loader.dataset)

    for epoch in range(EPOCHS):
        # evaluate() leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        # NOTE(review): train() also puts the BatchNorm layers of the frozen
        # feature extractor in training mode, so their running statistics keep
        # updating -- confirm this is intended.
        base_model.train()
        loss_sum = 0.0

        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            optimizer.zero_grad()
            batch_loss = criterion(base_model(batch_images), batch_labels)
            batch_loss.backward()
            optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so the epoch figure is a true per-sample average.
            loss_sum += batch_loss.item() * batch_images.size(0)

        train_loss = loss_sum / num_train_samples

        # Accuracy on the test split is tracked as the validation metric.
        val_acc = evaluate(base_model, test_loader)
        print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Val Acc: {val_acc:.4f}")
        mlflow.log_metric("train_loss", train_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_acc, step=epoch)

    # One last pass over the test split, logged as a stand-alone metric.
    final_val_acc = evaluate(base_model, test_loader)
    mlflow.log_metric("final_val_accuracy", final_val_acc)

    # Persist only the weights (state_dict) to the working directory.
    torch.save(base_model.state_dict(), "mobilenetv2_state.pt")
    print("✅ Torch model state_dict saved.")

    # Log the .pt file as an MLflow artifact, and also log using Pyfunc
    class MobileNetV2MLflowWrapper(mlflow.pyfunc.PythonModel):
        """MLflow pyfunc wrapper around the fine-tuned MobileNetV2 classifier.

        The network is rebuilt from the ``torch_model_state`` artifact (a saved
        ``state_dict``) and ``predict`` returns predicted class indices.
        """

        def load_context(self, context):
            """Rebuild the network and load its weights from the artifact.

            Args:
                context: MLflow model context; ``context.artifacts`` must map
                    ``"torch_model_state"`` to the saved state_dict file.
            """
            import torch
            import torchvision
            import torch.nn as nn

            state_dict = torch.load(
                context.artifacts["torch_model_state"], map_location="cpu"
            )
            # Size the head from the checkpoint itself rather than from a
            # notebook global, so the architecture always matches the weights
            # wherever the model is loaded.
            num_classes = state_dict["classifier.1.weight"].shape[0]

            # Default construction loads no pretrained weights and avoids the
            # deprecated `pretrained=` keyword; every weight is overwritten by
            # load_state_dict() below.
            model = torchvision.models.mobilenet_v2()
            model.classifier[1] = nn.Linear(model.last_channel, num_classes)
            model.load_state_dict(state_dict)
            model.eval()

            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model.to(self.device)
            self.model = model

        def predict(self, context, model_input):
            """Return the predicted class index for each input image.

            Args:
                context: MLflow model context (unused).
                model_input: Image data as a NumPy array, tensor or nested
                    sequence, either batched ``(N, C, H, W)`` or a single
                    ``(C, H, W)`` image. Any numeric dtype is accepted and
                    cast to float32. Objects exposing a non-callable
                    ``.values`` attribute (e.g. a DataFrame) are unwrapped.

            Returns:
                A NumPy array holding one predicted class index per image.

            Raises:
                ValueError: If the input is neither 3- nor 4-dimensional.
            """
            import torch

            # Unwrap DataFrame-like inputs only. `torch.Tensor.values` and
            # `dict.values` are methods, so a plain hasattr() check would
            # wrongly replace those inputs with a bound method.
            values = getattr(model_input, "values", None)
            if values is not None and not callable(values):
                x = values
            else:
                x = model_input

            # as_tensor accepts arrays and tensors alike and casts to float32.
            x = torch.as_tensor(x, dtype=torch.float32).to(self.device)
            # Add the batch dimension when a single image is passed.
            if x.ndim == 3:
                x = x.unsqueeze(0)
            if x.ndim != 4:
                raise ValueError(
                    "model_input must have shape (N, C, H, W) or (C, H, W); "
                    f"got {tuple(x.shape)}"
                )

            with torch.no_grad():
                outputs = self.model(x)
                _, preds = torch.max(outputs, 1)
            return preds.cpu().numpy()

    # Bundle the saved state_dict with the wrapper so that load_context() can
    # find it under the "torch_model_state" key, then log both as one model.
    artifacts = dict(torch_model_state="mobilenetv2_state.pt")
    wrapper = MobileNetV2MLflowWrapper()
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=wrapper,
        artifacts=artifacts,
    )
    print("✅ Model logged to MLflow with pyfunc wrapper.")

# Local inference smoke test: reload the pyfunc model logged by this run and
# feed it random inputs in float32 and float64, batched and unbatched.
run_id = run.info.run_id
model_uri = f"runs:/{run_id}/model"
pyfunc_model = mlflow.pyfunc.load_model(model_uri)

# Random stand-in inputs (noise, not real images) laid out as
# (batch, channels, H, W).
batch_shape = (1, 3, 128, 128)
dummy_img_float32 = np.random.randn(*batch_shape).astype(np.float32)
dummy_img_float64 = np.random.randn(*batch_shape).astype(np.float64)

print("Prediction with float32:", pyfunc_model.predict(dummy_img_float32))
# float64 input is accepted because predict() casts its input to float32.
print("Prediction with float64:", pyfunc_model.predict(dummy_img_float64))  # Should work!

# A single unbatched image of shape (3, 128, 128) is accepted as well.
image_shape = batch_shape[1:]
single_img = np.random.randn(*image_shape).astype(np.float64)
print("Prediction with single image (float64):", pyfunc_model.predict(single_img))

Epoch 1/10, Train Loss: 0.3017, Val Acc: 0.7780
Epoch 2/10, Train Loss: 0.3220, Val Acc: 0.7780
Epoch 3/10, Train Loss: 0.3461, Val Acc: 0.7660
Epoch 4/10, Train Loss: 0.2974, Val Acc: 0.7960
Epoch 5/10, Train Loss: 0.2725, Val Acc: 0.7640
Epoch 6/10, Train Loss: 0.3007, Val Acc: 0.8080
Epoch 7/10, Train Loss: 0.2619, Val Acc: 0.7980
Epoch 8/10, Train Loss: 0.3101, Val Acc: 0.7720
Epoch 9/10, Train Loss: 0.3069, Val Acc: 0.8100
Epoch 10/10, Train Loss: 0.2524, Val Acc: 0.8080
✅ Torch model state_dict saved.




✅ Model logged to MLflow with pyfunc wrapper.
🏃 View run traveling-fox-18 at: http://127.0.0.1:5000/#/experiments/910632922734708419/runs/25bf3a3031e145eb855e8f90c1fe0b99
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/910632922734708419




Prediction with float32: [1]
Prediction with float64: [1]
Prediction with single image (float64): [1]
