# Face Mask Detection (MLOps Enhanced)

This notebook implements Face Mask Detection using **Faster R-CNN**, integrating "Production-Grade" practices:
- **MLflow**: For experiment tracking (loss, metrics, artifacts).
- **Mixed Precision (AMP)**: For faster training and lower memory usage.
- **Advanced Evaluation**: Confusion Matrix and Classification Reports adapted for Object Detection.


In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
import torch
import numpy as np
import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import seaborn as sns
from tqdm.auto import tqdm
from PIL import Image
import shutil
import math
from sklearn.metrics import confusion_matrix, classification_report
from torch.amp import autocast, GradScaler

# Add src to path
sys.path.append(os.path.abspath(os.path.join('..')))
from src.dataset import FaceMaskDataset
from src.model import get_model_instance_segmentation
import kagglehub
import torchvision.transforms as T


## 1. Data Preparation
Downloading and setting up the Face Mask Dataset.


In [None]:
# Dataset Download and Move
# Fetch the Kaggle dataset into the kagglehub cache, then copy its contents into
# ../data the first time only (detected by the absence of ../data/images).
try:
    cache_path = kagglehub.dataset_download("andrewmvd/face-mask-detection")
    target_path = '../data'

    images_present = os.path.exists(os.path.join(target_path, 'images'))
    if not images_present:
        print(f"Moving data to {target_path}...")
        os.makedirs(target_path, exist_ok=True)
        for entry in os.listdir(cache_path):
            src_entry = os.path.join(cache_path, entry)
            dst_entry = os.path.join(target_path, entry)
            if not os.path.isdir(src_entry):
                # Plain file: copy it together with its metadata.
                shutil.copy2(src_entry, dst_entry)
                continue
            # copytree will not write into an existing directory, so drop any stale copy first.
            if os.path.exists(dst_entry):
                shutil.rmtree(dst_entry)
            shutil.copytree(src_entry, dst_entry)
    ROOT_DIR = target_path
except Exception as e:
    # Best effort: report the problem and fall back to whatever is already in ../data.
    print(f"Error: {e}")
    ROOT_DIR = '../data'

print(f"Dataset Root: {ROOT_DIR}")

# Transforms
def get_transform(train):
    """Build the preprocessing pipeline applied to each sample.

    Args:
        train: Whether the pipeline is meant for the training split. It currently
            has no effect: both splits only receive ``T.ToTensor()``.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Optional augmentation for train=True (currently disabled):
    #   T.RandomHorizontalFlip(0.5)
    # NOTE(review): an image-only flip would presumably leave the boxes unflipped;
    # confirm how FaceMaskDataset applies `transforms` before enabling it.
    pipeline = [T.ToTensor()]
    return T.Compose(pipeline)

# Load Dataset
# Three views over the same root directory: `dataset` is only used to size the split,
# while the train/test views are built with their own transform pipelines.
dataset = FaceMaskDataset(ROOT_DIR, transforms=get_transform(train=False))
dataset_train_full = FaceMaskDataset(ROOT_DIR, transforms=get_transform(train=True))
dataset_test_full = FaceMaskDataset(ROOT_DIR, transforms=get_transform(train=False))

# Split
# Seeded permutation so the 90/10 split is reproducible across runs.
torch.manual_seed(42)
indices = torch.randperm(len(dataset)).tolist()
test_split = int(0.1 * len(dataset))
# Slice from an explicit boundary: `indices[:-test_split]` is empty when
# test_split == 0 (fewer than 10 samples), which would silently move every
# sample into the test split and leave nothing to train on.
train_count = len(indices) - test_split
dataset_train = torch.utils.data.Subset(dataset_train_full, indices[:train_count])
dataset_test = torch.utils.data.Subset(dataset_test_full, indices[train_count:])

# Dataloaders
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    Each sample is a sequence such as ``(image, target)``; the result groups all
    first fields together, all second fields together, and so on, without
    stacking them into a single tensor.

    Args:
        batch: Iterable of per-sample sequences.

    Returns:
        A tuple of tuples, one per field. An empty batch yields ``()``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

batch_size = 4
num_workers = 0  # Windows safe

# Settings shared by both loaders; only the dataset and the shuffling differ.
_loader_kwargs = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "collate_fn": collate_fn,
}

# Training batches are reshuffled every epoch.
train_dataloader = torch.utils.data.DataLoader(dataset_train, shuffle=True, **_loader_kwargs)

# Evaluation keeps a fixed order.
test_dataloader = torch.utils.data.DataLoader(dataset_test, shuffle=False, **_loader_kwargs)

print(f"Train Size: {len(dataset_train)}, Test Size: {len(dataset_test)}")


In [None]:
# Prefer the GPU when one is visible; the model and every batch follow `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 4  # Background + 3 classes

model = get_model_instance_segmentation(num_classes)
model.to(device)

# Optimizer
# Only parameters that still require gradients are handed to SGD.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
sgd_settings = {"lr": 0.005, "momentum": 0.9, "weight_decay": 0.0005}
optimizer = torch.optim.SGD(params, **sgd_settings)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

scaler = GradScaler()  # For Mixed Precision


## 2. Methodology: Training & Evaluation
We adapt the reference notebook methods:
- **`train_epoch`**: Uses `tqdm` for progress, `autocast` for AMP, and tracks Loss manually since Detection models return loss dicts.
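- **`evaluate`**: Since this is Object Detection, we first discard predictions whose confidence score is 0.5 or lower, then greedily match the remaining predicted boxes to ground-truth boxes (IoU > 0.5), and finally compute classification metrics (Confusion Matrix, Precision, Recall) on the matched pairs. Unmatched ground-truth boxes count as predicted Background (misses), and unmatched predictions count as true Background (spurious detections).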


In [None]:
def train_epoch(dataloader, device, model, optimizer, epoch, total_epochs):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    The model is called with ``(images, targets)`` and is expected to return a
    dict of loss terms, which are summed into a single scalar per batch. The
    backward pass and optimizer step go through the module-level ``scaler``.

    Args:
        dataloader: Iterable yielding ``(images, targets)`` batches; ``targets``
            is a sequence of dicts whose values support ``.to(device)``.
        device: Device the images and target values are moved to.
        model: Detection model to train.
        optimizer: Optimizer stepped once per batch.
        epoch: Zero-based epoch index (shown as ``epoch + 1`` in the progress bar).
        total_epochs: Total number of epochs, used only for the progress bar label.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    # Autocast is only switched on when CUDA is available; otherwise it is a no-op.
    use_amp = torch.cuda.is_available()
    batches = tqdm(dataloader, desc=f"Epoch {epoch+1}/{total_epochs}", unit="batch", leave=True)

    running_loss = 0
    for images, targets in batches:
        images = [img.to(device) for img in images]
        targets = [{key: value.to(device) for key, value in tgt.items()} for tgt in targets]

        optimizer.zero_grad()

        # Mixed Precision: forward pass and loss reduction run under autocast.
        with autocast(device_type='cuda', enabled=use_amp):
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

        # Scale the loss before backward, then let the scaler drive the optimizer step.
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = losses.item()
        running_loss += batch_loss
        batches.set_postfix({"loss": f"{batch_loss:.4f}"})

    return running_loss / len(dataloader)


In [None]:
def match_boxes(pred_boxes, true_boxes, iou_threshold=0.5):
    """Greedily pair predicted boxes with ground-truth boxes by IoU.

    Predictions are visited in the order given. Each one is paired with the
    still-unclaimed ground-truth box it overlaps most, provided that IoU is
    strictly greater than ``iou_threshold``. A ground-truth box is paired at
    most once.

    Args:
        pred_boxes: Predicted boxes, passed to ``torchvision.ops.box_iou`` as its
            first argument (presumably an ``(N, 4)`` tensor in ``(x1, y1, x2, y2)``
            format; confirm against the dataset).
        true_boxes: Ground-truth boxes in the same format.
        iou_threshold: Minimum IoU (exclusive) for a pair to count as a match.

    Returns:
        A list of ``(pred_index, true_index)`` integer tuples. Empty when either
        input is empty.
    """
    if len(pred_boxes) == 0 or len(true_boxes) == 0:
        return []

    # Imported locally: the file only binds `torchvision.transforms` (as `T`), so the
    # bare name `torchvision` is undefined at module level and would raise NameError.
    from torchvision.ops import box_iou

    # Pairwise IoU matrix: rows index predictions, columns index ground truths.
    ious = box_iou(pred_boxes, true_boxes)
    matches = []

    # Greedy matching
    for pred_idx in range(len(pred_boxes)):
        best_iou, best_idx = ious[pred_idx].max(dim=0)
        if best_iou > iou_threshold:
            true_idx = best_idx.item()
            matches.append((pred_idx, true_idx))
            # Mark this ground truth as taken so later predictions cannot claim it.
            ious[:, true_idx] = -1

    return matches

def _collect_box_matched_labels(dataloader, device, model, score_threshold, iou_threshold):
    """Run ``model`` over ``dataloader`` and return aligned (true, predicted) class-id lists."""
    true_cls = []
    pred_cls = []

    with torch.no_grad():
        for images, targets in tqdm(dataloader, desc="Evaluating"):
            images = [image.to(device) for image in images]
            outputs = model(images)

            for target, output in zip(targets, outputs):
                true_boxes = target['boxes'].to(device)
                true_labels = target['labels'].tolist()

                # Filter low confidence
                keep = output['scores'] > score_threshold
                pred_boxes = output['boxes'][keep]
                pred_labels = output['labels'][keep].tolist()

                matches = match_boxes(pred_boxes, true_boxes, iou_threshold)
                matched_pred_indices = {p_idx for p_idx, _ in matches}
                matched_true_indices = {t_idx for _, t_idx in matches}

                # Matched pairs: compare the predicted class with the true class.
                for p_idx, t_idx in matches:
                    pred_cls.append(pred_labels[p_idx])
                    true_cls.append(true_labels[t_idx])

                # False Negatives (missed ground truths): predicted as background (0).
                for t_idx, label in enumerate(true_labels):
                    if t_idx not in matched_true_indices:
                        true_cls.append(label)
                        pred_cls.append(0)

                # False Positives (spurious detections): true class is background (0).
                for p_idx, label in enumerate(pred_labels):
                    if p_idx not in matched_pred_indices:
                        true_cls.append(0)
                        pred_cls.append(label)

    return true_cls, pred_cls


def evaluate(dataloader, device, model, epoch, class_names, score_threshold=0.5, iou_threshold=0.5):
    """Evaluate a detection model with box-matched classification metrics.

    Predictions with a score above ``score_threshold`` are matched to ground-truth
    boxes via ``match_boxes``. Matched pairs contribute (true class, predicted
    class); unmatched ground truths are scored as predicted background (0) and
    unmatched predictions as true background (0). A confusion matrix is plotted,
    saved to ``confusion_matrix_epoch_{epoch+1}.png`` in the working directory and
    logged to MLflow, together with the ``val_accuracy`` and ``val_f1_macro`` metrics.

    Args:
        dataloader: Iterable yielding ``(images, targets)`` batches; each target is
            a dict with ``'boxes'`` and ``'labels'`` tensors.
        device: Device the images and ground-truth boxes are moved to.
        model: Detection model; in eval mode it must return, per image, a dict
            with ``'boxes'``, ``'labels'`` and ``'scores'``.
        epoch: Zero-based epoch index, used as the MLflow step and in file/plot names.
        class_names: Mapping from foreground class id to display name. Id 0 is
            reserved for background and added automatically.
        score_threshold: Predictions must score strictly above this to be kept.
        iou_threshold: IoU threshold forwarded to ``match_boxes``.

    Returns:
        The fraction of collected (true, predicted) pairs whose classes agree,
        or ``0.0`` when nothing was collected.
    """
    model.eval()
    all_true_cls, all_preds_cls = _collect_box_matched_labels(
        dataloader, device, model, score_threshold, iou_threshold
    )

    # Derive label ids and names from the mapping itself so they always line up,
    # regardless of dict ordering or the number of classes.
    class_ids = sorted(class_names)
    label_ids = [0] + class_ids
    display_names = ['Background'] + [class_names[class_id] for class_id in class_ids]

    if not all_true_cls:
        # No ground truths and no detections: nothing to plot or score.
        mlflow.log_metric("val_accuracy", 0.0, step=epoch)
        mlflow.log_metric("val_f1_macro", 0.0, step=epoch)
        return 0.0

    # Confusion Matrix
    cm = confusion_matrix(all_true_cls, all_preds_cls, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=display_names, yticklabels=display_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(f'Confusion Matrix (Epoch {epoch+1})')

    cm_path = f"confusion_matrix_epoch_{epoch+1}.png"
    plt.savefig(cm_path)
    mlflow.log_artifact(cm_path)
    plt.show()
    plt.close()

    # Pass `labels` explicitly: without it the report infers the classes from the
    # data and raises when fewer classes occur than there are target names.
    report = classification_report(
        all_true_cls,
        all_preds_cls,
        labels=label_ids,
        target_names=display_names,
        output_dict=True,
        zero_division=0,
    )

    # Computed directly: the report's 'accuracy' entry is not guaranteed to be
    # present once `labels` is supplied.
    correct = sum(true == pred for true, pred in zip(all_true_cls, all_preds_cls))
    accuracy = correct / len(all_true_cls)

    # Log metrics
    mlflow.log_metric("val_accuracy", accuracy, step=epoch)
    mlflow.log_metric("val_f1_macro", report['macro avg']['f1-score'], step=epoch)

    return accuracy


In [None]:
# Main Experiment Loop
experiment_name = "FaceMask_FasterRCNN"
# set_experiment creates the experiment when it does not exist yet, so no separate
# create_experiment call wrapped in a blanket `except` is needed.
mlflow.set_experiment(experiment_name)

num_epochs = 5

with mlflow.start_run():
    # Log params (read from the live objects so they cannot drift from the real setup)
    mlflow.log_param("epochs", num_epochs)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("optimizer", type(optimizer).__name__)
    mlflow.log_param("lr", optimizer.defaults["lr"])

    # Foreground class ids -> display names; id 0 (background) is added by evaluate().
    label_map = {1: "with_mask", 2: "without_mask", 3: "incorrect"}

    for epoch in range(num_epochs):
        avg_loss = train_epoch(train_dataloader, device, model, optimizer, epoch, num_epochs)
        # Step the schedule once per epoch, after that epoch's optimizer updates.
        lr_scheduler.step()

        mlflow.log_metric("train_loss", avg_loss, step=epoch)
        print(f"Epoch {epoch+1} Train Loss: {avg_loss:.4f}")

        # Evaluate
        val_acc = evaluate(test_dataloader, device, model, epoch, label_map)
        print(f"Epoch {epoch+1} Val Accuracy (Box-Matched): {val_acc:.4f}")

    # Save Model
    # NOTE: despite the file name, these are the weights after the final epoch;
    # no best-epoch selection is performed.
    os.makedirs('../models', exist_ok=True)
    torch.save(model.state_dict(), '../models/model_best.pth')
    mlflow.log_artifact('../models/model_best.pth')

print("Training Complete. Check MLflow UI.")
