# 😷 Face Mask Detection — MobileNetV2 Transfer Learning (PyTorch)

A complete pipeline for detecting whether a person is wearing a face mask:

| Step | Description |
|------|-------------|
| 1 | **Dataset** — Synthetic face images (or real dataset) |
| 2 | **Pre-process** — Resize, normalise, augment |
| 3 | **Transfer Learning** — MobileNetV2 ImageNet base + custom head |
| 4 | **Training** — Adam + EarlyStopping |
| 5 | **Evaluation** — Accuracy, loss curves, confusion matrix |
| 6 | **Webcam Demo** — Real-time Haar-cascade + model inference |

## 0 · Install Dependencies

In [None]:
import subprocess
import sys


def _pip_install(*args: str) -> bool:
    """Run a quiet ``pip install`` with the current interpreter.

    Args:
        *args: Package names and/or extra pip options, passed through verbatim.

    Returns:
        True when pip exits with status 0, False otherwise.
    """
    cmd = [sys.executable, '-m', 'pip', 'install', *args, '-q']
    # check=False keeps the notebook running on failure; the caller reports it.
    return subprocess.run(cmd, check=False).returncode == 0


failed = []

# PyTorch (CPU build — no path-length issues on Windows)
if not _pip_install('torch', 'torchvision',
                    '--index-url', 'https://download.pytorch.org/whl/cpu'):
    failed.append('torch/torchvision')

# Other dependencies
for pkg in ['opencv-python', 'numpy', 'matplotlib', 'scikit-learn', 'pillow', 'seaborn']:
    if not _pip_install(pkg):
        failed.append(pkg)

# Only claim success when every install actually succeeded.
if failed:
    print(f'⚠️  Could not install: {", ".join(failed)} — install them manually before continuing.')
else:
    print('✅ Dependencies ready')

## 1 · Imports

In [None]:
import os, time, random, warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image, ImageDraw

from sklearn.model_selection import train_test_split
from sklearn.metrics         import classification_report, confusion_matrix

import torch
import torch.nn as nn
from torch.utils.data       import Dataset, DataLoader
from torch.optim            import Adam
from torch.optim.lr_scheduler import StepLR
import torchvision.transforms as T
from torchvision.models      import mobilenet_v2, MobileNet_V2_Weights

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

# Report the library version and the selected device.
print(
    f'PyTorch     : {torch.__version__}',
    f'Device      : {DEVICE}',
    sep='\n',
)

## 2 · Generate Synthetic Dataset

> **Using a real dataset?**  
> Download from https://github.com/chandrikadeb7/Face-Mask-Detection  
> and place images in `dataset/with_mask/` and `dataset/without_mask/`  
> then skip this cell.
>
> The cell below creates a **synthetic** dataset of drawn faces so the notebook  
> runs fully offline in seconds.

In [None]:
# Dataset layout and generation settings shared by the cells below.
DATASET_DIR = 'dataset'                       # root folder; one sub-folder per class
CATEGORIES  = ['with_mask', 'without_mask']   # list position doubles as the class index
N_IMAGES    = 300                             # per class – increase for better accuracy
IMG_SIZE    = 224                             # side length images are resized to


def make_face(mask: bool, idx: int) -> Image.Image:
    """Draw one synthetic 100x100 RGB face, with or without a mask.

    Args:
        mask: True paints a coloured, striped rectangle over the lower face;
            False draws a mouth arc instead.
        idx: Seed for the random generator, so the same ``idx`` always
            produces the same image.

    Returns:
        The rendered face as a PIL image with Gaussian pixel noise added.
    """
    side = 100
    gen = np.random.default_rng(seed=idx)

    # NOTE: the order of the random draws below fixes the output; keep it.
    background = tuple(int(v) for v in gen.integers(180, 255, 3))
    canvas = Image.new('RGB', (side, side), background)
    pen = ImageDraw.Draw(canvas)

    skin_r = gen.integers(180, 220)
    skin_g = gen.integers(140, 180)
    skin_b = gen.integers(100, 140)
    skin = (int(skin_r), int(skin_g), int(skin_b))

    # Face oval: bounding box given as left, top, width, height.
    left, top, width, height = 20, 15, 60, 70
    pen.ellipse([left, top, left + width, top + height], fill=skin)

    # Two dark eyes on a common horizontal line.
    eye_y = top + int(height * .35)
    eye_centres = (left + int(width * .25), left + int(width * .65))
    for eye_x in eye_centres:
        pen.ellipse([eye_x - 4, eye_y - 3, eye_x + 4, eye_y + 3], fill=(40, 40, 40))

    # Nose: a small blob in a darker shade of the skin colour.
    nose_x = left + width // 2
    nose_y = top + int(height * .55)
    nose_shade = tuple(max(0, channel - 30) for channel in skin)
    pen.ellipse([nose_x - 3, nose_y - 2, nose_x + 3, nose_y + 3], fill=nose_shade)

    if not mask:
        # Uncovered face: draw a mouth arc.
        mouth_box = [left + 15, top + int(height * .7), left + width - 15, top + int(height * .88)]
        pen.arc(mouth_box, 10, 170, fill=(180, 80, 80), width=2)
    else:
        # Covered face: random-coloured rectangle with lighter horizontal stripes.
        mask_rgb = tuple(int(v) for v in gen.integers(0, 255, 3))
        mask_box = [left + 3, top + int(height * .5), left + width - 3, top + int(height * .92)]
        pen.rectangle(mask_box, fill=mask_rgb)
        stripe_rgb = tuple(min(255, channel + 40) for channel in mask_rgb)
        for stripe_y in range(top + int(height * .54), top + int(height * .90), 6):
            pen.line([(left + 5, stripe_y), (left + width - 5, stripe_y)], fill=stripe_rgb, width=1)

    # Add per-pixel noise, then clamp back into the valid 8-bit range.
    noise = gen.normal(0, 5, (side, side, 3))
    noisy = np.array(canvas, dtype=np.float32) + noise
    return Image.fromarray(np.clip(noisy, 0, 255).astype(np.uint8))


# Create the synthetic images for each class, unless the class folder already
# holds at least N_IMAGES entries (e.g. from a previous run or a real dataset).
for cls, flag in [('with_mask', True), ('without_mask', False)]:
    path = os.path.join(DATASET_DIR, cls)
    # List the folder once and reuse the count for both the check and the message.
    existing = len(os.listdir(path)) if os.path.exists(path) else 0
    if existing >= N_IMAGES:
        print(f'[SKIP] {cls} — {existing} images already exist')
        continue
    os.makedirs(path, exist_ok=True)
    # Shift the seeds of the unmasked class so the two classes never share a seed.
    offset = 0 if flag else N_IMAGES
    for i in range(N_IMAGES):
        make_face(flag, i + offset).save(os.path.join(path, f'{cls}_{i:04d}.png'))
    print(f'[OK ] Generated {N_IMAGES} images → {path}')

# ── Preview: up to five files per class (in directory-listing order), one class per row ──
fig, axes = plt.subplots(2, 5, figsize=(13, 5))
fig.suptitle('Synthetic Dataset Samples', fontsize=14, fontweight='bold')
for row_axes, cls in zip(axes, CATEGORIES):
    class_dir = os.path.join(DATASET_DIR, cls)
    shown = os.listdir(class_dir)[:5]
    for ax, fname in zip(row_axes, shown):
        ax.imshow(Image.open(os.path.join(class_dir, fname)))
        ax.set_title(cls.replace('_',' ').title(), fontsize=8)
        ax.axis('off')
plt.tight_layout()
plt.show()

## 3 · Custom Dataset Class & Data Loaders

In [None]:
class MaskDataset(Dataset):
    """Dataset that pairs image file paths with their labels.

    Images are opened on access, converted to RGB and, when a transform was
    supplied, passed through it before being returned.
    """

    def __init__(self, paths, labels, transform=None):
        """Store the parallel ``paths`` / ``labels`` sequences and the optional transform."""
        self.paths = paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored image paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        label = self.labels[idx]
        image = Image.open(self.paths[idx]).convert('RGB')
        if not self.transform:
            return image, label
        return self.transform(image), label


# ── Collect paths & labels ──────────────────────────────────────────────
paths, labels = [], []
for cls_idx, cat in enumerate(CATEGORIES):
    folder = os.path.join(DATASET_DIR, cat)
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # seeded split below come out the same on every machine and run.
    for fname in sorted(os.listdir(folder)):
        fpath = os.path.join(folder, fname)
        if os.path.isfile(fpath):      # ignore sub-directories
            paths.append(fpath)
            labels.append(cls_idx)     # label = position of the class in CATEGORIES

print(f'Total images : {len(paths)}')
for i, cat in enumerate(CATEGORIES):
    print(f'  {cat}: {labels.count(i)}')

# 80/20 split, stratified so both parts keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    paths, labels, test_size=0.20, stratify=labels, random_state=42
)

# ── Transforms ──────────────────────────────────────────────────────────
# Per-channel mean / std handed to Normalize in both pipelines.
MEAN, STD = [0.485,0.456,0.406], [0.229,0.224,0.225]

# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
train_tf = T.Compose([
    T.Resize((IMG_SIZE, IMG_SIZE)),
    T.RandomHorizontalFlip(),
    T.RandomRotation(20),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    T.ToTensor(),
    T.Normalize(MEAN, STD),
])

# Validation pipeline: same steps without any random augmentation.
val_tf = T.Compose([
    T.Resize((IMG_SIZE,IMG_SIZE)),
    T.ToTensor(),
    T.Normalize(MEAN,STD),
])

BATCH_SIZE = 32
train_ds   = MaskDataset(X_train, y_train, train_tf)
val_ds     = MaskDataset(X_test,  y_test,  val_tf)

# Only the training loader shuffles; both load in the main process (num_workers=0).
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print(f'Train batches : {len(train_loader)}   Val batches : {len(val_loader)}')

## 4 · Build MobileNetV2 Model

```
MobileNetV2 (frozen, ImageNet weights)
       │
  Dropout (0.5)
       │
  Linear (1280 → 128)
       │
    ReLU
       │
  Dropout (0.3)
       │
  Linear (128 → 2)
       │
  Softmax         ← with_mask / without_mask
                    (applied at inference; during training the raw
                     logits go straight into CrossEntropyLoss)
```

In [None]:
# Start from MobileNetV2 with its ImageNet-trained weights.
weights = MobileNet_V2_Weights.IMAGENET1K_V1
model   = mobilenet_v2(weights=weights)

# Freeze the entire base: no pretrained parameter receives gradients.
for param in model.parameters():
    param.requires_grad = False

# Replace classifier head. The new layers are created after the freeze,
# so they are the only parameters that still require gradients.
in_feat = model.classifier[1].in_features
head_layers = [
    nn.Dropout(p=0.5),
    nn.Linear(in_feat, 128),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(128, 2),
]
model.classifier = nn.Sequential(*head_layers)
model = model.to(DEVICE)

# Count parameters in one pass.
total = trainable = 0
for tensor in model.parameters():
    count = tensor.numel()
    total += count
    if tensor.requires_grad:
        trainable += count

print(f'Total params     : {total:,}')
print(f'Trainable params : {trainable:,}  (head only)')
print(f'Frozen params    : {total-trainable:,}  (MobileNetV2 base)')

## 5 · Train

In [None]:
# ── Hyper-parameters ────────────────────────────────────────────────────
LR        = 1e-4
EPOCHS    = 20
PATIENCE  = 5                     # epochs without a val-accuracy gain before stopping
MODEL_PATH= 'mask_detector.pth'   # the best state_dict is written here

criterion = nn.CrossEntropyLoss()
# Only parameters that still require gradients are handed to the optimiser.
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR)
# Halve the learning rate every 5 scheduler steps (one step per completed epoch).
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

history          = {'train_acc':[], 'val_acc':[], 'train_loss':[], 'val_loss':[]}
best_val_acc     = 0.0
patience_counter = 0

print('Training …\n')
for epoch in range(1, EPOCHS+1):
    t0 = time.time()

    # ── train ───────────────────────────────────────────────────────────
    # NOTE(review): train() also switches the frozen base's BatchNorm layers
    # to training mode, so their running statistics keep updating even though
    # their weights are frozen — confirm this is intended.
    model.train()
    r_loss = r_corr = r_n = 0
    for imgs, lbls in train_loader:
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        optimizer.zero_grad()
        out  = model(imgs)
        loss = criterion(out, lbls)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by batch size so the epoch mean is per sample.
        r_loss += loss.item()*imgs.size(0)
        r_corr += (out.argmax(1)==lbls).sum().item()
        r_n    += imgs.size(0)
    t_loss, t_acc = r_loss/r_n, r_corr/r_n

    # ── validate ────────────────────────────────────────────────────────
    model.eval()
    v_loss = v_corr = v_n = 0
    with torch.no_grad():
        for imgs, lbls in val_loader:
            imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
            out  = model(imgs)
            v_loss += criterion(out, lbls).item()*imgs.size(0)
            v_corr += (out.argmax(1)==lbls).sum().item()
            v_n    += imgs.size(0)
    v_loss, v_acc = v_loss/v_n, v_corr/v_n

    history['train_acc'].append(t_acc)
    history['val_acc'].append(v_acc)
    history['train_loss'].append(t_loss)
    history['val_loss'].append(v_loss)

    # Checkpoint only on a strict improvement; a tie counts against patience.
    saved = ''
    if v_acc > best_val_acc:
        best_val_acc = v_acc
        torch.save(model.state_dict(), MODEL_PATH)
        saved = '  ✅ saved'
        patience_counter = 0
    else:
        patience_counter += 1

    print(f'  Ep {epoch:02}/{EPOCHS}  '
          f'loss={t_loss:.4f} acc={t_acc:.4f}  |  '
          f'val_loss={v_loss:.4f} val_acc={v_acc:.4f}  '
          f'({time.time()-t0:.1f}s){saved}')

    if patience_counter >= PATIENCE:
        print(f'\nEarly stopping at epoch {epoch}')
        break
    scheduler.step()

print(f'\n✅ Training done!  Best val_acc = {best_val_acc:.4f}  →  {MODEL_PATH}')

## 6 · Training Curves

In [None]:
# Number of epochs actually recorded (early stopping may end before EPOCHS).
er = len(history['train_acc'])
x  = range(1, er+1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 4))
fig.suptitle('MobileNetV2 Transfer Learning — Face Mask Detection', fontsize=14, fontweight='bold')

# Left panel: accuracy, with a fixed y-range.
ax1.plot(x, history['train_acc'], label='Train', lw=2, color='#4C72B0')
ax1.plot(x, history['val_acc'],   label='Val',   lw=2, color='#DD8452', ls='--')
ax1.set_title('Accuracy')
ax1.set_xlabel('Epoch')
ax1.legend()
ax1.grid(alpha=.3)
ax1.set_ylim([0,1.05])

# Right panel: loss.
ax2.plot(x, history['train_loss'], label='Train', lw=2, color='#4C72B0')
ax2.plot(x, history['val_loss'],   label='Val',   lw=2, color='#DD8452', ls='--')
ax2.set_title('Loss')
ax2.set_xlabel('Epoch')
ax2.legend()
ax2.grid(alpha=.3)

plt.tight_layout()
# Save before show().
plt.savefig('training_plot.png', dpi=150)
plt.show()
print('Saved → training_plot.png')

## 7 · Evaluate

In [None]:
import torch

# Restore the checkpoint stored at MODEL_PATH and switch to inference mode.
best_state = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(best_state)
model.eval()

# Gather predicted and true class indices over the whole validation loader.
all_preds, all_true = [], []
with torch.no_grad():
    for batch_imgs, batch_lbls in val_loader:
        logits = model(batch_imgs.to(DEVICE))
        batch_preds = logits.argmax(1).cpu().numpy()
        all_preds.extend(batch_preds)
        all_true.extend(batch_lbls.numpy())

print('Classification Report:')
report = classification_report(all_true, all_preds, target_names=CATEGORIES)
print(report)

## 8 · Confusion Matrix

In [None]:
# Rows are the true classes, columns the predicted ones (see the axis labels).
cm = confusion_matrix(all_true, all_preds)

fig, ax = plt.subplots(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=CATEGORIES, yticklabels=CATEGORIES,
            annot_kws={'size':14}, ax=ax)
ax.set_xlabel('Predicted', fontsize=12)
ax.set_ylabel('Actual', fontsize=12)
ax.set_title('Confusion Matrix — Face Mask Detector', fontsize=13, fontweight='bold')
plt.tight_layout()
# Save before show().
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()
print('Saved → confusion_matrix.png')

## 9 · Predict on a Random Test Sample

In [None]:
# Pick one held-out sample at random and look up its ground-truth class name.
idx_pick = random.randint(0, len(X_test)-1)
sample_path, sample_cls = X_test[idx_pick], y_test[idx_pick]
true_lbl = CATEGORIES[sample_cls]

# Same preprocessing as validation, plus a leading batch dimension.
img_pil = Image.open(sample_path).convert('RGB')
inp     = val_tf(img_pil).unsqueeze(0).to(DEVICE)

with torch.no_grad():
    out  = model(inp)
    prob = torch.softmax(out, dim=1)[0]   # class probabilities for the single image

pred_idx = prob.argmax().item()
pred_lbl = CATEGORIES[pred_idx]
conf     = prob[pred_idx].item() * 100    # confidence as a percentage

# Green title for a predicted mask, red otherwise.
if pred_lbl == 'with_mask':
    color = '#00b894'
else:
    color = '#d63031'

fig, ax = plt.subplots(figsize=(4,4))
ax.imshow(img_pil)
ax.set_title(
    f'Predicted : {pred_lbl}\nTrue : {true_lbl}\nConfidence : {conf:.1f}%',
    fontsize=11, color=color, fontweight='bold',
)
ax.axis('off')
plt.tight_layout()
plt.show()

## 10 · 🎥 Real-Time Webcam Demo

> **Run this cell to start live mask detection from your webcam.**  
> A separate OpenCV window will open. Press **Q** to quit.

In [None]:
import cv2

# cv2.putText() only draws the glyphs of its built-in Hershey fonts (anything
# else is rendered as '?'), so the on-frame labels are kept plain ASCII.
LABELS_WC = ['Mask', 'No Mask']          # indexed by predicted class
COLORS_WC = [(0, 200, 0), (0, 0, 220)]   # BGR
CONF_MIN  = 0.60                         # predictions below this are not drawn
QUIT_KEYS = (ord('q'), ord('Q'))         # quit with Q regardless of Shift / Caps Lock

# Load best saved model: rebuild the architecture, then restore the weights.
webcam_model = mobilenet_v2(weights=None)
in_feat_wc   = webcam_model.classifier[1].in_features
webcam_model.classifier = nn.Sequential(
    nn.Dropout(0.5), nn.Linear(in_feat_wc, 128), nn.ReLU(),
    nn.Dropout(0.3), nn.Linear(128, 2)
)
webcam_model.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
webcam_model.eval()

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    print('⚠️  Cannot open webcam — skipping demo.')
else:
    print('Webcam open — press Q to quit.')
    try:
        with torch.no_grad():
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # The Haar cascade runs on a grayscale copy of the frame.
                gray  = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(60,60))

                for (x,y,w,h) in faces:
                    # Crop the face and convert OpenCV's BGR to RGB before PIL.
                    roi  = cv2.cvtColor(frame[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
                    pil  = Image.fromarray(roi)
                    inp  = val_tf(pil).unsqueeze(0)
                    out  = webcam_model(inp)
                    prob = torch.softmax(out,1)[0]
                    idx2 = prob.argmax().item()
                    conf = prob[idx2].item()
                    if conf < CONF_MIN:
                        continue
                    lbl   = f'{LABELS_WC[idx2]} ({conf*100:.1f}%)'
                    color = COLORS_WC[idx2]
                    # Face outline, filled caption bar above it, then the caption.
                    cv2.rectangle(frame, (x,y), (x+w,y+h), color, 2)
                    cv2.rectangle(frame, (x,y-30), (x+w,y), color, -1)
                    cv2.putText(frame, lbl, (x+4,y-8), cv2.FONT_HERSHEY_SIMPLEX, .60, (255,255,255), 2)

                cv2.putText(frame,'Face Mask Detector  |  Q to quit',(10,25),cv2.FONT_HERSHEY_SIMPLEX,.65,(200,200,200),1)
                cv2.imshow('Face Mask Detector', frame)
                if (cv2.waitKey(1) & 0xFF) in QUIT_KEYS:
                    break
    finally:
        # Always free the camera and close the window, even if the loop
        # raised or the cell was interrupted.
        cap.release()
        cv2.destroyAllWindows()
        print('Webcam closed.')

---
## Summary

| Item | Value |
|------|-------|
| Framework | **PyTorch** |
| Base model | MobileNetV2 (ImageNet) — **frozen** |
| Custom head | Dropout ‚Üí Linear(128, ReLU) ‚Üí Dropout ‚Üí Linear(2) |
| Loss | CrossEntropyLoss |
| Optimiser | Adam (lr=1e-4) |
| Augmentation | HFlip, Rotation, ColorJitter |
| Output | `mask_detector.pth` |

### 💡 To improve accuracy
1. Use the **real dataset** (~4 k images) from https://github.com/chandrikadeb7/Face-Mask-Detection  
2. **Unfreeze** MobileNetV2 top layers for fine-tuning with a very low lr (≈1e-5)  
3. Increase `N_IMAGES` to 500+ if using synthetic data  
4. Try **EfficientNet-B0** as the backbone