In [1]:
import cv2
import torch
import os
import time
from utils import get_parking_spots_from_mask, crop_patch_from_polygon, TemporalSmoother, map_detections_to_slots
from torchvision import transforms, models
import numpy as np
import argparse


In [2]:
def _load_slot_classifier(model_path, device):
    """Build a MobileNetV2 with a 2-way head and load its weights.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        device: Device string the weights are mapped to and the model is moved to.

    Returns:
        The model on ``device``, switched to eval mode.
    """
    # Calling the constructor without arguments gives an untrained network under
    # both the legacy ``pretrained=`` and the newer ``weights=`` torchvision
    # APIs, so no deprecation warning is emitted for ``pretrained``.
    model = models.mobilenet_v2()
    in_f = model.classifier[1].in_features
    # Replace the final layer with 2 outputs (empty / occupied).
    model.classifier[1] = torch.nn.Linear(in_f, 2)
    model.load_state_dict(torch.load(model_path, map_location=device))
    return model.to(device).eval()


def _draw_slot_overlay(frame, slots, occ_map):
    """Draw slot outlines, per-slot labels and the free counter on ``frame`` in place.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``; modified in place.
        slots: Slot dicts providing ``'slot_id'``, ``'polygon'`` and ``'centroid'``.
        occ_map: Mapping of slot id to occupancy state, where 0 means free.
            Slots missing from the mapping are drawn as free.
    """
    for s in slots:
        sid = s['slot_id']
        poly = np.array(s['polygon'], dtype=np.int32)
        occ = occ_map.get(sid, 0)
        # Colours are BGR: green for a free slot, red for an occupied one.
        color = (0,255,0) if occ == 0 else (0,0,255)
        cv2.polylines(frame, [poly], True, color, thickness=2)
        cx, cy = s['centroid']
        txt = f"{sid}:{'Occ' if occ else 'Free'}"
        # Cast so the text origin is always made of plain integer pixel coordinates.
        cv2.putText(frame, txt, (int(cx) - 20, int(cy)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    free_count = sum(1 for v in occ_map.values() if v == 0)
    cv2.putText(frame, f"Free: {free_count}/{len(slots)}", (20,50),
                cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255,255,255), 2)


def run_inference(
    mask_path='data/mask_1920_1080.png',
    video_path='data/parking_1920_1080_loop.mp4',
    model_path='checkpoints/best_model.pth',
    out_video='out/result.mp4'
):
    """Classify every parking slot in each video frame and write an annotated video.

    For each frame, one patch per slot is cropped, classified by the
    MobileNetV2 model as class 0 (free) or class 1 (occupied), passed through
    a ``TemporalSmoother``, and drawn onto the frame together with a free-slot
    counter.

    Args:
        mask_path: Mask image from which the slot polygons are extracted.
        video_path: Input video to analyse.
        model_path: State dict of the 2-class MobileNetV2 classifier.
        out_video: Destination of the annotated ``mp4v`` video. Its parent
            directory is created when the path contains one.

    Raises:
        RuntimeError: If the input video cannot be opened or the output video
            writer cannot be created.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    slots = get_parking_spots_from_mask(mask_path)
    model = _load_slot_classifier(model_path, device)

    tf = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224,224)),
        transforms.ToTensor()
    ])

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        # Fail loudly instead of writing an empty file and reporting success.
        if not cap.isOpened():
            raise RuntimeError(f"Could not open input video: {video_path}")

        # Some containers report 0 FPS; fall back to 20 in that case.
        fps = cap.get(cv2.CAP_PROP_FPS) or 20
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # dirname() is '' for a bare filename, and os.makedirs('') raises.
        out_dir = os.path.dirname(out_video)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(out_video, fourcc, fps, (w, h))
        if not out.isOpened():
            raise RuntimeError(f"Could not open output video for writing: {out_video}")

        smoother = TemporalSmoother(k=5)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Patches are cropped from an RGB copy; drawing happens on the
            # original BGR frame that is written to the output.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            occ_map = {}

            if slots:
                slot_ids = [s['slot_id'] for s in slots]
                # One [3, 224, 224] tensor per slot, stacked into a single batch.
                batch = torch.stack([
                    tf(crop_patch_from_polygon(frame_rgb, s['polygon'], dst_size=(224,224)))
                    for s in slots
                ]).to(device)
                with torch.no_grad():
                    logits = model(batch)       # shape: [N, 2]
                    preds = torch.argmax(logits, dim=1).cpu().numpy()

                for sid, p in zip(slot_ids, preds):
                    occ_map[sid] = smoother.update(sid, int(p))

            _draw_slot_overlay(frame, slots, occ_map)
            out.write(frame)
    finally:
        # Release the video handles even when a frame fails mid-run, so a
        # long-lived notebook kernel does not keep the files locked.
        cap.release()
        if out is not None:
            out.release()

    print("Saved video to:", out_video)


In [3]:
# Execute the full pipeline on the bundled sample data. The arguments are
# passed positionally in the order declared by run_inference.
run_inference(
    'data/mask_1920_1080.png',            # mask_path
    'data/parking_1920_1080_loop.mp4',    # video_path
    'checkpoints/best_model.pth',         # model_path
    'out/result.mp4',                     # out_video
)




Saved video to: out/result.mp4


In [None]:
# Preview the annotated result inline. The path must match the `out_video`
# value passed to run_inference above.
from IPython.display import Video

# embed=True stores the video data inside the notebook output instead of
# linking to the file on disk. The bare expression is the cell's last
# statement, so Jupyter renders it as the cell output.
Video("out/result.mp4", embed=True)
