## Environment Setup

Use the following commands to update your system, install `ffmpeg`, upgrade `pip`, and install all the necessary Python dependencies.

### 1. Update and Install `ffmpeg`

```bash
!apt-get -y update && apt-get -y install ffmpeg

In [None]:
!apt-get -y update && apt-get -y install ffmpeg
!python -m pip install --upgrade pip
!pip install "opencv-python==4.11.0.86" "numpy==1.26.4" \
             "pycryptodome==3.20.0" "onnxruntime==1.18.1" \
             "timm==1.0.9" "rich==14.1.0"
# Torch and TF are preinstalled on Kaggle; you can pin if you need.

## Project Setup Script

This script sets up the working environment for a project by copying a repository, preparing directories, and validating data paths.  
It ensures that all necessary components (repo, videos, and CSV index) are available before proceeding.

---

### 📁 1. Import Required Modules

```python
from pathlib import Path
import shutil
import sys

In [None]:
from pathlib import Path
import shutil
import sys

# Define project structure (replace with your actual dataset/repo names)
DATASET_ROOT = Path("/path/to/dataset")
WORK_ROOT = Path("/path/to/working_dir")

REPO_NAME = "YourRepoName"
REPO_SRC = DATASET_ROOT / REPO_NAME
REPO_DST = WORK_ROOT / REPO_NAME

# Validate the source BEFORE touching the destination: otherwise a wrong
# DATASET_ROOT/REPO_NAME would delete the existing working copy and then fail
# in copytree, leaving nothing behind.
if not REPO_SRC.is_dir():
    raise FileNotFoundError(f"Repo source not found: {REPO_SRC}")

# Copy the repo into working directory so it can be imported/edited freely.
# Any previous copy is removed first so the working tree mirrors REPO_SRC.
if REPO_DST.exists():
    shutil.rmtree(REPO_DST)
shutil.copytree(REPO_SRC, REPO_DST)

# Add repo to Python path (only once, so re-running this cell does not keep
# appending duplicate entries).
if str(REPO_DST) not in sys.path:
    sys.path.append(str(REPO_DST))

# Outputs
OUT_ROOT = WORK_ROOT / "output"
OUT_ROOT.mkdir(parents=True, exist_ok=True)

# Data files (adjust to your dataset layout)
VIDEOS_DIR = DATASET_ROOT / "videos"
CSV_PATH = DATASET_ROOT / "video_index.csv"

# Sanity checks
assert REPO_DST.exists(), "Repo not found"
assert VIDEOS_DIR.exists(), "Videos directory missing"
assert CSV_PATH.exists(), "CSV file missing"

# Info logs (the video count only includes *.mov files directly in VIDEOS_DIR)
print(f"Repo:   {REPO_DST}")
print(f"Videos: {len(list(VIDEOS_DIR.glob('*.mov')))}")
print(f"CSV:    {CSV_PATH}")

## 🔧 Environment Configuration

This script sets important environment variables to control **TensorFlow**, **JAX**, and **XLA** behavior.  
It’s especially useful in development environments (e.g., Kaggle, Colab, or local CPU-only runs) where you want to **suppress logs**, **force CPU usage**, or **avoid unnecessary GPU allocation**.

---

### 🧠 1. Configure Environment Variables

```python
import os

# Suppress TensorFlow logs (INFO, WARNING, and ERROR)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Disable GPU usage for TensorFlow and JAX
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Force JAX to run on CPU only
os.environ["JAX_PLATFORMS"] = "cpu"

# Prevent XLA from preallocating large GPU memory chunks
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"

# Disable oneDNN custom operations (this also removes the related startup log message)
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

In [None]:
import os
# Runtime switches for TensorFlow / JAX / XLA, applied in a single update.
os.environ.update(
    {
        "TF_CPP_MIN_LOG_LEVEL": "3",               # hide TF INFO/WARN/ERROR
        "CUDA_VISIBLE_DEVICES": "",                # no GPU for TF/JAX
        "JAX_PLATFORMS": "cpu",                    # JAX stays on CPU
        "XLA_PYTHON_CLIENT_PREALLOCATE": "false",  # don't grab big GPU chunks
        "TF_ENABLE_ONEDNN_OPTS": "0",              # avoid extra kernel logs
    }
)

## 🔐 AES Encryption & Video Processing Utilities

This script provides a set of **utility functions** for secure data handling, video preprocessing, and frame selection.  
It’s designed for machine learning pipelines that process video frames, encrypt sensitive metadata, and handle orientation and target frame extraction.

---

### 📦 1. Imports & Dependencies

```python
# -*- coding: utf-8 -*-
from Crypto.Cipher import AES, PKCS1_OAEP
from Crypto.Random import get_random_bytes
from Crypto.PublicKey import RSA
import base64, json, numpy as np, cv2, time, subprocess, torch, csv, ast, shlex, uuid, os
from pathlib import Path

import src.blur_functions as bf
import src.utils as utils
from src.backbones import get_model

In [None]:
# -*- coding: utf-8 -*-
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes
from Crypto.Cipher import PKCS1_OAEP
from Crypto.PublicKey import RSA
import base64, json, numpy as np, cv2, time, subprocess, torch, csv, ast, shlex, uuid, os
from pathlib import Path

import src.blur_functions as bf
import src.utils as utils
from src.backbones import get_model

# Overwrites the "" value assigned to CUDA_VISIBLE_DEVICES in the earlier
# environment-configuration cell; the intent stated by the author is CPU-only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"   # CPU; remove if you want GPU for embeddings

def box_center(box):
    """Return the integer midpoint ``(cx, cy)`` of an ``(x1, y1, x2, y2)`` box.

    Uses floor division, so odd sums round down.
    """
    left, top, right, bottom = box
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2
    return (center_x, center_y)

def pad(data: bytes):
    """Pad ``data`` to a multiple of 16 bytes.

    Between 1 and 16 bytes are appended, each holding the number of bytes
    appended; input that is already block-aligned gains a full 16-byte block.
    """
    fill = 16 - len(data) % 16
    return data + bytes([fill] * fill)

def encrypt_data_aes128(data: bytes, key: bytes) -> str:
    """Encrypt ``data`` with AES in ECB mode and return base64 text.

    The plaintext is padded with ``pad`` before encryption, and the
    ciphertext is base64-encoded and decoded to a UTF-8 ``str``.

    NOTE(review): ECB encrypts every 16-byte block independently, so equal
    plaintext blocks produce equal ciphertext blocks. Changing the mode would
    break whatever decrypts these payloads, so it is only flagged here.
    NOTE(review): the key length is not validated in this function; the
    callers visible in this file pass 16-byte keys.
    """
    ecb = AES.new(key, AES.MODE_ECB)
    ciphertext = ecb.encrypt(pad(data))
    encoded = base64.b64encode(ciphertext)
    return encoded.decode("utf-8")

def _parse_frame_list_cell(cell):
    if cell is None: return []
    if isinstance(cell, list): return [int(x) for x in cell]
    s = str(cell).strip()
    if not s: return []
    try:
        val = ast.literal_eval(s)
        if isinstance(val, (list, tuple)): return [int(x) for x in val]
    except Exception:
        pass
    out = []
    for p in s.strip("[]").split(","):
        p = p.strip()
        if p and p.lstrip("-").isdigit(): out.append(int(p))
    return out

def load_target_frames_for_video(csv_path, video_name_key):
    """Collect the frame numbers listed in the CSV for one video.

    Rows are matched by comparing the stem of their ``video_name`` column
    with the stem of ``video_name_key``, so directory and extension are
    ignored. The ``frame_numbers`` cells of every matching row are parsed
    with ``_parse_frame_list_cell`` and merged.

    Returns:
        A set of ints; empty when no row matches.
    """
    frames = set()
    with open(csv_path, "r", newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            row_video = record.get("video_name", "")
            if Path(row_video).stem != Path(video_name_key).stem:
                continue
            parsed = _parse_frame_list_cell(record.get("frame_numbers", ""))
            frames.update(int(number) for number in parsed)
    return frames

def probe_rotation_deg(path) -> int:
    """Return the ``rotate`` tag of the first video stream, normalised to 0-359.

    Runs ``ffprobe`` and reads ``streams[0].tags.rotate`` from its JSON
    output. This is best-effort: any failure (ffprobe missing, non-zero exit,
    no streams, missing or non-numeric tag) yields ``0``.

    The command is passed as an argument list rather than a quoted string
    re-split by ``shlex``, so paths containing quotes or other shell-special
    characters reach ffprobe unchanged.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream_tags=rotate",
        "-of", "json",
        str(path),
    ]
    try:
        out = subprocess.check_output(cmd).decode("utf-8")
        data = json.loads(out)
        rotate = int(data.get("streams", [{}])[0].get("tags", {}).get("rotate", "0"))
        return rotate % 360
    except Exception:
        # Deliberate fallback: callers treat 0 as "no rotation metadata".
        return 0

def rotate_fixed(img, deg):
    """Rotate ``img`` by 90, 180 or 270 degrees using ``cv2.rotate``.

    90 maps to a clockwise quarter turn and 270 to a counter-clockwise one.
    Any other ``deg`` returns ``img`` unchanged (the same object).
    """
    if deg == 90:
        rotation_flag = cv2.ROTATE_90_CLOCKWISE
    elif deg == 180:
        rotation_flag = cv2.ROTATE_180
    elif deg == 270:
        rotation_flag = cv2.ROTATE_90_COUNTERCLOCKWISE
    else:
        return img
    return cv2.rotate(img, rotation_flag)

## ⚙️ Configuration: Video Processing Pipeline

This script defines a `Config` class that centralizes **all key parameters** for your video processing pipeline — including input/output paths, model locations, runtime options, and detection thresholds.  
It also sets up public key loading for encryption.

---

### 🛠️ 1. Configuration Class

```python
class Config:
    # ▶️ Input / Output Settings
    INPUT_VIDEO_PATH   = str(VIDEOS_DIR / "YourVideoName.mp4")
    OUTPUT_VIDEO_PATH  = str(OUT_ROOT / "single" / "blurred.mp4")
    SAVE_OUTPUT        = False         # Save processed video
    AUDIO              = False         # Include audio in output

    # 🤖 Models
    DETECTOR_MODEL     = "yunet"       # Face detector model
    EMBEDDING_MODEL    = "edgeface_s_gamma_05"
    EMBEDDING_MODEL_PATH = str(REPO_DST / "src" / "models" / "edgeface_s_gamma_05.pt")

    # ⏱️ Frame Processing
    FRAME_SKIP         = 3             # Process every 3rd frame
    TRACKING_SKIP      = 0             # Tracking interval
    BLUR_ENABLED       = True          # Enable face blurring

    # 🔄 Video Orientation
    ROTATION_DEGREES   = 0             # 0 => auto from metadata; use 90/180/270 to override

    # 📍 Tracking Thresholds
    LANDMARK_STALENESS_THRESHOLD = 15
    BOX_STALENESS_THRESHOLD      = 15
    MSE_THRESHOLD                = 150

    # 📦 Detector Settings
    SCRFD_MODEL_PATH   = str(REPO_DST / "src" / "models" / "det_500m.onnx")
    SCRFD_INPUT_SIZE   = (640, 640)

    # 🎨 Visualization Options
    OVERLAY_LANDMARKS     = False
    OVERLAY_DETECTOR_BOX  = False
    DISPLAY_VIDEO         = False

    # 📊 Frame Metadata
    FRAMES_CSV_PATH   = str(CSV_PATH)
    FRAME_JSON_DIR    = str(OUT_ROOT / "single" / "frame_json")
    # FRAME_IMAGE_DIR   = str(OUT_ROOT / "single" / "frame_images")

In [None]:
class Config:
    """Settings for the video pipeline, stored as class attributes.

    Model paths are resolved against ``REPO_DST`` (the working copy of the
    repository). ``REPO_NAME`` is a plain string, so joining it with ``/``
    raises ``TypeError`` while the class body is executed.
    """

    # default single-video (overridden when batching)
    INPUT_VIDEO_PATH   = str(VIDEOS_DIR / "YourVideoName.mp4")
    OUTPUT_VIDEO_PATH  = str(OUT_ROOT / "single" / "blurred.mp4")
    SAVE_OUTPUT        = False   # write the processed frames to OUTPUT_VIDEO_PATH
    AUDIO              = False

    DETECTOR_MODEL     = "yunet"
    EMBEDDING_MODEL    = "edgeface_s_gamma_05"
    EMBEDDING_MODEL_PATH = str(REPO_DST / "src" / "models" / "edgeface_s_gamma_05.pt")

    # The detector runs on frames where frame_count % (FRAME_SKIP + 1) == 0;
    # optical-flow tracking runs where frame_count % (TRACKING_SKIP + 1) == 0.
    FRAME_SKIP         = 3
    TRACKING_SKIP      = 0
    BLUR_ENABLED       = True

    # 0 => use ffprobe metadata; use 90/180/270 to force
    ROTATION_DEGREES   = 0

    LANDMARK_STALENESS_THRESHOLD = 15
    # Boxes are dropped after this many frames without a fresh detection.
    BOX_STALENESS_THRESHOLD      = 15
    # A per-face frame difference above this value forces a detector run.
    MSE_THRESHOLD                = 150

    SCRFD_MODEL_PATH   = str(REPO_DST / "src" / "models" / "det_500m.onnx")
    SCRFD_INPUT_SIZE   = (640, 640)

    OVERLAY_LANDMARKS = False
    OVERLAY_DETECTOR_BOX = False
    DISPLAY_VIDEO     = False

    FRAMES_CSV_PATH   = str(CSV_PATH)
    FRAME_JSON_DIR    = str(OUT_ROOT / "single" / "frame_json")
    # FRAME_IMAGE_DIR   = str(OUT_ROOT / "single" / "frame_images")

# RSA public key used to wrap the per-face AES keys. It is read from the
# working copy of the repo: REPO_NAME is a str, so `REPO_NAME / "encryption"`
# raises TypeError, whereas REPO_DST is the Path of the copied repo.
PUBLIC_KEY_PATH = REPO_DST / "encryption" / "public_key.pem"
assert PUBLIC_KEY_PATH.exists(), "public_key.pem not found"
# read_bytes() opens and closes the file, so no handle is left dangling.
public_key = RSA.import_key(PUBLIC_KEY_PATH.read_bytes())

## 🚀 Pipeline Initialization & Model Loading

This code is the **initial setup phase** of the face-processing pipeline.  
It prepares the environment by creating necessary directories, initializing detection and landmark models, and loading a face embedding network.  
After this step, the pipeline is ready to process video frames for detection, landmark extraction, blurring, or embedding tasks.

---

### 🗂️ 1. Create Required Output Directories

```python
config = Config()

# make sure dirs exist for the current config
Path(config.FRAME_JSON_DIR).mkdir(parents=True, exist_ok=True)
# Path(config.FRAME_IMAGE_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
config = Config()

# make sure dirs exist for the current config
Path(config.FRAME_JSON_DIR).mkdir(parents=True, exist_ok=True)
# Path(config.FRAME_IMAGE_DIR).mkdir(parents=True, exist_ok=True)

# Face detector and landmark interpreter handles, used as module-level globals
# by process_video_one().
detector = utils.initialize_detector(config)
interpretor, input_details, output_details = utils.initialize_landmark_detector()

if config.EMBEDDING_MODEL == "edgeface_s_gamma_05":
    model_name = "edgeface_s_gamma_05"
    model = get_model(model_name)
    # Only the torch.load call sits inside the try: a TypeError raised later by
    # load_state_dict must not trigger a silent reload without weights_only.
    try:
        state = torch.load(config.EMBEDDING_MODEL_PATH, map_location='cpu', weights_only=True)
    except TypeError:
        # presumably a torch build whose torch.load() does not accept the
        # weights_only keyword -- fall back to the plain call
        state = torch.load(config.EMBEDDING_MODEL_PATH, map_location='cpu')
    model.load_state_dict(state)
    model.eval()
else:
    model = None  # extend if you support other backbones

## 🎥 `process_video_one(cfg: Config)`

The `process_video_one()` function is the **main driver** of the video anonymization and face-processing pipeline.  
It reads a video frame-by-frame, detects and tracks faces, extracts landmarks, blurs sensitive regions, encrypts metadata, and optionally generates embeddings — all while logging performance statistics.

---

### 🧠 Overview of What It Does

| Stage | Purpose |
|-------|---------|
| 🧩 Initialization | Prepare variables, trackers, and video I/O |
| 🎞️ Frame Processing Loop | Read and process each video frame |
| 🔍 Face Detection & Tracking | Detect faces or track them across frames |
| 📍 Landmark Extraction | Get facial landmarks for each face |
| 🫣 Blurring & Metadata | Blur detected faces and save metadata |
| 🔐 Encryption & Storage | Encrypt face data and embeddings |
| 📊 Statistics & Logging | Compute performance metrics and save logs |

---

### 1️⃣ Initialization & Setup

```python
start_process_time = time.perf_counter()
face_metadata = {}
face_tracks = {}
face_id_to_aes_key = {}
MAX_CENTER_DIST = 60
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

In [None]:

def process_video_one(cfg: Config):
    """Run detection, tracking, landmarking, blurring and encryption on one video.

    Per frame:
      * the frame is rotated by ``cfg.ROTATION_DEGREES`` when that is
        90/180/270, otherwise by the rotation reported by ``ffprobe``;
      * the detector runs on the first frame, on every ``FRAME_SKIP + 1``-th
        frame, or when the frame difference of any current face box exceeds
        ``cfg.MSE_THRESHOLD``; on other frames the boxes are propagated with
        optical flow;
      * each box is matched to the nearest existing track whose centre is
        closer than ``MAX_CENTER_DIST`` pixels, otherwise it gets a new UUID
        and a fresh random 16-byte AES key;
      * landmarks, their convex hull and a bounding box are computed, a
        record is stored, and the face is blurred when ``cfg.BLUR_ENABLED``;
      * if the zero-based frame index is listed for this video in
        ``cfg.FRAMES_CSV_PATH``, the frame's records are written to
        ``cfg.FRAME_JSON_DIR/<video>_frame<idx>.json``.

    After the loop, in ``OUT_ROOT``:
      * ``video_metadata_encrypted.json`` -- AES-encrypted per-face entries;
      * ``face_keys.json`` -- RSA-wrapped AES keys plus AES-encrypted
        embeddings (only a ``video_identifier`` when nothing was embedded);
      * ``blur_stats.json`` -- one entry appended with the total blur time.
    The first two files are rewritten on every call.

    Relies on module-level globals: ``detector``, ``interpretor``,
    ``input_details``, ``output_details``, ``model``, ``config``,
    ``public_key`` and ``OUT_ROOT``.

    Raises:
        RuntimeError: if the input video cannot be opened.
    """
    # Function-scope import: the body calls copy.deepcopy, but no visible cell
    # imports `copy`, so the first detector run raised NameError.
    import copy

    start_process_time = time.perf_counter()
    total_inference_times = []
    total_blur_times = []; total_detector_times=[]; total_landmarker_times=[]
    total_tracking_times=[]; total_mse_times=[]
    prev_detected_face_scores = []
    store_scores = []  # scores from the most recent detector run

    # per-run trackers
    face_metadata = {}        # face_id -> list of per-frame entries (encrypted after the loop)
    face_tracks = {}          # face_id -> {"box", "aes_key"} for the previous frame
    face_id_to_aes_key = {}
    MAX_CENTER_DIST = 60
    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    current_video_name = Path(cfg.INPUT_VIDEO_PATH).stem
    TARGET_FRAMES = load_target_frames_for_video(cfg.FRAMES_CSV_PATH, current_video_name)

    # rotation: an explicit 90/180/270 in the config wins over the metadata
    meta_deg = probe_rotation_deg(cfg.INPUT_VIDEO_PATH)
    effective_deg = cfg.ROTATION_DEGREES if cfg.ROTATION_DEGREES in (90,180,270) else meta_deg
    print(f"[{current_video_name}] rotation = {effective_deg}° (meta={meta_deg}°)")

    # open IO
    cap, out, _, _ = utils.initialize_video_io(cfg)
    if not cap or not cap.isOpened():
        raise RuntimeError(f"Failed to open video: {cfg.INPUT_VIDEO_PATH}")

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)); h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)); fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"[{current_video_name}] props: {w}x{h} @{fps:.2f}")

    frame_count = 0; blur_count=0; detector_count=0; landmarker_count=0; tracking_count=0; mse_thresh_count=0
    detected_faces_boxes = []; last_landmark_boxes=[]; prev_frame=None; prev_gray=None
    frames_since_last_box = 0

    while cap.isOpened():
        detected_face_scores = []
        frame_start_time = time.perf_counter()
        ret, frame = cap.read()
        if not ret: break

        frame_count += 1
        frame_idx_zero = frame_count - 1  # the CSV target frames are compared against this zero-based index

        frame = rotate_fixed(frame, effective_deg)
        frame_blur_records = []

        # MSE motion gating: compare every current face region with the previous frame
        mse_values = []
        if prev_frame is not None and detected_faces_boxes:
            for face_region in detected_faces_boxes:
                st = time.perf_counter()
                mse = utils.calculate_frame_diff(prev_frame, frame, face_region)
                en = time.perf_counter()
                total_mse_times.append((en-st)*1000)
                mse_values.append(mse)

        run_detector = (frame_count == 1
                        or frame_count % (cfg.FRAME_SKIP + 1) == 0
                        or (mse_values and max(mse_values) > cfg.MSE_THRESHOLD))
        if (mse_values and max(mse_values) > cfg.MSE_THRESHOLD):
            mse_thresh_count += 1

        # NOTE: output_frame, rgb_frame and prev_frame all refer to the same
        # array object as `frame` at this point (no copies are made).
        output_frame = frame
        rgb_frame = output_frame
        prev_frame = output_frame
        current_height, current_width = frame.shape[:2]
        detected_face_scores = []

        if run_detector:
            detector_count += 1
            st = time.perf_counter()
            faces = utils.detect_faces(rgb_frame, detector, cfg)
            en = time.perf_counter(); total_detector_times.append((en-st)*1000)
            detected_faces_boxes = [utils.expand_box_with_margin(f["box"], 0.1, output_frame.shape[1], output_frame.shape[0]) for f in faces]
            detected_face_scores = [f["score"] for f in faces]
            store_scores = copy.deepcopy(detected_face_scores)
            if detected_faces_boxes:
                frames_since_last_box = 0
            else:
                frames_since_last_box += 1
                if frames_since_last_box >= cfg.BOX_STALENESS_THRESHOLD:
                    detected_faces_boxes = []
                face_tracks = {}
            prev_detected_face_scores = store_scores
        else:
            frames_since_last_box += 1
            if frames_since_last_box >= cfg.BOX_STALENESS_THRESHOLD:
                detected_faces_boxes = []
            if prev_gray is not None and (frame_count % (cfg.TRACKING_SKIP+1) == 0):
                st = time.perf_counter()
                detected_faces_boxes = [utils.track_box_with_optical_flow(prev_gray, rgb_frame, box, lk_params, scale_factor=0.4)[0]
                                        for box in detected_faces_boxes]
                en = time.perf_counter(); tracking_count += 1; total_tracking_times.append((en-st)*1000)

        new_tracks = {}
        for i, box in enumerate(detected_faces_boxes):
            # associate the box with the nearest previous track (by centre distance)
            cx, cy = box_center(box)
            matched_id = None; min_dist = float("inf")
            for face_id, info in face_tracks.items():
                pcx, pcy = box_center(info["box"])
                dist = np.linalg.norm(np.array([cx, cy]) - np.array([pcx, pcy]))
                if dist < MAX_CENTER_DIST and dist < min_dist:
                    matched_id = face_id; min_dist = dist

            if matched_id:
                new_tracks[matched_id] = {"box": box, "aes_key": face_tracks[matched_id]["aes_key"]}
            else:
                new_id = str(uuid.uuid4()); new_key = get_random_bytes(16)
                face_id_to_aes_key[new_id] = new_key
                new_tracks[new_id] = {"box": box, "aes_key": new_key}

            x1, y1, x2, y2 = box
            if x1 < x2 and y1 < y2:
                x1=max(x1,0); y1=max(y1,0); x2=min(x2,current_width); y2=min(y2,current_height)
                face_roi_rgb = rgb_frame[y1:y2, x1:x2]
                if face_roi_rgb.size > 0:
                    st = time.perf_counter()
                    landmarks, score = utils.get_landmarks_interpretor_with_score(face_roi_rgb, interpretor, input_details, output_details)
                    en = time.perf_counter(); total_landmarker_times.append((en-st)*1000); landmarker_count += 1

                    landmarks_scaled = utils.scale_landmarks(landmarks, x1, y1, x2-x1, y2-y1)

                    face_id = matched_id if matched_id else new_id
                    track_info = new_tracks[face_id]; aes_key = track_info["aes_key"]

                    frame_h, frame_w = frame.shape[:2]
                    hull = cv2.convexHull(landmarks_scaled)
                    hull_pts = np.array(hull).reshape(-1,2).astype(int).tolist()

                    # base bounding rect
                    x, y, w, h = cv2.boundingRect(hull)

                    # box scale factor around the hull centre (1.0 => no
                    # enlargement), clamped to the frame bounds
                    scale = 1.0
                    cx, cy = x + w/2, y + h/2
                    new_w, new_h = w * scale, h * scale
                    x1 = int(max(0, cx - new_w/2))
                    y1 = int(max(0, cy - new_h/2))
                    x2 = int(min(frame_w, cx + new_w/2))
                    y2 = int(min(frame_h, cy + new_h/2))

                    # --- discard very small faces (< 30x30 px²) ---
                    box_area = (x2 - x1) * (y2 - y1)
                    if box_area < 30 * 30:
                        continue

                    # save record
                    # choose detection score with fallbacks
                    if i < len(store_scores):
                        det_score_val = float(store_scores[i])
                    elif len(detected_face_scores) > 0:
                        det_score_val = float(store_scores[-1])
                    elif prev_detected_face_scores:
                        det_score_val = float(prev_detected_face_scores[-1])
                    else:
                        det_score_val = 0.5
                        print("random")

                    frame_blur_records.append({
                        "face_id": face_id,
                        "pred_bbox": [x1, y1, x2, y2],
                        "hull": hull_pts,
                        "det_score": det_score_val,
                        "land_score": float(score)
                    })

                    # adjust landmarks relative to cropped box
                    landmarks_adjusted = np.array(landmarks_scaled)
                    landmarks_adjusted[:,0] -= x1
                    landmarks_adjusted[:,1] -= y1

                    # crop face image
                    face_image_for_encryption = rgb_frame[y1:y2, x1:x2]
                    _, buffer = cv2.imencode('.png', face_image_for_encryption)
                    face_image_encoded = base64.b64encode(buffer).decode('utf-8')

                    data = {
                        "frame": frame_count,
                        "landmarks": landmarks_adjusted.tolist(),
                        "face_image": face_image_encoded,
                        "box": [x1, y1, x2, y2],
                        # NaN when the detector did not produce a score for this box on this frame
                        "box_score": float(detected_face_scores[i]) if i < len(detected_face_scores) else float("nan"),
                        "land_score": float(score),
                        "face_id": face_id
                    }
                    face_metadata.setdefault(face_id, []).append(data)

                    # blur if enabled
                    # NOTE(review): every call passes rgb_frame, not the running
                    # output_frame. If apply_blur_new returns a new array instead
                    # of editing its input in place, only the last face of a
                    # frame ends up blurred -- confirm against src.blur_functions.
                    if cfg.BLUR_ENABLED:
                        st=time.perf_counter()
                        output_frame = bf.apply_blur_new(rgb_frame, hull, x1, y1, x2-x1, y2-y1)
                        en=time.perf_counter(); total_blur_times.append((en-st)*1000); blur_count+=1

        face_tracks = new_tracks
        prev_gray = output_frame

        # save full-video out if requested
        if cfg.SAVE_OUTPUT and out: out.write(output_frame)

        # save per-frame artifacts only for CSV target frames
        if frame_idx_zero in TARGET_FRAMES:
            base = f"{current_video_name}_frame{frame_idx_zero}"
            json_out_path = Path(cfg.FRAME_JSON_DIR) / f"{base}.json"
            json_out_path.parent.mkdir(parents=True, exist_ok=True)
            with open(json_out_path, "w", encoding="utf-8") as jf:
                json.dump(frame_blur_records, jf, indent=2)
            #img_out_path  = Path(cfg.FRAME_IMAGE_DIR)  / f"{base}.png"
            #cv2.imwrite(str(img_out_path), output_frame)

        frame_end_time = time.perf_counter()
        total_inference_times.append((frame_end_time - frame_start_time)*1000)

    # cleanup
    cap.release()
    if out: out.release()
    cv2.destroyAllWindows()

    # postprocess encryption + embeddings
    encrypted_face_metadata = {}; to_be_embedded = {}
    for face_id, entries in face_metadata.items():
        aes_key = face_id_to_aes_key[face_id]
        encrypted_entries = []
        # the entry with the highest landmark score represents the face for embedding
        best_idx = max(range(len(entries)), key=lambda k: entries[k].get("land_score", -1))
        for entry in entries:
            encrypted_entries.append(encrypt_data_aes128(json.dumps(entry).encode("utf-8"), aes_key))
        encrypted_face_metadata[f"frame_{face_id}"] = encrypted_entries
        to_be_embedded[face_id] = (entries[best_idx].get("face_image"), entries[best_idx].get("landmarks"))

    (OUT_ROOT/"video_metadata_encrypted.json").write_text(json.dumps(encrypted_face_metadata, indent=2))

    if to_be_embedded and (model is not None):
        # NOTE(review): this reads the global `config`, not `cfg`; the global
        # `model` was also selected from `config`, so the two stay consistent.
        batch_tensor, face_id_order = utils.preprocess_face_images(to_be_embedded, config.EMBEDDING_MODEL)
        if batch_tensor is not None:
            with torch.no_grad():
                embeddings = model(batch_tensor)
            face_embeddings = embeddings.numpy().tolist()
            cipher_rsa = PKCS1_OAEP.new(public_key)
            face_id_to_combined_key_embedding = {}
            for i, face_id in enumerate(face_id_order):
                aes_key = face_id_to_aes_key[face_id]
                # the AES key is wrapped with the RSA public key; the embedding is encrypted with that AES key
                encrypted_aes_key_b64 = base64.b64encode(cipher_rsa.encrypt(aes_key)).decode('utf-8')
                encrypted_embedding_b64 = encrypt_data_aes128(json.dumps(face_embeddings[i]).encode('utf-8'), aes_key)
                face_id_to_combined_key_embedding[face_id] = {
                    "aes_key": encrypted_aes_key_b64,
                    "embedding": encrypted_embedding_b64
                }
            face_id_to_combined_key_embedding["video_identifier"] = str(uuid.uuid4())
            (OUT_ROOT/"face_keys.json").write_text(json.dumps(face_id_to_combined_key_embedding, indent=2))
        else:
            (OUT_ROOT/"face_keys.json").write_text(json.dumps({"video_identifier": str(uuid.uuid4())}, indent=2))
    else:
        (OUT_ROOT/"face_keys.json").write_text(json.dumps({"video_identifier": str(uuid.uuid4())}, indent=2))

    # stats
    if total_inference_times:
        avg_inf = sum(total_inference_times)/len(total_inference_times)
        avg_fps = 1000/avg_inf if avg_inf>0 else 0
        print(f"[{current_video_name}] frames: {frame_count}  avg_ms: {avg_inf:.2f}  fps~{avg_fps:.2f}")

    # --- log total blurring time ---
    total_blurring_time = sum(total_blur_times)  # in ms
    blur_log = {
        "video_name": str(cfg.INPUT_VIDEO_PATH),
        "total_blurring_time_ms": total_blurring_time
    }
    blur_log_path = OUT_ROOT / "blur_stats.json"
    if blur_log_path.exists():
        # append to existing JSON (list of logs)
        existing = json.loads(blur_log_path.read_text())
        if isinstance(existing, list):
            existing.append(blur_log)
        else:
            existing = [existing, blur_log]
        blur_log_path.write_text(json.dumps(existing, indent=2))
    else:
        blur_log_path.write_text(json.dumps([blur_log], indent=2))

    print("Done:", current_video_name)

In [None]:
import cv2
# Replace cv2.destroyAllWindows with a no-op. process_video_one() looks the
# attribute up on the cv2 module at call time, so its cleanup step becomes
# harmless once this cell has run.
# NOTE(review): presumably needed because the installed OpenCV build has no
# GUI support -- confirm.
cv2.destroyAllWindows = lambda: None

## 📦 Batch Video Processing & Metadata Export

This script automates **batch processing** of multiple videos in the dataset.  
It iterates over predefined subfolders, runs the full face-detection, landmark, blurring, and encryption pipeline for each video, and exports results (JSON metadata and optionally blurred videos) into organized output directories.

---

### 🧠 Overview of What It Does

| Step | Purpose |
|------|---------|
| 📁 Folder Iteration | Go through all subfolders (categories) and collect video files |
| 🎞️ Per-Video Processing | Run the full `process_video_one()` pipeline on each video |
| 📊 Frame Metadata Export | Save per-frame JSON metadata for each video |
| 📦 Batch Summary | Count processed videos and total metadata outputs |
| 📁 Archiving | Zip all JSON metadata and optionally blurred videos for easy download |

---

### ⚙️ 1. Folder & File Configuration

```python
FOLDERS = [
    "1", "2", "3", "4", "5",
    "Rest", "Bystander_Movement", "Head_Movement",
    "Close", "Far", "Medium"
]

VIDEO_PATTERNS = ["*.mov", "*.mp4"]
SAVE_BLURRED_VIDEOS = False  # toggle MP4 output

In [None]:
from pathlib import Path
import shutil

# Subfolders of DATASET_ROOT visited by the batch loop, in this order:
# five numbered folders followed by the named scenario folders.
FOLDERS = [str(number) for number in range(1, 6)] + [
    "Rest",
    "Bystander_Movement",
    "Head_Movement",
    "Close",
    "Far",
    "Medium",
]

# Glob patterns used to collect the video files inside each folder.
VIDEO_PATTERNS = ["*.mov", "*.mp4"]

# toggle MP4 output
SAVE_BLURRED_VIDEOS = False

def iter_videos(folder: Path, patterns):
    """Return the files in ``folder`` matching any glob in ``patterns``.

    The search is non-recursive. Results are sorted case-insensitively by
    file name; a file matched by several patterns appears once per match.
    """
    matches = [path for pattern in patterns for path in folder.glob(pattern)]
    matches.sort(key=lambda path: path.name.lower())
    return matches

# Batch driver: runs process_video_one() on every video found in each FOLDERS
# subdirectory of DATASET_ROOT, then zips the collected outputs.
grand_total_videos = 0
grand_total_jsons = 0

# One common output folder for all JSONs and (optional) blurred videos
json_dir = OUT_ROOT / "frame_json"
blur_dir = OUT_ROOT / "blurred"
json_dir.mkdir(parents=True, exist_ok=True)
if SAVE_BLURRED_VIDEOS:
    blur_dir.mkdir(parents=True, exist_ok=True)

print(f"Expecting {len(FOLDERS)} subfolders under {DATASET_ROOT}")

for folder_name in FOLDERS:
    src_dir = DATASET_ROOT / str(folder_name)
    # Missing folders are reported and skipped rather than treated as errors.
    if not src_dir.exists() or not src_dir.is_dir():
        print(f"\n=== Folder: {folder_name} | ⚠ not found at {src_dir}")
        continue

    videos = iter_videos(src_dir, VIDEO_PATTERNS)
    num_videos = len(videos)
    print(f"\n=== Folder: {folder_name} | {num_videos} video(s) in {src_dir} ===")

    processed_count = 0
    folder_json_count = 0

    for idx, vpath in enumerate(videos, start=1):
        print(f"\n--- [{folder_name}] {idx}/{num_videos}: {vpath.name} ---")

        # Point config at common outputs
        # (the shared module-level `config` instance is mutated in place)
        config.INPUT_VIDEO_PATH = str(vpath)
        config.FRAME_JSON_DIR = str(json_dir)
        config.SAVE_OUTPUT = bool(SAVE_BLURRED_VIDEOS)
        if SAVE_BLURRED_VIDEOS:
            config.OUTPUT_VIDEO_PATH = str(blur_dir / f"{vpath.stem}_blurred.mp4")

        # Run pipeline
        # Any exception is reported and the video skipped; it is then not
        # counted as processed and its JSONs are not counted either.
        try:
            process_video_one(config)
            processed_count += 1
        except Exception as e:
            print(f"Pipeline error for {vpath.name}: {e}")
            continue

        # Count JSONs
        # json_dir is not cleared by this script, so matching files left by
        # earlier runs are counted as well.
        saved_jsons = sorted(json_dir.glob(f"{vpath.stem}_frame*.json"))
        folder_json_count += len(saved_jsons)

        # Optional frame check
        # expected_frames is None only when reading the CSV raised.
        try:
            expected_frames = load_target_frames_for_video(config.FRAMES_CSV_PATH, vpath.stem)
        except Exception:
            expected_frames = None

        if expected_frames is not None:
            try:
                expected_set = set(int(x) for x in expected_frames)
                # Recover the frame index from "<stem>_frame<idx>.json" file names.
                got_frames = {int(p.stem.split("_frame", 1)[-1]) for p in saved_jsons}
                missing = sorted(expected_set - got_frames)
                if missing:
                    print(f"Missing {len(missing)} expected frames. Example: {missing[:10]}")
                else:
                    print(f"All {len(got_frames)} expected frames saved.")
            except Exception as e:
                print(f"Frame check error for {vpath.stem}: {e}")
        else:
            print(f"Saved {len(saved_jsons)} JSON files for {vpath.stem}.")

    grand_total_videos += processed_count
    grand_total_jsons += folder_json_count

    print(f"\n>>> Folder complete: {folder_name}")
    print(f"    Videos processed: {processed_count}/{num_videos}")
    print(f"    JSONs saved:      {folder_json_count}")

# --- Final zip(s) ---
# One big zip with all JSONs
# (make_archive receives the base name without ".zip" and appends the suffix itself)
zip_jsons = OUT_ROOT / "all_frame_jsons.zip"
if zip_jsons.exists():
    zip_jsons.unlink()
shutil.make_archive(str(zip_jsons.with_suffix("")), 'zip', root_dir=json_dir)
print(f"\nZipped all JSONs → {zip_jsons}")

# (Optional) one zip for blurred videos if enabled
if SAVE_BLURRED_VIDEOS:
    zip_videos = OUT_ROOT / "all_blurred_videos.zip"
    if zip_videos.exists():
        zip_videos.unlink()
    shutil.make_archive(str(zip_videos.with_suffix("")), 'zip', root_dir=blur_dir)
    print(f"Zipped all blurred videos → {zip_videos}")

print("\n======== Batch complete ========")
print(f"Total videos processed: {grand_total_videos}")
print(f"Total JSONs saved:      {grand_total_jsons}")
print("All outputs organized under:", OUT_ROOT.resolve())

## 📊 COCO Evaluation Pipeline for Face Detection

This script converts ground-truth and prediction JSON files into **COCO-compatible format** and runs a full **evaluation** using `pycocotools`.  
It calculates standard detection metrics such as **Average Precision (AP)** and **Average Recall (AR)** at different IoU thresholds to evaluate the performance of your pipeline.

---

### 🧠 What This Script Does

| Stage | Purpose |
|-------|---------|
| 🗂️ Load Ground Truth | Read ground truth JSONs containing annotated bounding boxes |
| 🧪 Build COCO GT | Convert ground truth to COCO format (`coco_gt.json`) |
| 📈 Build COCO Results | Convert model predictions to COCO format (`coco_results.json`) |
| 🔎 Evaluate | Run `pycocotools` evaluation and compute AP/AR metrics |
| 📊 Summarize | Print detailed results and store them in a results dictionary |

---

### ⚙️ 1. Configuration

```python
GT_DIR  = "YourAnnotationsFolder"           # Folder containing *_output.json files (ground truth)
EXP_DIR = OUT_ROOT / "frame_json"           # Folder containing pipeline output JSONs (predictions)
OUTDIR  = "../coco_eval"                    # Directory to save COCO-format JSONs and results
CATEGORY_NAME = "face"                      # Object category name
CATEGORY_ID = 1                             # Category ID

In [None]:
#   (also accepts "bbox_xyxy" instead of "pred_bbox"; hulls ignored)
all_results = {}

import os, json, math, numpy as np
from typing import Any, Dict, List, Tuple, Optional

# ----------------- CONFIG: CHANGE THESE -----------------
# Ground-truth files named "<name>_output.json" are paired with prediction
# files named "<name>.json" when the COCO GT is built below.
GT_DIR  = "YourAnnotationsFolder"    # folder containing *_output.json files
# OUT_ROOT is the pathlib.Path output root defined earlier in the notebook.
EXP_DIR = OUT_ROOT / "frame_json"   # folder containing *.json files
OUTDIR  = "../coco_eval"       # where coco_gt.json / coco_results.json will be written
# Single category used for every ground-truth annotation and every detection.
CATEGORY_NAME = "face"
CATEGORY_ID = 1
# --------------------------------------------------------

# Create the output folder up front so both COCO files can be written into it.
os.makedirs(OUTDIR, exist_ok=True)
COCO_GT_PATH = os.path.join(OUTDIR, "coco_gt.json")
COCO_DT_PATH = os.path.join(OUTDIR, "coco_results.json")

# --------------- Helpers ---------------
def list_jsons(folder: str) -> List[str]:
    """
    Return the sorted names (not full paths) of the entries in `folder`
    whose name ends in ".json". The extension check ignores case.
    """
    matches = []
    for entry in os.listdir(folder):
        if entry.lower().endswith(".json"):
            matches.append(entry)
    matches.sort()
    return matches

def read_json(path: str):
    """Parse the UTF-8 encoded JSON file at `path` and return the decoded object."""
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload

def write_json(obj, path: str):
    """Serialize `obj` as JSON into `path` (UTF-8), replacing any existing file."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(obj, handle)

def xyxy_to_xywh(b: List[float]) -> List[float]:
    """
    Convert a box from [x1, y1, x2, y2] corners to [x, y, width, height].

    Width and height are clamped at 0.0, so a box whose second corner lies
    before the first collapses to zero size instead of going negative.
    """
    coords = [float(c) for c in b]
    left, top, right, bottom = coords
    width = max(0.0, right - left)
    height = max(0.0, bottom - top)
    return [left, top, width, height]

def extract_all_gt_xyxy(js: Any) -> List[List[float]]:
    """
    Gather every ground-truth box from a GT payload.

    `js` may be one dict or a list of dicts; each dict contributes its
    'bbox_xyxy' value when that value is a 4-element list. Anything else
    (other payload types, non-dict items, malformed boxes) is skipped.
    Returns the boxes as lists of floats, or an empty list if none qualify.
    """
    if isinstance(js, dict):
        entries = [js]
    elif isinstance(js, list):
        entries = js
    else:
        return []

    boxes: List[List[float]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        raw = entry.get("bbox_xyxy")
        if isinstance(raw, list) and len(raw) == 4:
            boxes.append([float(c) for c in raw])
    return boxes

def parse_pred_rect_xyxy(det: Dict[str, Any]) -> Optional[List[float]]:
    """
    Read the xyxy box of one detection dict.

    Looks at 'pred_bbox' first and 'bbox_xyxy' second; the first of them
    holding a 4-element list is returned as floats. Returns None when `det`
    is not a dict or neither key holds such a list.
    """
    if not isinstance(det, dict):
        return None
    for key in ("pred_bbox", "bbox_xyxy"):
        candidate = det.get(key)
        if isinstance(candidate, list) and len(candidate) == 4:
            return [float(c) for c in candidate]
    return None

def extract_all_pred_xyxy_and_scores(js: Any) -> List[Tuple[List[float], float]]:
    """
    Collect (xyxy box, score) pairs from a prediction payload.

    `js` may be a list of detection dicts or a single detection dict; any
    other payload yields an empty list. Detections that are not dicts or
    have no parsable box are dropped.

    The score is the first numeric value found under 'det_score', 'score',
    'land_score' (in that order), defaulting to 1.0 when none is numeric.
    A NaN or infinite score is replaced by 0.0.
    """
    if isinstance(js, list):
        candidates = js
    elif isinstance(js, dict):
        candidates = [js]
    else:
        candidates = []

    score_keys = ("det_score", "score", "land_score")
    pairs: List[Tuple[List[float], float]] = []
    for det in candidates:
        rect = parse_pred_rect_xyxy(det) if isinstance(det, dict) else None
        if rect is None:
            continue

        # First key (in priority order) that carries a numeric value wins.
        raw = next(
            (det[key] for key in score_keys if isinstance(det.get(key), (int, float))),
            None,
        )
        confidence = 1.0 if raw is None else float(raw)

        # Non-finite scores are mapped to 0.0.
        if not math.isfinite(confidence):
            confidence = 0.0

        pairs.append((rect, confidence))
    return pairs

# --------------- Build COCO GT from GT_DIR ---------------
def build_coco_gt_from_gt_folder(gt_dir: str, category_id: int = 1, category_name: str = "face") -> Dict:
    """
    Assemble a COCO ground-truth dict from the JSON files in `gt_dir`.

    Every JSON file becomes one image entry, numbered from 1 in sorted
    filename order. The image's file_name is the matching prediction
    filename: "<name>_output.json" maps to "<name>.json", any other name is
    used unchanged. Each GT box becomes one annotation (ids from 1) with an
    xywh bbox, iscrowd=0 and area = w * h. 'info' and 'licenses' sections
    are included alongside images / annotations / categories.
    """
    gt_suffix = "_output.json"
    images = []
    annotations = []

    for image_id, gt_name in enumerate(list_jsons(gt_dir), start=1):
        # Map GT "<name>_output.json" -> EXP "<name>.json"; otherwise keep the name.
        if gt_name.endswith(gt_suffix):
            pred_name = gt_name[:-len(gt_suffix)] + ".json"
        else:
            pred_name = gt_name
        images.append({"id": image_id, "file_name": pred_name})

        payload = read_json(os.path.join(gt_dir, gt_name))
        for box in extract_all_gt_xyxy(payload):
            x, y, w, h = xyxy_to_xywh(box)
            annotations.append({
                # annotation ids run consecutively across all images, starting at 1
                "id": len(annotations) + 1,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [x, y, w, h],
                "iscrowd": 0,
                "area": float(w * h),
            })

    info = {
        "description": "Auto-generated GT",
        "version": "1.0",
        "year": 2025,
        "contributor": "",
        "date_created": ""
    }
    return {
        "info": info,
        "licenses": [],
        "images": images,
        "annotations": annotations,
        "categories": [{"id": category_id, "name": category_name}],
    }

# --------------- Build COCO Results from EXP_DIR using GT keying ---------------
def build_coco_results_from_exp_folder(coco_gt: Dict, exp_dir: str, category_id: int = 1) -> List[Dict]:
    """
    Build the COCO results list for the images declared in `coco_gt`.

    For each coco_gt['images'] entry, the file called file_name is looked up
    in `exp_dir`; when it exists, every detection in it is emitted as
    {image_id, category_id, bbox (xywh), score}. Images with no prediction
    file are skipped without any message.
    """
    # file_name -> image id, as declared by the ground truth
    id_by_name = {entry["file_name"]: entry["id"] for entry in coco_gt.get("images", [])}
    available = set(list_jsons(exp_dir))

    detections: List[Dict] = []
    for pred_name, image_id in id_by_name.items():
        if pred_name in available:
            payload = read_json(os.path.join(exp_dir, pred_name))
            for box, score in extract_all_pred_xyxy_and_scores(payload):
                x, y, w, h = xyxy_to_xywh(box)
                detections.append({
                    "image_id": image_id,
                    "category_id": category_id,
                    "bbox": [x, y, w, h],
                    "score": float(score),
                })
    return detections

# --------------- Save & Evaluate (pycocotools) ---------------
def run_coco_eval(gt_path: str, dt_path: str):
    """
    Evaluate COCO-format detections against COCO-format ground truth (bbox).

    Args:
        gt_path: Path to the COCO ground-truth JSON.
        dt_path: Path to the COCO results JSON (a list of detection dicts;
            an empty list is allowed and yields zero AP/AR).

    Returns:
        Dict of floats keyed by "AP@[0.50:0.95]", "AP@0.50", "AP@0.75",
        "AP@[0.50:0.90]", "AR@0.50", "AR@0.75", "AR@[0.50:0.90]".
        AP is COCOeval stats[0]; AR is stats[8] (area=all, maxDets=100).

    Each IoU setting is evaluated once and both its AP and AR are read from
    that single run, so pycocotools prints four summary tables.
    """
    import json

    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    cocoGt = COCO(gt_path)

    with open(dt_path, "r", encoding="utf-8") as f:
        detections = json.load(f)

    if detections:
        cocoDt = cocoGt.loadRes(detections)
    else:
        # COCO.loadRes reads anns[0], so an empty result list raises IndexError.
        # Build an equivalent results object with zero annotations by hand.
        cocoDt = COCO()
        cocoDt.dataset = {
            "images": list(cocoGt.dataset.get("images", [])),
            "categories": list(cocoGt.dataset.get("categories", [])),
            "annotations": [],
        }
        cocoDt.createIndex()

    img_ids = sorted(cocoGt.getImgIds())

    def _evaluate(iou_thrs=None):
        """Run one bbox evaluation; return (AP, AR@maxDets=100) averaged over iou_thrs."""
        ev = COCOeval(cocoGt, cocoDt, iouType='bbox')
        ev.params.imgIds = img_ids
        if iou_thrs is not None:
            ev.params.iouThrs = np.asarray(iou_thrs, dtype=float)
        ev.evaluate()
        ev.accumulate()
        ev.summarize()
        return float(ev.stats[0]), float(ev.stats[8])

    # Standard COCO range 0.50:0.95 (for reference); only AP is reported.
    ap_std, _ = _evaluate()

    # Single-threshold runs.
    ap50, ar50 = _evaluate([0.50])
    ap75, ar75 = _evaluate([0.75])

    # 0.50, 0.55, ..., 0.90. linspace fixes the number of thresholds at 9,
    # whereas arange with a float step depends on rounding at the endpoint.
    ap_5090, ar_5090 = _evaluate(np.linspace(0.50, 0.90, 9))

    print("\n======= Custom Summary =======")
    print(f"AP@[0.50:0.95] : {ap_std:.4f}")
    print(f"AP@0.50        : {ap50:.4f}")
    print(f"AP@0.75        : {ap75:.4f}")
    print(f"AP@[0.50:0.90] : {ap_5090:.4f}")
    print(f"AR@0.50        : {ar50:.4f}")
    print(f"AR@0.75        : {ar75:.4f}")
    print(f"AR@[0.50:0.90] : {ar_5090:.4f}")
    print("================================")

    metrics = {
        "AP@[0.50:0.95]": ap_std,
        "AP@0.50": ap50,
        "AP@0.75": ap75,
        "AP@[0.50:0.90]": ap_5090,
        "AR@0.50": ar50,
        "AR@0.75": ar75,
        "AR@[0.50:0.90]": ar_5090,
    }
    return metrics

# --------------- Pipeline ---------------
# 1) Build COCO GT from GT_DIR (mapping names by dropping trailing '_output')
coco_gt = build_coco_gt_from_gt_folder(GT_DIR, category_id=CATEGORY_ID, category_name=CATEGORY_NAME)
write_json(coco_gt, COCO_GT_PATH)
print(f"Wrote COCO GT to: {COCO_GT_PATH}  | images={len(coco_gt['images'])}, anns={len(coco_gt['annotations'])}")

# 2) Build COCO results from EXP_DIR keyed by coco_gt['images'].file_name
coco_dt = build_coco_results_from_exp_folder(coco_gt, EXP_DIR, category_id=CATEGORY_ID)
write_json(coco_dt, COCO_DT_PATH)
try:
    print(f"Wrote COCO Results to: {COCO_DT_PATH}  | detections={len(coco_dt)} | images={len(coco_dt['images'])}")
except:
    pass
# 3) Install pycocotools (if not already) and evaluate
try:
    from pycocotools.coco import COCO  # noqa: F401
except Exception:
    !pip -q install pycocotools

metrics = run_coco_eval(COCO_GT_PATH, COCO_DT_PATH)

all_results["skip 0"] = [
    metrics["AP@0.50"],
    metrics["AP@0.75"],
    metrics["AP@[0.50:0.95]"],
    metrics["AP@[0.50:0.90]"],
    metrics["AR@0.50"],
    metrics["AR@0.75"],
    metrics["AR@[0.50:0.90]"],
]

print("\nUpdated results dict:")
print(all_results)