# In [None]:
import cv2
import torch
import numpy as np
import os
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from PIL import Image

# ---------------- Kaggle referee configuration ----------------
# Checkpoint directory of the baseline model (trained with BCE loss).
# NOTE(review): the dataset folder says "b2" while the model folder says
# "b3" -- confirm which backbone this checkpoint really is.
BCE_PATH: str = "/kaggle/input/datasets/gonoszgonosz/b2-1024-weights/final_rat_model_b3_1024"
# Checkpoint directory of the Dice-loss experiment; point this at the
# finished training run before executing the script.
DICE_PATH: str = "/kaggle/working/final_rat_model_b3_1024_DICE"

# Clip to analyse and destination of the side-by-side comparison video.
INPUT_VIDEO: str = "/kaggle/input/rat-test-video/test.mp4"
OUTPUT_VIDEO: str = "/kaggle/working/BCE_vs_DICE_Referee.mp4"

# Pixels whose class-1 softmax probability exceeds this value enter the mask.
CONFIDENCE: float = 0.5
# ---------------------------------------------------------------

def get_prediction(model, processor, frame, device):
    """Return the thresholded class-1 mask for a single BGR frame.

    The frame is centred on a black square canvas whose side equals the
    longer frame dimension, converted to RGB, and run through ``processor``
    and ``model``. The logits are resized to the canvas size, turned into
    softmax probabilities, and thresholded with the module-level
    ``CONFIDENCE``. The padding is cropped away before returning.

    Returns:
        A ``uint8`` array with the frame's height and width, holding 1 where
        the class-1 probability exceeds ``CONFIDENCE`` and 0 elsewhere.
    """
    height, width = frame.shape[:2]
    side = max(height, width)

    # Region of the square canvas that the original frame occupies.
    top = (side - height) // 2
    left = (side - width) // 2
    rows = slice(top, top + height)
    cols = slice(left, left + width)

    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[rows, cols] = frame

    rgb_image = Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    batch = processor(images=rgb_image, return_tensors="pt").to(device)

    with torch.no_grad():
        raw_logits = model(**batch).logits
        # Bring the logits back to canvas resolution before thresholding.
        upsampled = torch.nn.functional.interpolate(
            raw_logits,
            size=(side, side),
            mode="bilinear",
            align_corners=False,
        )
        class_one = torch.nn.functional.softmax(upsampled, dim=1)[0, 1, :, :]
        square_mask = (class_one > CONFIDENCE).cpu().numpy().astype(np.uint8)

    # Drop the padding so the mask lines up with the input frame.
    return square_mask[rows, cols]

def _overlay_mask(frame, mask, color, alpha=0.5):
    """Return a copy of ``frame`` with ``color`` alpha-blended where ``mask == 1``."""
    blended = frame.copy()
    selected = mask == 1
    # Blend in NumPy instead of cv2.addWeighted: the selected pixels form an
    # (N, 3) array while the colour is a 3-vector, and addWeighted requires
    # both inputs to have the same size. This also handles an empty mask.
    tint = np.asarray(color, dtype=np.float32)
    mixed = blended[selected].astype(np.float32) * (1.0 - alpha) + tint * alpha
    blended[selected] = np.clip(np.rint(mixed), 0, 255).astype(np.uint8)
    return blended


def main():
    """Render a side-by-side video comparing the BCE and DICE models.

    Loads both checkpoints (``BCE_PATH`` and ``DICE_PATH``), runs each on
    every frame of ``INPUT_VIDEO``, tints the BCE mask red (left half) and
    the DICE mask green (right half), labels both halves, and writes the
    result to ``OUTPUT_VIDEO``.

    Raises:
        RuntimeError: If the input video or the output writer cannot be
            opened.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("--- REFEREE MODE: BCE vs DICE ---")

    # Load both models together with their matching image processors.
    model_bce = SegformerForSemanticSegmentation.from_pretrained(BCE_PATH).to(device)
    proc_bce = SegformerImageProcessor.from_pretrained(BCE_PATH)

    model_dice = SegformerForSemanticSegmentation.from_pretrained(DICE_PATH).to(device)
    proc_dice = SegformerImageProcessor.from_pretrained(DICE_PATH)

    cap = cv2.VideoCapture(INPUT_VIDEO)
    out = None
    try:
        # Fail loudly instead of reporting success for a video never read.
        if not cap.isOpened():
            raise RuntimeError(f"Could not open input video: {INPUT_VIDEO}")

        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report no frame rate; a zero FPS would give an
            # unusable output file, so fall back to a conventional value.
            fps = 30.0

        # Double width for the side-by-side layout.
        out = cv2.VideoWriter(
            OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w * 2, h)
        )
        if not out.isOpened():
            raise RuntimeError(f"Could not open output video for writing: {OUTPUT_VIDEO}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Raw masks straight from each model, no post-processing.
            mask_bce = get_prediction(model_bce, proc_bce, frame, device)
            mask_dice = get_prediction(model_dice, proc_dice, frame, device)

            # BCE = red overlay, DICE = green overlay (colours are BGR).
            res_bce = _overlay_mask(frame, mask_bce, (0, 0, 255))
            res_dice = _overlay_mask(frame, mask_dice, (0, 255, 0))

            # Label each half so the viewer knows which model is which.
            cv2.putText(res_bce, "BASELINE: BCE (Weighted)", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            cv2.putText(res_dice, "EXPERIMENT: DICE LOSS", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            out.write(np.hstack((res_bce, res_dice)))
    finally:
        # Release the capture and writer even if inference fails mid-video.
        cap.release()
        if out is not None:
            out.release()

    print(f"Referee Video Saved: {OUTPUT_VIDEO}")

# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()