<a href="https://colab.research.google.com/github/ayeshamaqsood6100-lab/My-Projects-/blob/main/Version_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 🚀 Meta Sapiens: GOLIATH 1B - v9 (Repair & Diagnostic)
# ============================================================================
# META SAPIENS 1B - GOLIATH EDITION v9
# ============================================================================
# DIAGNOSTIC MODE:
# 1. Forces FFmpeg "Repair" on input video to ensure readability.
# 2. Prints frame progress explicitly.
# 3. Uses Sequential processing to save RAM.
# ============================================================================

import os
import sys
import subprocess
import json
import torch
import cv2
import numpy as np
import gc
from tqdm import tqdm
from huggingface_hub import hf_hub_download

# ============================================================================
# PHASE 0: INSTALL DEPENDENCIES
# ============================================================================
print("üîß PHASE 0: INSTALLING DEPENDENCIES")


def install(package):
    """Install *package* quietly with pip under the running interpreter.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-q", package]
    subprocess.check_call(pip_command)


# NOTE(review): torch, cv2, tqdm and huggingface_hub are imported at the top
# of the file, before this phase runs, so a missing package fails there first.
# Confirm whether this phase is meant to precede those imports.
for _package in ("torch", "torchvision", "opencv-python", "tqdm", "huggingface_hub"):
    install(_package)
print("‚úÖ Dependencies installed!\n")

# ============================================================================
# PHASE 1: SETUP & DOWNLOAD MODELS
# ============================================================================
print("üöÄ PHASE 1: SETUP")
# Colab is detected by attempting the import; outside Colab it raises
# ImportError and the script falls back to local paths.
try:
    from google.colab import drive
    drive_mount_point = '/content/drive'
    already_mounted = os.path.exists(drive_mount_point)
    if not already_mounted:
        drive.mount(drive_mount_point)
    IN_COLAB = True
    print("‚úÖ Google Drive mounted!")
except ImportError:
    IN_COLAB = False
    print("‚ö†Ô∏è Not running in Colab.")

# Directory the model files are downloaded into.
if IN_COLAB:
    MODEL_DIR = "/content/sapiens_models"
else:
    MODEL_DIR = "./sapiens_models"

print("\nüì¶ Downloading Models (Goliath 1B)...")
# Pose model first, then segmentation; each call returns the local file path.
POSE_MODEL_PATH = hf_hub_download(
    repo_id="facebook/sapiens-pose-1b-torchscript",
    filename="sapiens_1b_goliath_best_goliath_AP_639_torchscript.pt2",
    local_dir=MODEL_DIR,
)
SEG_MODEL_PATH = hf_hub_download(
    repo_id="facebook/sapiens-seg-1b-torchscript",
    filename="sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2",
    local_dir=MODEL_DIR,
)
print("‚úÖ Models Ready!\n")

# ============================================================================
# CONFIGURATION & UTILS
# ============================================================================
# Name of the video to process; it is looked up directly under BASE_PATH.
VIDEO_FILENAME = "Bandbaja.mp4"  # <--- MAKE SURE THIS MATCHES YOUR FILE

# In Colab the input/outputs live on the mounted Drive and the re-encoded
# ("cleaned") copy of the input goes to local disk; otherwise everything is
# relative to the current directory.
if IN_COLAB:
    BASE_PATH = "/content/drive/MyDrive"
    CLEAN_INPUT = "/content/cleaned_input.mp4"
else:
    BASE_PATH = "."
    CLEAN_INPUT = "./cleaned_input.mp4"

RAW_INPUT = os.path.join(BASE_PATH, VIDEO_FILENAME)

# Intermediate AVIs (one per pass plus the side-by-side result) and the final MP4.
OUTPUT_POSE_AVI, OUTPUT_SEG_AVI, OUTPUT_COMBINED_AVI = (
    os.path.join(BASE_PATH, avi_name)
    for avi_name in ("temp_pose.avi", "temp_seg.avi", "temp_combined.avi")
)
OUTPUT_FINAL = os.path.join(BASE_PATH, "GOLIATH_v9_" + VIDEO_FILENAME)

# --- REPAIR VIDEO ---
# Verify the raw input exists, then re-encode it to H.264 / yuv420p MP4 so
# OpenCV reads a standard stream. The script exits with status 1 when the
# input is missing or the re-encode fails.
print(f"üîç Checking Input: {RAW_INPUT}")
if not os.path.exists(RAW_INPUT):
    print(f"‚ùå ERROR: File not found at {RAW_INPUT}")
    print("   Please check the filename in your Google Drive.")
    sys.exit(1)

print("üîß Repairing video with FFmpeg to ensure readability...")
# Arguments are passed as a list (no shell) so file names containing quotes,
# spaces or shell metacharacters reach ffmpeg verbatim.
repair_cmd = [
    "ffmpeg", "-y", "-loglevel", "error",
    "-i", RAW_INPUT,
    "-c:v", "libx264", "-preset", "fast", "-crf", "23",
    "-pix_fmt", "yuv420p",
    CLEAN_INPUT,
]
try:
    repair_ok = subprocess.run(repair_cmd).returncode == 0
except OSError:
    # The ffmpeg binary is missing or cannot be executed.
    repair_ok = False

# The exit status is checked as well as the file: a cleaned file left over from
# an earlier run must not make a failed re-encode look like success.
if not repair_ok or not os.path.exists(CLEAN_INPUT):
    print("‚ùå Critical Error: FFmpeg failed to process video.")
    sys.exit(1)
else:
    print("‚úÖ Video Repaired and Ready.")

# ============================================================================
# LOGIC & DRAWING
# ============================================================================
# Keypoint names; get_keypoints pairs entry i with heatmap channel i.
# Layout: 21 body/foot points, the right hand (four joints per finger, then
# the wrist), the left hand in the same pattern, six arm/shoulder landmarks,
# the neck, and finally generic 'face_kp_<index>' names up to 308 entries.
GOLIATH_KEYPOINTS = [
    'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
    'left_hip', 'right_hip', 'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
    'left_big_toe', 'left_small_toe', 'left_heel',
    'right_big_toe', 'right_small_toe', 'right_heel',
]
for _side in ('right', 'left'):
    for _finger in ('thumb', 'forefinger', 'middle_finger', 'ring_finger', 'pinky_finger'):
        GOLIATH_KEYPOINTS.extend(
            f'{_side}_{_finger}{_suffix}' for _suffix in ('4', '3', '2', '_third_joint')
        )
    GOLIATH_KEYPOINTS.append(f'{_side}_wrist')
GOLIATH_KEYPOINTS += [
    'left_olecranon', 'right_olecranon',
    'left_cubital_fossa', 'right_cubital_fossa',
    'left_acromion', 'right_acromion',
    'neck',
]
# Pad with placeholder face names; the index in the name is the list position.
GOLIATH_KEYPOINTS += [f'face_kp_{i}' for i in range(len(GOLIATH_KEYPOINTS), 308)]

# Colour lookup table for segmentation: row k is the three-channel colour
# painted over pixels whose class id is k (28 rows, uint8). Row 0 is black.
# NOTE(review): draw_seg_hq blends these rows directly with frames read by
# OpenCV, so the channels are interpreted in the frame's order — confirm the
# values were authored in that order.
_PALETTE_ROWS = (
    (0, 0, 0), (255, 220, 200), (255, 200, 170), (100, 200, 100),
    (50, 255, 50), (255, 165, 0), (0, 255, 128), (255, 140, 0),
    (0, 255, 255), (255, 100, 100), (100, 100, 255), (255, 100, 255),
    (50, 50, 200), (200, 50, 200), (0, 200, 200), (200, 200, 0),
    (139, 90, 43), (255, 210, 180), (255, 255, 255), (255, 100, 150),
    (255, 150, 150), (255, 120, 120), (160, 110, 60), (160, 110, 60),
    (100, 160, 220), (100, 160, 220), (255, 200, 160), (200, 160, 110),
)
BODY_PART_COLORS = np.asarray(_PALETTE_ROWS, dtype=np.uint8)

# Drawing colours in OpenCV's (B, G, R) channel order: they are handed
# straight to cv2.line / cv2.circle on frames read by cv2.VideoCapture, so an
# RGB triple would render with red and blue swapped.
C_ORANGE = (0, 165, 255)
C_BLUE = (255, 153, 51)
C_GREEN = (0, 255, 0)
C_FACE_KP = (255, 255, 255)

# Bones to draw, as (start keypoint, end keypoint, colour). Torso and neck are
# orange, the left limbs green, the right limbs blue.
SKELETON = [
    ('left_shoulder', 'right_shoulder', C_ORANGE), ('left_hip', 'right_hip', C_ORANGE),
    ('left_shoulder', 'left_hip', C_ORANGE), ('right_shoulder', 'right_hip', C_ORANGE),
    ('neck', 'left_shoulder', C_ORANGE), ('neck', 'right_shoulder', C_ORANGE),
    ('left_shoulder', 'left_elbow', C_GREEN), ('left_elbow', 'left_wrist', C_GREEN),
    ('left_hip', 'left_knee', C_GREEN), ('left_knee', 'left_ankle', C_GREEN),
    ('left_ankle', 'left_big_toe', C_GREEN), ('left_ankle', 'left_heel', C_GREEN),
    ('right_shoulder', 'right_elbow', C_BLUE), ('right_elbow', 'right_wrist', C_BLUE),
    ('right_hip', 'right_knee', C_BLUE), ('right_knee', 'right_ankle', C_BLUE),
    ('right_ankle', 'right_big_toe', C_BLUE), ('right_ankle', 'right_heel', C_BLUE),
]
# One wrist -> finger-base bone per finger, thumb included. The thumb bones are
# added only here so that no edge appears (and is drawn) twice.
for side, color in [('left', C_GREEN), ('right', C_BLUE)]:
    for finger in ['thumb', 'forefinger', 'middle_finger', 'ring_finger', 'pinky_finger']:
        SKELETON.append((f'{side}_wrist', f'{side}_{finger}_third_joint', color))

def preprocess(img):
    """Turn a BGR frame into a normalised, batched model input tensor.

    The frame is resized to 768x1024 (width x height), converted to RGB,
    scaled to [0, 1] and normalised per channel with the mean/std below.

    Args:
        img: image array accepted by cv2.resize / cv2.COLOR_BGR2RGB
            (a 3-channel BGR frame).

    Returns:
        A float32 torch tensor laid out as (1, 3, 1024, 768).
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    resized = cv2.resize(img, (768, 1024))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    scaled = rgb.astype(np.float32) / 255.0
    normalised = (scaled - channel_mean) / channel_std
    # HWC -> CHW, then add the batch axis.
    chw = normalised.transpose(2, 0, 1)
    return torch.from_numpy(chw).unsqueeze(0).float()

def get_keypoints(heatmaps, w, h, keypoint_names=None):
    """Locate the peak of each heatmap and scale it to frame coordinates.

    Args:
        heatmaps: NumPy array indexed as [keypoint, row, col].
        w: width of the target frame in pixels.
        h: height of the target frame in pixels.
        keypoint_names: optional sequence naming heatmap i at position i.
            Defaults to the module-level GOLIATH_KEYPOINTS.

    Returns:
        A dict mapping keypoint name to {'x', 'y', 'conf'}, all plain Python
        floats. 'x'/'y' are the peak's column/row scaled by w/cols and h/rows;
        'conf' is the heatmap value at the peak. Only as many entries are
        produced as there are both names and heatmaps.
    """
    if keypoint_names is None:
        keypoint_names = GOLIATH_KEYPOINTS
    kps = {}
    # zip stops at the shorter of the two, replacing the manual index check.
    for name, hm in zip(keypoint_names, heatmaps):
        rows, cols = hm.shape
        y, x = np.unravel_index(np.argmax(hm), hm.shape)
        kps[name] = {
            'x': float(x * w / cols),
            'y': float(y * h / rows),
            'conf': float(hm[y, x]),
        }
    return kps

def draw_pose_hq(frame, kps, conf_thresh=0.3):
    """Draw skeleton bones and a thinned set of face dots onto *frame* in place.

    Args:
        frame: image array to draw on (modified in place and returned).
        kps: dict of keypoint name -> {'x', 'y', 'conf'}.
        conf_thresh: a keypoint is used only when its 'conf' exceeds this.

    Returns:
        The same *frame* object, after drawing.
    """
    frame_h, frame_w = frame.shape[:2]
    # Bone thickness grows with the smaller frame side; dots stay 1 px.
    thickness = max(1, int(min(frame_w, frame_h) / 600))
    dot_radius = 1

    def _pixel(kp):
        return (int(kp['x']), int(kp['y']))

    # Bones: both endpoints must be present and confident.
    for joint_a, joint_b, bone_color in SKELETON:
        if joint_a not in kps or joint_b not in kps:
            continue
        kp_a, kp_b = kps[joint_a], kps[joint_b]
        if not (kp_a['conf'] > conf_thresh and kp_b['conf'] > conf_thresh):
            continue
        cv2.line(frame, _pixel(kp_a), _pixel(kp_b), bone_color, thickness, cv2.LINE_AA)

    # Face dots: walk the names in sorted order, keep the confident face
    # points, and draw every second one (2nd, 4th, ...) to keep them sparse.
    face_anchor_names = ('nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear')
    confident_face_points = [
        kps[name]
        for name in sorted(kps)
        if kps[name]['conf'] > conf_thresh
        and (name.startswith('face_kp') or name in face_anchor_names)
    ]
    for kp in confident_face_points[1::2]:
        cv2.circle(frame, _pixel(kp), dot_radius, C_FACE_KP, -1, cv2.LINE_AA)
    return frame

def draw_seg_hq(seg_map, w, h, original_frame, blend_alpha=0.7):
    """Overlay the class-colour image on *original_frame* wherever a class is set.

    Args:
        seg_map: integer array of class ids used to index BODY_PART_COLORS
            (expected to be 2-D, rows x cols).
        w: output width in pixels.
        h: output height in pixels.
        original_frame: frame to blend with; it is not modified.
        blend_alpha: weight of the class colour in the blend (the frame gets
            1 - blend_alpha).

    Returns:
        A uint8 image: blended colour where the resized mask is non-zero, the
        original frame elsewhere.
    """
    target_size = (w, h)

    # Colourise at model resolution, then scale up to the frame size.
    palette_image = cv2.resize(
        BODY_PART_COLORS[seg_map], target_size, interpolation=cv2.INTER_LINEAR
    )

    # Class 0 is excluded from the overlay; the mask gets a trailing channel
    # axis so it broadcasts against the 3-channel images.
    foreground = np.not_equal(seg_map, 0).astype(np.uint8)
    foreground = cv2.resize(foreground, target_size, interpolation=cv2.INTER_LINEAR)
    foreground = foreground[:, :, np.newaxis]

    overlay = palette_image * blend_alpha + original_frame * (1 - blend_alpha)
    overlay = overlay.astype(np.uint8)
    return np.where(foreground > 0, overlay, original_frame).astype(np.uint8)

# ============================================================================
# PROCESSING LOOP (Sequential)
# ============================================================================
print("\nüé¨ INITIALIZING PROCESSING...")

# 1. READ VIDEO METADATA
# Fail here, before any model is loaded, if OpenCV cannot open the cleaned
# video or reports unusable dimensions.
cap = cv2.VideoCapture(CLEAN_INPUT)
if not cap.isOpened():
    cap.release()
    print(f"ERROR: OpenCV could not open {CLEAN_INPUT}")
    sys.exit(1)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 when FPS is reported as 0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Ensure even dims
width = (width // 2) * 2
height = (height // 2) * 2
cap.release()

if width <= 0 or height <= 0:
    print(f"ERROR: Invalid video dimensions {width}x{height} for {CLEAN_INPUT}")
    sys.exit(1)

print(f"   Video Info: {width}x{height}, {total_frames} frames, {fps} fps")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"   Device: {device}")

# --- PASS 1: POSE ---
# Run the pose model on every frame and write the annotated frames to
# OUTPUT_POSE_AVI. `fourcc` defined here is reused by the later passes.
print("\nüèÉ PASS 1/3: POSE ESTIMATION (Loading Model...)")
gc.collect()
torch.cuda.empty_cache()
pose_model = torch.jit.load(POSE_MODEL_PATH, map_location=device).eval()

cap = cv2.VideoCapture(CLEAN_INPUT)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out_pose = cv2.VideoWriter(OUTPUT_POSE_AVI, fourcc, fps, (width, height))

processed_count = 0
try:
    # A writer that failed to open would otherwise drop every frame silently.
    if not out_pose.isOpened():
        print(f"ERROR: Could not open video writer for {OUTPUT_POSE_AVI}")
        sys.exit(1)

    with tqdm(total=total_frames, desc="Pose") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (width, height))
            tensor = preprocess(frame).to(device)

            with torch.no_grad():
                pose_out = pose_model(tensor)
            # The model output may be a tuple; take the first batch element.
            heatmaps = (pose_out[0] if isinstance(pose_out, tuple) else pose_out)[0].cpu().numpy()
            kps = get_keypoints(heatmaps, width, height)

            # The resized frame is not reused, so it is drawn on directly.
            result_frame = draw_pose_hq(frame, kps)
            out_pose.write(result_frame)

            processed_count += 1
            pbar.update(1)
finally:
    # Release the video handles and the model even if a frame fails, so a
    # re-run does not start with the model still resident in memory.
    cap.release()
    out_pose.release()
    del pose_model
    gc.collect()
    torch.cuda.empty_cache()

print(f"‚úÖ Pose Pass Done. Processed {processed_count} frames.")
if processed_count == 0:
    print("‚ùå ERROR: No frames processed! Video read failed.")
    sys.exit(1)

# --- PASS 2: SEGMENTATION ---
# Run the segmentation model on every frame and write the colour overlay to
# OUTPUT_SEG_AVI.
print("\nüèÉ PASS 2/3: SEGMENTATION (Loading Model...)")
seg_model = torch.jit.load(SEG_MODEL_PATH, map_location=device).eval()

cap = cv2.VideoCapture(CLEAN_INPUT)
out_seg = cv2.VideoWriter(OUTPUT_SEG_AVI, fourcc, fps, (width, height))

try:
    # A writer that failed to open would otherwise drop every frame silently.
    if not out_seg.isOpened():
        print(f"ERROR: Could not open video writer for {OUTPUT_SEG_AVI}")
        sys.exit(1)

    with tqdm(total=total_frames, desc="Seg") as pbar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (width, height))

            tensor = preprocess(frame).to(device)
            with torch.no_grad():
                seg_out = seg_model(tensor)
            # The model output may be a tuple; take the first batch element.
            seg_logits = (seg_out[0] if isinstance(seg_out, tuple) else seg_out)[0].cpu().numpy()
            # Per-pixel class id = index of the largest logit along axis 0.
            seg_map = np.argmax(seg_logits, axis=0)

            # draw_seg_hq builds a new image, so no defensive copy is needed.
            result_frame = draw_seg_hq(seg_map, width, height, frame)
            out_seg.write(result_frame)
            pbar.update(1)
finally:
    # Release the video handles and the model even if a frame fails.
    cap.release()
    out_seg.release()
    del seg_model
    gc.collect()
    torch.cuda.empty_cache()

# --- PASS 3: STITCHING ---
# Read the pose and segmentation AVIs in lockstep and write them side by side
# (pose on the left) to OUTPUT_COMBINED_AVI.
print("\nüßµ PASS 3/3: STITCHING...")
cap1 = cv2.VideoCapture(OUTPUT_POSE_AVI)
cap2 = cv2.VideoCapture(OUTPUT_SEG_AVI)
out_final = cv2.VideoWriter(OUTPUT_COMBINED_AVI, fourcc, fps, (width * 2, height))

stitched_count = 0
try:
    if not out_final.isOpened():
        print(f"ERROR: Could not open video writer for {OUTPUT_COMBINED_AVI}")
        sys.exit(1)

    while True:
        r1, f1 = cap1.read()
        r2, f2 = cap2.read()
        # Stop as soon as either stream runs out.
        if not r1 or not r2:
            break
        out_final.write(np.hstack([f1, f2]))
        stitched_count += 1
finally:
    cap1.release()
    cap2.release()
    out_final.release()

if stitched_count == 0:
    print("ERROR: No frames stitched; pose/segmentation outputs could not be read.")
    sys.exit(1)

# The per-pass AVIs were only inputs to this step; remove them so large
# temporary files are not left next to the final video.
for temp_avi in (OUTPUT_POSE_AVI, OUTPUT_SEG_AVI):
    if os.path.exists(temp_avi):
        os.remove(temp_avi)

# --- CONVERT ---
print("\nüîÑ FINISHING UP (Converting to MP4)...")
def convert(inp, out):
    """Re-encode *inp* to an H.264 / yuv420p file at *out* using ffmpeg.

    The input file is deleted only after ffmpeg exits successfully and the
    output exists, so a failed conversion never destroys the only copy.

    Args:
        inp: path of the video to convert; nothing happens if it is missing.
        out: path of the file to write (overwritten if present).

    Returns:
        True if the conversion succeeded and *inp* was removed, else False.
    """
    if not os.path.exists(inp):
        return False

    # List arguments (no shell) keep paths with quotes or spaces intact.
    cmd = [
        'ffmpeg', '-y', '-loglevel', 'error',
        '-i', inp,
        '-c:v', 'libx264', '-pix_fmt', 'yuv420p',
        out,
    ]
    try:
        completed = subprocess.run(cmd)
    except OSError as exc:
        # The ffmpeg binary is missing or cannot be executed.
        print(f"ERROR: Could not run ffmpeg: {exc}")
        return False

    if completed.returncode != 0 or not os.path.exists(out):
        print(f"ERROR: ffmpeg failed to convert {inp}; keeping the input file.")
        return False

    os.remove(inp)
    return True

# Convert the combined AVI to the final MP4 and report the real outcome
# instead of printing success unconditionally.
convert_result = convert(OUTPUT_COMBINED_AVI, OUTPUT_FINAL)

# `convert` may return nothing; only an explicit False is treated as failure.
# The output file itself must also exist and be non-empty.
final_ok = (
    convert_result is not False
    and os.path.exists(OUTPUT_FINAL)
    and os.path.getsize(OUTPUT_FINAL) > 0
)
if not final_ok:
    print(f"ERROR: Final video was not created at {OUTPUT_FINAL}")
    sys.exit(1)

print("="*60)
print(f"‚úÖ SUCCESS! Video Saved: {OUTPUT_FINAL}")
print("="*60)