In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input mount and echo its full path.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# It's good practice to first uninstall potentially conflicting packages
!pip uninstall torch torchvision torchaudio transformers accelerate bitsandbytes torchao -y

# Install PyTorch (ensure compatibility with CUDA version on Kaggle GPU)
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121

# Install OpenMIM
!pip install -U openmim

# Install MMEngine
!mim install mmengine

# Install an MMCV build inside the version range pinned below (>=2.0.0rc4,<2.2.0)
!pip uninstall mmcv -y # Uninstall current mmcv first to be sure
!mim install "mmcv>=2.0.0rc4,<2.2.0" 

# Install specific, potentially older but more stable, versions of transformers and accelerate
# These versions are chosen to reduce the likelihood of issues with very new torchao features.
!pip install transformers==4.30.2 
!pip install accelerate==0.22.0 

# Now install mmdet and mmpose. These should pick up the already installed compatible libraries.
!mim install "mmdet>=3.0.0" 
!mim install "mmpose>=1.0.0"

# Install other necessary libraries
!pip install opencv-python numpy tqdm pandas openpyxl requests

In [1]:
import torch
import torchvision
import mmpose
import mmdet
import mmcv
import mmengine # Ensure this is imported
import cv2
import numpy as np
import os
import pandas as pd
import requests
import subprocess
import re
from pathlib import Path
from tqdm import tqdm
# import tempfile # Not strictly needed now

print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Current CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

print(f"MMPose version: {mmpose.__version__}")
print(f"MMDetection version: {mmdet.__version__}")
print(f"MMCV version: {mmcv.__version__}")
print(f"MMEngine version: {mmengine.__version__}") # <<< ADD THIS LINE
print(f"Pandas version: {pd.__version__}")
print(f"Requests version: {requests.__version__}")
print(f"OpenCV version: {cv2.__version__}")

from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(f"MMCV CUDA version: {get_compiling_cuda_version()}")
print(f"MMCV compiler version: {get_compiler_version()}")

# Probe for the ffmpeg executable that the segmentation step shells out to.
# subprocess.run raises OSError (typically FileNotFoundError) instead of returning a
# non-zero exit code when the executable is not on PATH, so that case is handled
# explicitly; otherwise the "not found" message below could never be printed for it.
try:
    ffmpeg_check = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
except OSError:
    ffmpeg_check = None

if ffmpeg_check is not None and ffmpeg_check.returncode == 0:
    print("ffmpeg found.")
else:
    print("ffmpeg not found. Segmentation will fail.")

PyTorch version: 2.1.0+cu121
Torchvision version: 0.16.0+cu121
CUDA available: True
CUDA version: 12.1
Current CUDA device: Tesla P100-PCIE-16GB
MMPose version: 1.3.2
MMDetection version: 3.3.0
MMCV version: 2.1.0
MMEngine version: 0.10.7
Pandas version: 2.2.3
Requests version: 2.28.2
OpenCV version: 4.11.0


2025-07-01 13:11:22.052880: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751375482.248906     207 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751375482.310114     207 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


MMCV CUDA version: 12.1
MMCV compiler version: GCC 9.3
ffmpeg found.


In [2]:
import os
import requests
import torch # For torch.hub.download_url_to_file
import subprocess # For git clone

# --- Directories ---
BASE_WORKING_DIR = '/kaggle/working/'
CHECKPOINTS_DIR = os.path.join(BASE_WORKING_DIR, 'checkpoints')
MMDET_DIR = os.path.join(BASE_WORKING_DIR, 'mmdetection')
MMPOSE_DIR = os.path.join(BASE_WORKING_DIR, 'mmpose')

os.makedirs(CHECKPOINTS_DIR, exist_ok=True)

# --- Clone MMDetection and MMPose Repositories for Config Files ---
# Check out pinned release tags so the config paths used below stay stable.
# NOTE: the environment check earlier in this notebook reports mmdet 3.3.0 and mmpose 1.3.2
# as installed, so these tags are older than the installed packages; bump them if a config fails to load.
MMDET_REPO_URL = "https://github.com/open-mmlab/mmdetection.git"
MMPOSE_REPO_URL = "https://github.com/open-mmlab/mmpose.git"
MMDET_TAG = "v3.1.0" # A recent stable tag for mmdet 3.x
MMPOSE_TAG = "v1.1.0" # A recent stable tag for mmpose 1.x


def clone_repo_if_not_exists(repo_url, target_dir, tag=None):
    """Clone ``repo_url`` into ``target_dir`` unless a git checkout is already there.

    The clone is shallow (``--depth 1``): only the tip of the requested tag or
    branch is fetched rather than the full history, which is all that is needed
    to read files out of the working tree.

    Args:
        repo_url: URL handed to ``git clone``.
        target_dir: Destination directory. It counts as already cloned when it
            contains a ``.git`` entry.
        tag: Optional tag or branch name passed via ``-b``. When omitted, the
            remote's default branch is cloned.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status. The command, return code and captured output are printed
            before the exception is re-raised.
    """
    if os.path.exists(os.path.join(target_dir, '.git')):  # Check if it's a git repo
        print(f"Repository already exists at {target_dir}, skipping clone.")
        return

    print(f"Cloning {repo_url} (tag: {tag if tag else 'latest'}) to {target_dir}...")
    clone_command = ['git', 'clone', '--depth', '1']
    if tag:
        clone_command.extend(['-b', tag])
    clone_command.extend([repo_url, target_dir])

    try:
        subprocess.run(clone_command, check=True, capture_output=True, text=True)
        print(f"Successfully cloned {repo_url} to {target_dir}")
    except subprocess.CalledProcessError as e:
        print(f"Error cloning {repo_url}:")
        print(f"Command: {' '.join(e.cmd)}")
        print(f"Return code: {e.returncode}")
        print(f"Stdout: {e.stdout}")
        print(f"Stderr: {e.stderr}")
        raise  # Re-raise the exception to stop execution if cloning fails

clone_repo_if_not_exists(MMDET_REPO_URL, MMDET_DIR, MMDET_TAG)
clone_repo_if_not_exists(MMPOSE_REPO_URL, MMPOSE_DIR, MMPOSE_TAG)


# --- Pose Estimation Model (RTMPose-L Wholebody) ---
# Config path now points within the cloned mmpose repo
local_pose_config_file = os.path.join(MMPOSE_DIR, 'configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py')
pose_checkpoint_file_url = 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth'
local_pose_checkpoint_file = os.path.join(CHECKPOINTS_DIR, 'rtmpose-l_coco-wholebody.pth')

# --- Object Detection Model (Faster R-CNN R50 FPN) ---
# Config path now points within the cloned mmdetection repo
local_det_config_file = os.path.join(MMDET_DIR, 'configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py')
det_checkpoint_file_url = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
local_det_checkpoint_file = os.path.join(CHECKPOINTS_DIR, 'faster_rcnn_r50_fpn_1x_coco.pth')


def download_checkpoint_if_not_exists(url, local_path):
    """Fetch the checkpoint at ``url`` into ``local_path`` unless that file already exists.

    If the download raises, any file left at ``local_path`` is deleted and the
    original exception is re-raised to the caller.
    """
    if os.path.exists(local_path):
        print(f"Checkpoint {local_path} already exists.")
        return

    print(f"Downloading checkpoint {url} to {local_path}...")
    try:
        # torch.hub's downloader is used for the checkpoint transfer.
        torch.hub.download_url_to_file(url, local_path, progress=True)
        print("Download complete.")
    except Exception as download_error:
        print(f"Error downloading checkpoint {url}: {download_error}")
        # Remove leftovers so a later run does not mistake them for a finished download.
        if os.path.exists(local_path):
            os.remove(local_path)
        raise

download_checkpoint_if_not_exists(pose_checkpoint_file_url, local_pose_checkpoint_file)
download_checkpoint_if_not_exists(det_checkpoint_file_url, local_det_checkpoint_file)

# Verify that config files exist at their new paths
if not os.path.exists(local_pose_config_file):
    print(f"ERROR: Pose config file not found: {local_pose_config_file}")
    print("Please check the mmpose repository structure and path.")
else:
    print(f"Pose config file found: {local_pose_config_file}")

if not os.path.exists(local_det_config_file):
    print(f"ERROR: Detection config file not found: {local_det_config_file}")
    print("Please check the mmdetection repository structure and path.")
else:
    print(f"Detection config file found: {local_det_config_file}")
    
print("\nModel configuration paths updated to use cloned repos. Checkpoint paths set.")

Cloning https://github.com/open-mmlab/mmdetection.git (tag: v3.1.0) to /kaggle/working/mmdetection...
Successfully cloned https://github.com/open-mmlab/mmdetection.git to /kaggle/working/mmdetection
Cloning https://github.com/open-mmlab/mmpose.git (tag: v1.1.0) to /kaggle/working/mmpose...
Successfully cloned https://github.com/open-mmlab/mmpose.git to /kaggle/working/mmpose
Downloading checkpoint https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth to /kaggle/working/checkpoints/rtmpose-l_coco-wholebody.pth...


100%|██████████| 128M/128M [00:07<00:00, 17.3MB/s] 


Download complete.
Downloading checkpoint https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth to /kaggle/working/checkpoints/faster_rcnn_r50_fpn_1x_coco.pth...


100%|██████████| 160M/160M [00:15<00:00, 10.8MB/s] 

Download complete.
Pose config file found: /kaggle/working/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
Detection config file found: /kaggle/working/mmdetection/configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py

Model configuration paths updated to use cloned repos. Checkpoint paths set.





In [3]:
from mmdet.apis import init_detector, inference_detector
from mmpose.apis import init_model as init_pose_estimator
from mmengine.registry import DefaultScope # Import DefaultScope class for its static methods

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# --- Manually Define COCO-WholeBody Dataset Info (133 keypoints) ---
# (COCO_WHOLEBODY_KEYPOINT_NAMES and OFFICIAL_MMPOSE_COCO_WHOLEBODY_SKELETON_LINKS definitions remain here as before)
# Placeholder names 'kpt_0' ... 'kpt_132': one generic label per keypoint index (133 total).
COCO_WHOLEBODY_KEYPOINT_NAMES = [f'kpt_{kpt_index}' for kpt_index in range(133)]
# Pairs of keypoint indices; each pair is one link to draw between two keypoints.
# This list is later written into pose_estimator.dataset_meta['skeleton_links'].
# NOTE(review): despite the name, this list may not match the skeleton defined in mmpose's
# own COCO-WholeBody dataset metainfo (it chains indices in the 23-90 range, which the
# animation cell treats as face keypoints) -- TODO verify against the mmpose dataset config
# before relying on these links for visualisation.
OFFICIAL_MMPOSE_COCO_WHOLEBODY_SKELETON_LINKS = [
    [15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12],
    [5, 6], [5, 7], [6, 8], [7, 9], [8, 10], [1, 2], [0, 1], [0, 2],
    [1, 3], [2, 4], [3, 5], [4, 6], [17, 18], [18, 19], [19, 20],
    [20, 21], [21, 22], [23, 24], [24, 25], [25, 26], [26, 27], [27, 28],
    [28, 29], [23, 30], [30, 31], [31, 32], [32, 33], [23, 34], [34, 35],
    [35, 36], [36, 37], [23, 38], [38, 39], [39, 40], [40, 41], [42, 43],
    [43, 44], [44, 45], [45, 46], [42, 47], [47, 48], [48, 49], [49, 50],
    [42, 51], [51, 52], [52, 53], [53, 54], [42, 55], [55, 56], [56, 57],
    [57, 58], [59, 60], [60, 61], [61, 62], [62, 63], [59, 64], [64, 65],
    [65, 66], [66, 67], [59, 68], [68, 69], [69, 70], [70, 71], [59, 72],
    [72, 73], [73, 74], [74, 75], [59, 76], [76, 77], [77, 78], [78, 79],
    [80, 81], [81, 82], [82, 83], [80, 84], [84, 85], [85, 86], [80, 87],
    [87, 88], [88, 89], [80, 90], [91, 92], [92, 93], [93, 94], [94, 95],
    [91, 96], [96, 97], [97, 98], [98, 99], [91, 100], [100, 101],
    [101, 102], [102, 103], [91, 104], [104, 105], [105, 106],
    [106, 107], [91, 108], [108, 109], [109, 110], [110, 111], [112, 113],
    [113, 114], [114, 115], [115, 116], [112, 117], [117, 118],
    [118, 119], [119, 120], [112, 121], [121, 122], [122, 123],
    [123, 124], [112, 125], [125, 126], [126, 127], [127, 128],
    [112, 129], [129, 130], [130, 131], [131, 132]
]

# --- Initialize models ---
# Both models are built from the config/checkpoint paths set up in the previous cell and
# placed on `device`. Each is constructed inside a DefaultScope override so that the
# build happens under the matching scope name ('mmdet' or 'mmpose').
print("Initializing detector...")
with DefaultScope.overwrite_default_scope(scope_name='mmdet'): # scope override is a context manager
    detector = init_detector(local_det_config_file, local_det_checkpoint_file, device=device)
print("Detector initialized.")

# Initialize pose estimator within the 'mmpose' scope
print("Initializing pose estimator...")
with DefaultScope.overwrite_default_scope(scope_name='mmpose'): # scope override is a context manager
    pose_estimator = init_pose_estimator(local_pose_config_file, local_pose_checkpoint_file, device=device)
    
    # --- Manually set the dataset_meta for COCO-WholeBody ---
    # Later cells read dataset_meta['keypoint_names'] and dataset_meta['skeleton_links'],
    # so both keys are written here from the hand-maintained lists defined above.
    print("Explicitly setting COCO-WholeBody dataset_meta...")
    
    new_dataset_meta = {
        'keypoint_names': COCO_WHOLEBODY_KEYPOINT_NAMES,
        'num_keypoints': len(COCO_WHOLEBODY_KEYPOINT_NAMES),
        'skeleton_links': OFFICIAL_MMPOSE_COCO_WHOLEBODY_SKELETON_LINKS,
    }
    
    # update() overwrites any same-named keys already present on the model's metadata.
    # NOTE(review): if the loaded model already carries its own 'skeleton_links', they are
    # replaced here -- confirm that this override is intended.
    if hasattr(pose_estimator, 'dataset_meta') and pose_estimator.dataset_meta is not None:
        pose_estimator.dataset_meta.update(new_dataset_meta)
        print("Updated existing pose_estimator.dataset_meta.")
    else:
        # No metadata attached to the model: install the manual dictionary as-is.
        pose_estimator.dataset_meta = new_dataset_meta
        print("Set new pose_estimator.dataset_meta.")
            
    print(f"Using {len(pose_estimator.dataset_meta['keypoint_names'])} keypoint names.")
    print(f"Using {len(pose_estimator.dataset_meta['skeleton_links'])} skeleton links.")

print("Pose estimator initialized.")
print("Models loaded successfully.")

  check_for_updates()


Using device: cuda:0
Initializing detector...
Loads checkpoint by local backend from path: /kaggle/working/checkpoints/faster_rcnn_r50_fpn_1x_coco.pth
Detector initialized.
Initializing pose estimator...
Loads checkpoint by local backend from path: /kaggle/working/checkpoints/rtmpose-l_coco-wholebody.pth
Explicitly setting COCO-WholeBody dataset_meta...
Updated existing pose_estimator.dataset_meta.
Using 133 keypoint names.
Using 128 skeleton links.
Pose estimator initialized.
Models loaded successfully.


In [16]:
# Cell 6 (CORRECTED AGAIN): Pose Extraction Functions

from mmpose.apis import inference_topdown
from mmengine.registry import DefaultScope 
import numpy as np 
import torch
from tqdm.auto import tqdm
import cv2

def extract_pose_from_frame(frame_bgr, detector_model, pose_estimator_model, person_label_id=0, detection_threshold=0.5):
    """
    Detects people in one frame and estimates the pose of the largest one.

    Detections are kept when their label equals `person_label_id` and their score is
    above `detection_threshold`; of those, the box with the greatest area is handed to
    the top-down pose estimator.

    Returns None when no detection qualifies or the pose estimator returns nothing.
    Otherwise returns a NumPy array with one row per keypoint: the keypoint values
    followed by that keypoint's score as the last column.
    """
    def _to_numpy(value):
        # Model outputs may arrive as torch Tensors or NumPy arrays; normalise to NumPy.
        return value.cpu().numpy() if isinstance(value, torch.Tensor) else value

    # --- Object Detection ---
    with DefaultScope.overwrite_default_scope(scope_name='mmdet'):
        detections = inference_detector(detector_model, frame_bgr).pred_instances

        is_confident_person = (detections.labels == person_label_id) & (detections.scores > detection_threshold)
        candidate_boxes = detections.bboxes[is_confident_person]
        if len(candidate_boxes) == 0:
            return None

        # Box columns are used as (x1, y1, x2, y2): area = width * height.
        box_widths = candidate_boxes[:, 2] - candidate_boxes[:, 0]
        box_heights = candidate_boxes[:, 3] - candidate_boxes[:, 1]
        biggest = _to_numpy(box_widths * box_heights).argmax()
        # Slice (not index) so the single box keeps its leading dimension.
        main_person_bbox = _to_numpy(candidate_boxes[biggest:biggest + 1])

    if main_person_bbox.shape[0] == 0:
        return None

    # --- Pose Estimation ---
    with DefaultScope.overwrite_default_scope(scope_name='mmpose'):
        pose_results = inference_topdown(pose_estimator_model, frame_bgr, main_person_bbox)
        if not pose_results:
            return None

        instances = pose_results[0].pred_instances
        keypoints_np = _to_numpy(instances.keypoints[0])
        scores_np = _to_numpy(instances.keypoint_scores[0])

        # Append the per-keypoint score as an extra trailing column.
        return np.concatenate((keypoints_np, scores_np[:, np.newaxis]), axis=1)

def process_video_clip_for_pose(video_clip_path, detector_model, pose_estimator_model):
    """
    Processes a video clip frame by frame to extract poses.

    Args:
        video_clip_path: Location of the clip, as a ``str`` or ``pathlib.Path``.
        detector_model: Detector forwarded to ``extract_pose_from_frame``.
        pose_estimator_model: Pose model forwarded to ``extract_pose_from_frame``.
            The length of its ``dataset_meta['keypoint_names']`` sets the row
            count of the NaN filler used for frames without a pose.

    Returns:
        A numpy array built from one entry per decoded frame (intended shape
        (num_frames, num_keypoints, 3)); frames where no pose was produced are
        filled with NaN. Returns None if the video cannot be opened or yields
        no frames.
    """
    # Accept plain strings too: only the file name is needed for the progress label.
    clip_name = Path(video_clip_path).name

    cap = cv2.VideoCapture(str(video_clip_path))
    if not cap.isOpened():
        print(f"Error: Could not open video {video_clip_path}")
        cap.release()
        return None

    all_frame_poses = []
    pbar = None
    try:
        num_keypoints = len(pose_estimator_model.dataset_meta['keypoint_names'])

        # Some containers report no usable frame count; let tqdm run without a total then.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        pbar = tqdm(total=total_frames if total_frames > 0 else None,
                    desc=f"Extracting Poses from {clip_name}", leave=False)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            pose_data = extract_pose_from_frame(frame, detector_model, pose_estimator_model)

            if pose_data is not None:
                all_frame_poses.append(pose_data)
            else:
                # Append NaNs if no person is detected or pose fails
                all_frame_poses.append(np.full((num_keypoints, 3), np.nan))

            pbar.update(1)
    finally:
        # Release the capture and progress bar even when inference raises mid-clip.
        cap.release()
        if pbar is not None:
            pbar.close()

    if not all_frame_poses:
        return None

    return np.array(all_frame_poses)

print("Corrected pose extraction functions are defined (handles both Tensor and NumPy outputs).")

Corrected pose extraction functions are defined (handles both Tensor and NumPy outputs).


In [17]:
# Cell 7 (FINAL-FIX-2): Main Processing Loop - From Video to Pose Data

import pandas as pd
import requests
import re
import json
from pathlib import Path

# --- Configuration ---
NUM_SIGNS_TO_PROCESS = 10 # Start with 10 signs, increase later
CLEANUP_TEMP_FILES = False # Set to True to delete segmented videos after processing
EXCEL_FILE_PATH = '/kaggle/input/aslvid/asllvd_signs_2024_06_27.xlsx'
ASLLVD_BASE_URL = "http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/"

# --- Directories ---
BASE_WORKING_DIR = Path('/kaggle/working/')
FULL_VIDEO_DOWNLOAD_DIR = BASE_WORKING_DIR / 'full_source_videos'
SEGMENTED_VIDEO_DIR = BASE_WORKING_DIR / 'segmented_sign_clips'
POSE_DATA_OUTPUT_DIR = BASE_WORKING_DIR / 'pose_data'
ANIMATION_OUTPUT_DIR = BASE_WORKING_DIR / 'animations'

# --- Create all directories ---
for d in [FULL_VIDEO_DOWNLOAD_DIR, SEGMENTED_VIDEO_DIR, POSE_DATA_OUTPUT_DIR, ANIMATION_OUTPUT_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# --- Utility Functions (with CORRECTED parser) ---

def load_excel_data(excel_path, num_samples=None):
    """Read the sign spreadsheet, drop incomplete rows, and optionally subsample it.

    Rows missing any required column value are removed and the two frame-number
    columns are cast to int. When `num_samples` is given and smaller than the
    number of remaining rows, that many rows are drawn with a fixed random seed;
    otherwise all rows are returned. Any exception is reported via print and an
    empty DataFrame is returned instead.
    """
    start_col = 'start frame of the sign (relative to full videos)'
    end_col = 'end frame of the sign (relative to full videos)'
    required_cols = ['full video file', start_col, end_col, 'Video ID number', 'Class Label']

    try:
        signs = (
            pd.read_excel(excel_path)
            .dropna(subset=required_cols)
            .astype({start_col: int, end_col: int})
        )
        wants_subset = num_samples is not None and num_samples < len(signs)
        return signs.sample(n=num_samples, random_state=42) if wants_subset else signs
    except Exception as e:
        print(f"Error loading Excel file: {e}")
        return pd.DataFrame()

def parse_and_download_video(excel_row, download_dir):
    """
    Parses an ASLLVD source-video filename and downloads the video if not cached.

    Args:
        excel_row: Mapping/row providing the 'full video file' entry, expected in
            the form ``<session>[_-]scene<N>-camera<M>.mov``.
        download_dir: Directory in which the downloaded video is stored.

    Returns:
        Path of the local video file, or None when the filename cannot be parsed
        or the HTTP download fails.

    The download is streamed into a temporary ``.part`` file and only renamed to
    its final name once complete, so an interrupted transfer can never be
    mistaken for a finished download on a later run.
    """
    full_video_name = excel_row['full video file'].strip()

    # This single regex handles all known formats by allowing `_` or `-` before "scene"
    # and correctly escapes the dot in `.mov`.
    match = re.match(r'^(.*?)[_-]scene(\d+)-camera(\d+)\.mov$', full_video_name)

    if not match:
        print(f"Warning: Could not parse filename: {full_video_name}")
        return None

    session_dir, scene_num, cam_num = match.groups()

    # The server uses a different filename format for some sessions.
    # We need to construct the filename as the server expects it.
    if "Brady-session" in session_dir:
        actual_filename_on_server = f"scene-{scene_num}-camera{cam_num}.mov"
    else:
        actual_filename_on_server = f"scene{scene_num}-camera{cam_num}.mov"

    video_url_path = f"{session_dir}/{actual_filename_on_server}"
    full_url = ASLLVD_BASE_URL + video_url_path

    # The local filename should be unique, so it is prefixed with the session directory.
    local_video_name = f"{session_dir}__{actual_filename_on_server}"
    local_video_path = Path(download_dir) / local_video_name

    if not local_video_path.exists():
        print(f"Downloading: {full_url}")
        partial_path = local_video_path.with_name(local_video_path.name + '.part')
        try:
            # The context manager closes the streamed response (and its connection).
            with requests.get(full_url, stream=True, timeout=300) as response:
                response.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192*4):
                        f.write(chunk)
            # Publish the file under its final name only after the full body was written.
            partial_path.replace(local_video_path)
            print(f" -> Saved to {local_video_path.name}")
        except requests.exceptions.RequestException as e:
            print(f"  -> ERROR downloading {full_url}: {e}")
            return None
        finally:
            # Drop any half-written temp file, whatever the failure was.
            if partial_path.exists():
                partial_path.unlink()

    return local_video_path

def segment_video_ffmpeg(full_path, output_path, start_frame, end_frame):
    """Cut a frame range out of `full_path` with ffmpeg and write it to `output_path`.

    The trim filter is given `start_frame - 1` as its start and `end_frame` as its
    end, audio is dropped, and an existing output file is overwritten. Returns
    `output_path` when ffmpeg succeeds and the output exists with non-zero size;
    returns None when ffmpeg exits with an error or the output is missing or empty.
    """
    trim_filter = f"trim=start_frame={start_frame-1}:end_frame={end_frame},setpts=PTS-STARTPTS"
    ffmpeg_args = [
        'ffmpeg', '-hide_banner', '-loglevel', 'error',
        '-i', str(full_path),
        '-vf', trim_filter,
        '-an', '-y', str(output_path),
    ]

    try:
        subprocess.run(ffmpeg_args, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error during ffmpeg segmentation for {full_path.name}: {e}")
        return None

    wrote_output = output_path.exists() and output_path.stat().st_size > 0
    return output_path if wrote_output else None

# --- Main Loop ---
# For each sampled spreadsheet row: download the source video, cut out the sign clip,
# extract per-frame poses into an .npz file, and record gloss -> npz path(s).
# A row is skipped (continue) as soon as any of those stages fails.
excel_data_df = load_excel_data(EXCEL_FILE_PATH, num_samples=NUM_SIGNS_TO_PROCESS)
gloss_to_npz_map = {}  # upper-cased gloss -> list of npz paths relative to BASE_WORKING_DIR

for index, row in tqdm(excel_data_df.iterrows(), total=excel_data_df.shape[0], desc="Processing Signs"):
    video_id = str(row['Video ID number'])
    gloss = str(row['Class Label']).strip().upper()  # normalised gloss used as the map key
    start_frame, end_frame = row['start frame of the sign (relative to full videos)'], row['end frame of the sign (relative to full videos)']
    
    print(f"\nProcessing sign: {gloss} (ID: {video_id})")

    # 1. Download full video (an already-downloaded file is reused)
    full_video_path = parse_and_download_video(row, FULL_VIDEO_DOWNLOAD_DIR)
    if not full_video_path:
        continue

    # 2. Segment the clip
    # Keep only ASCII letters, digits and underscores so the gloss is safe in a file name.
    safe_gloss = re.sub(r'[^a-zA-Z0-9_]', '', gloss)
    segmented_clip_name = f"sign_{video_id}_{safe_gloss}_{start_frame}-{end_frame}.mp4"
    segmented_clip_path = SEGMENTED_VIDEO_DIR / segmented_clip_name
    
    # Note: segmentation runs on every pass, even when the pose file below already exists.
    if not segment_video_ffmpeg(full_video_path, segmented_clip_path, start_frame, end_frame):
        continue

    # 3. Extract Poses (skipped when the .npz for this clip is already on disk)
    output_npz_path = POSE_DATA_OUTPUT_DIR / f"poses_{segmented_clip_path.stem}.npz"
    if not output_npz_path.exists():
        pose_data_np = process_video_clip_for_pose(segmented_clip_path, detector, pose_estimator)
        if pose_data_np is not None:
            # The array is stored under the key 'poses'.
            np.savez_compressed(output_npz_path, poses=pose_data_np)
            print(f"Saved pose data to {output_npz_path}")
        else:
            print(f"Pose extraction failed for {segmented_clip_path.name}")
            continue
            
    # 4. Map Gloss to NPZ file
    # Paths are stored relative to BASE_WORKING_DIR; duplicates are not appended twice.
    if output_npz_path.exists():
        relative_path = str(output_npz_path.relative_to(BASE_WORKING_DIR))
        if gloss not in gloss_to_npz_map:
            gloss_to_npz_map[gloss] = []
        if relative_path not in gloss_to_npz_map[gloss]:
            gloss_to_npz_map[gloss].append(relative_path)
            
    # Optionally delete the segmented clip once its poses have been handled.
    if CLEANUP_TEMP_FILES:
        segmented_clip_path.unlink()

# --- Save the gloss map ---
# Written as JSON so that later cells can reload the gloss -> npz mapping from disk.
GLOSS_MAP_FILE = BASE_WORKING_DIR / 'gloss_to_pose_map.json'
with open(GLOSS_MAP_FILE, 'w') as f:
    json.dump(gloss_to_npz_map, f, indent=4)

print(f"\nAll signs processed. Gloss map saved to {GLOSS_MAP_FILE}")
print(f"Total unique glosses mapped: {len(gloss_to_npz_map)}")

Processing Signs:   0%|          | 0/10 [00:00<?, ?it/s]


Processing sign: MOST (ID: 5939)


Extracting Poses from sign_5939_MOST_3242-3272.mp4:   0%|          | 0/31 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_5939_MOST_3242-3272.npz

Processing sign: BLACK+FS-BERRY (ID: 39496)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_01_11/scene45-camera1.mov
 -> Saved to ASL_2008_01_11__scene45-camera1.mov


Extracting Poses from sign_39496_BLACKFSBERRY_285-345.mp4:   0%|          | 0/61 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_39496_BLACKFSBERRY_285-345.npz

Processing sign: WATCH (ID: 8777)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2011_07_22_Brady/scene25-camera1.mov
 -> Saved to ASL_2011_07_22_Brady__scene25-camera1.mov


Extracting Poses from sign_8777_WATCH_141-180.mp4:   0%|          | 0/40 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_8777_WATCH_141-180.npz

Processing sign: AFRAID (ID: 857)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_01_18/scene39-camera1.mov
 -> Saved to ASL_2008_01_18__scene39-camera1.mov


Extracting Poses from sign_857_AFRAID_2130-2155.mp4:   0%|          | 0/26 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_857_AFRAID_2130-2155.npz

Processing sign: KIND (ID: 5203)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_05_29b/scene14-camera1.mov
 -> Saved to ASL_2008_05_29b__scene14-camera1.mov


Extracting Poses from sign_5203_KIND_1180-1221.mp4:   0%|          | 0/42 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_5203_KIND_1180-1221.npz

Processing sign: RUN (ID: 615)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_02_01/scene37-camera1.mov
 -> Saved to ASL_2008_02_01__scene37-camera1.mov


Extracting Poses from sign_615_RUN_2780-2845.mp4:   0%|          | 0/66 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_615_RUN_2780-2845.npz

Processing sign: RED (ID: 7219)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2011_07_19_Brady/scene52-camera1.mov
 -> Saved to ASL_2011_07_19_Brady__scene52-camera1.mov


Extracting Poses from sign_7219_RED_1914-1941.mp4:   0%|          | 0/28 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_7219_RED_1914-1941.npz

Processing sign: INFORM (ID: 4978)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_08_13/scene26-camera1.mov
 -> Saved to ASL_2008_08_13__scene26-camera1.mov


Extracting Poses from sign_4978_INFORM_4120-4141.mp4:   0%|          | 0/22 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_4978_INFORM_4120-4141.npz

Processing sign: BLOSSOM (ID: 1533)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2008_05_12a/scene29-camera1.mov
 -> Saved to ASL_2008_05_12a__scene29-camera1.mov


Extracting Poses from sign_1533_BLOSSOM_2240-2275.mp4:   0%|          | 0/36 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_1533_BLOSSOM_2240-2275.npz

Processing sign: PLAY-AROUND (ID: 6885)
Downloading: http://csr.bu.edu/ftp/asl/asllvd/asl-data2/quicktime/ASL_2011_07_19_Brady/scene36-camera1.mov
 -> Saved to ASL_2011_07_19_Brady__scene36-camera1.mov


Extracting Poses from sign_6885_PLAYAROUND_436-489.mp4:   0%|          | 0/54 [00:00<?, ?it/s]

Saved pose data to /kaggle/working/pose_data/poses_sign_6885_PLAYAROUND_436-489.npz

All signs processed. Gloss map saved to /kaggle/working/gloss_to_pose_map.json
Total unique glosses mapped: 10


In [18]:
# Cell 8 (FINAL): Animate Skeletons from Pose Data
#
# For every gloss in the gloss map, load the first saved pose sequence and
# render it as a stick-figure .mp4 (skeleton lines + colored keypoints on a
# black canvas cropped to the confident keypoints).

import cv2
import numpy as np
import json
import re  # needed by re.sub below; imported here so this cell does not depend on a later cell
from pathlib import Path

# --- Configuration ---
# BASE_WORKING_DIR, ANIMATION_OUTPUT_DIR, pose_estimator and tqdm are inherited from earlier cells
CONFIDENCE_THRESHOLD = 0.3  # keypoints at or below this confidence are ignored
FPS = 30                    # frame rate written into the output videos

# --- Load the Gloss Map ---
GLOSS_MAP_FILE = BASE_WORKING_DIR / 'gloss_to_pose_map.json'
if not GLOSS_MAP_FILE.exists():
    print(f"ERROR: Gloss map not found at {GLOSS_MAP_FILE}. Cannot create animations.")
else:
    with open(GLOSS_MAP_FILE, 'r') as f:
        gloss_to_npz_map = json.load(f)

    # --- Use skeleton links from the initialized model ---
    skeleton_links = pose_estimator.dataset_meta.get('skeleton_links', [])
    if not skeleton_links:
        print("Warning: Could not get skeleton links from model, animations may be incorrect.")

    # --- Color Definitions (BGR for OpenCV) ---
    COLOR_BODY = (255, 128, 0)   # Blue
    COLOR_FACE = (100, 200, 0)   # Green
    COLOR_HANDS = (100, 100, 255) # Red
    COLOR_SKELETON = (200, 200, 200) # Light Grey
    
    # Keypoint indices for coloring
    # NOTE(review): these ranges presumably follow a 133-keypoint whole-body layout
    # (body/feet first, then face, then hands) — confirm against the model config.
    FACE_KP_INDICES = list(range(23, 91))
    LEFT_HAND_KP_INDICES = list(range(91, 112))
    RIGHT_HAND_KP_INDICES = list(range(112, 133))

    # Set-based lookups so the per-keypoint color choice is O(1) instead of a list scan.
    face_kp_lookup = set(FACE_KP_INDICES)
    hand_kp_lookup = set(LEFT_HAND_KP_INDICES) | set(RIGHT_HAND_KP_INDICES)

    # Loop-invariant settings, computed once.
    padding = 50
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    print(f"\nStarting animation generation for {len(gloss_to_npz_map)} unique glosses...")

    for gloss, npz_paths in tqdm(gloss_to_npz_map.items(), desc="Generating Animations"):
        if not npz_paths:
            continue
        
        # Animate the first available video for each gloss
        npz_relative_path = Path(npz_paths[0])
        npz_full_path = BASE_WORKING_DIR / npz_relative_path
        
        safe_gloss = re.sub(r'[^a-zA-Z0-9_]', '', gloss)
        animation_output_path = ANIMATION_OUTPUT_DIR / f"anim_{safe_gloss}_{npz_full_path.stem}.mp4"

        try:
            # The context manager closes the underlying .npz file handle once the
            # array has been read into memory.
            with np.load(npz_full_path) as data:
                pose_data = data['poses']
        except Exception as e:
            print(f"Could not load {npz_full_path}: {e}")
            continue

        # Filter out low-confidence keypoints to find animation bounds
        visible_keypoints = pose_data[pose_data[..., 2] > CONFIDENCE_THRESHOLD]
        if visible_keypoints.shape[0] == 0:
            print(f"Skipping '{gloss}': No keypoints found above confidence threshold.")
            continue

        # Determine canvas size: bounding box of all confident keypoints over
        # the whole clip, plus a fixed margin on every side.
        min_x, min_y = np.min(visible_keypoints[:, :2], axis=0)
        max_x, max_y = np.max(visible_keypoints[:, :2], axis=0)
        width = int(max_x - min_x) + (2 * padding)
        height = int(max_y - min_y) + (2 * padding)
        offset_x = int(min_x - padding)
        offset_y = int(min_y - padding)
        
        # Ensure even dimensions for video codecs
        width += (width % 2)
        height += (height % 2)

        # Initialize VideoWriter
        video_writer = cv2.VideoWriter(str(animation_output_path), fourcc, FPS, (width, height))
        if not video_writer.isOpened():
            # Without this check a failed open produces no file and no error message.
            print(f"Could not open video writer for {animation_output_path}; skipping '{gloss}'.")
            video_writer.release()
            continue

        try:
            for frame_idx in range(pose_data.shape[0]):
                keypoints = pose_data[frame_idx]
                canvas = np.zeros((height, width, 3), dtype=np.uint8) # Black background

                # Draw skeleton: a link is drawn only when both of its endpoints are confident
                for start_idx, end_idx in skeleton_links:
                    if keypoints[start_idx, 2] > CONFIDENCE_THRESHOLD and keypoints[end_idx, 2] > CONFIDENCE_THRESHOLD:
                        pt1 = (int(keypoints[start_idx, 0] - offset_x), int(keypoints[start_idx, 1] - offset_y))
                        pt2 = (int(keypoints[end_idx, 0] - offset_x), int(keypoints[end_idx, 1] - offset_y))
                        cv2.line(canvas, pt1, pt2, COLOR_SKELETON, 1, cv2.LINE_AA)
                
                # Draw keypoints
                for i, (x, y, conf) in enumerate(keypoints):
                    if conf > CONFIDENCE_THRESHOLD:
                        center = (int(x - offset_x), int(y - offset_y))
                        if i in face_kp_lookup:
                            color = COLOR_FACE
                        elif i in hand_kp_lookup:
                            color = COLOR_HANDS
                        else:
                            color = COLOR_BODY # Default
                        cv2.circle(canvas, center, 3, color, -1, cv2.LINE_AA)

                video_writer.write(canvas)
        finally:
            # Always finalize the file, even if drawing a frame raised.
            video_writer.release()

    print(f"\nFinished! All animations saved in: {ANIMATION_OUTPUT_DIR}")


Starting animation generation for 10 unique glosses...


Generating Animations:   0%|          | 0/10 [00:00<?, ?it/s]


Finished! All animations saved in: /kaggle/working/animations


In [20]:
# Cell 9 (CORRECTED): Normalize Poses and Export to JSON for Three.js

import numpy as np
import json
import re
from pathlib import Path

# --- Configuration ---
# Folder that receives the exported JSON files; created if missing, reused if present.
JSON_OUTPUT_DIR = BASE_WORKING_DIR.joinpath('json_for_animator')
JSON_OUTPUT_DIR.mkdir(parents=False, exist_ok=True)

# Keypoint indices for normalization
LEFT_HIP_IDX = 11
RIGHT_HIP_IDX = 12
LEFT_SHOULDER_IDX = 5
RIGHT_SHOULDER_IDX = 6

# --- Helper Functions ---

def normalize_pose_data(pose_data, conf_threshold=0.3):
    """
    Normalizes a sequence of poses to be centered and scaled.

    Each frame is translated so that the midpoint of the two hip keypoints is
    at the origin, then its x/y coordinates are divided by the distance
    between the two shoulder keypoints. Only columns 0-1 (x, y) are changed;
    column 2 is carried over untouched.

    Args:
        pose_data: Array-like of shape (num_frames, num_keypoints, 3) whose
            last axis is (x, y, confidence). Integer input is accepted and
            converted to float64.
        conf_threshold: Minimum confidence required on both hips and both
            shoulders for a frame to be normalized. Defaults to 0.3.

    Returns:
        A new floating-point array with the same shape as the input. Frames
        that cannot be normalized (a hip or shoulder confidence below
        `conf_threshold` or NaN, or a shoulder distance under 1e-6) are
        filled entirely with NaN. The input is never modified.
    """
    poses = np.asarray(pose_data)
    if np.issubdtype(poses.dtype, np.floating):
        normalized_data = poses.copy()
    else:
        # Integer (or other non-float) arrays cannot hold NaN or the result of
        # a float division, so promote them before working in place.
        normalized_data = poses.astype(np.float64)

    anchor_indices = [LEFT_HIP_IDX, RIGHT_HIP_IDX, LEFT_SHOULDER_IDX, RIGHT_SHOULDER_IDX]
    min_shoulder_width = 1e-6

    for frame_idx in range(normalized_data.shape[0]):
        # View into normalized_data: in-place edits below update the output directly.
        frame_keypoints = normalized_data[frame_idx]

        # Both hips and both shoulders must be trusted; written as
        # "not all >= threshold" so that NaN confidences are rejected too.
        anchor_conf = frame_keypoints[anchor_indices, 2]
        if not np.all(anchor_conf >= conf_threshold):
            frame_keypoints[...] = np.nan
            continue

        # 1. Anchor to Hip Center
        hip_center = (frame_keypoints[LEFT_HIP_IDX, :2] + frame_keypoints[RIGHT_HIP_IDX, :2]) / 2.0
        frame_keypoints[:, :2] -= hip_center

        # 2. Scale by Shoulder Width
        shoulder_width = np.linalg.norm(
            frame_keypoints[LEFT_SHOULDER_IDX, :2] - frame_keypoints[RIGHT_SHOULDER_IDX, :2]
        )
        if not shoulder_width >= min_shoulder_width:
            # Degenerate (or NaN) scale: dividing would produce inf/NaN coordinates.
            frame_keypoints[...] = np.nan
            continue
        frame_keypoints[:, :2] /= shoulder_width

    return normalized_data


# --- Main Export Logic ---
# For every gloss in the gloss map: load its first pose sequence, normalize it,
# drop frames that could not be normalized, and write one JSON file per gloss.

GLOSS_MAP_FILE = BASE_WORKING_DIR / 'gloss_to_pose_map.json'
if not GLOSS_MAP_FILE.exists():
    print(f"ERROR: Gloss map not found at {GLOSS_MAP_FILE}. Cannot export.")
else:
    with open(GLOSS_MAP_FILE, 'r') as f:
        gloss_to_npz_map = json.load(f)

    # Get metadata from the model and ensure it's in standard Python types
    dataset_meta = pose_estimator.dataset_meta
    keypoint_names = dataset_meta.get('keypoint_names')
    if keypoint_names is None or len(keypoint_names) == 0:
        # MMPose 1.x metainfo appears to expose names as an id -> name mapping
        # ('keypoint_id2name') rather than a 'keypoint_names' list; fall back to
        # it so the export does not silently ship an empty name list.
        id_to_name = dataset_meta.get('keypoint_id2name', {})
        keypoint_names = [id_to_name[kp_id] for kp_id in sorted(id_to_name)]
    keypoint_names = [str(name) for name in keypoint_names]

    raw_skeleton_links = dataset_meta.get('skeleton_links', [])
    # <<< FIX #1: Convert skeleton links to standard Python ints >>>
    skeleton_links = [[int(p1), int(p2)] for p1, p2 in raw_skeleton_links]
    
    print(f"\nStarting JSON export for {len(gloss_to_npz_map)} unique glosses...")

    # Sanitized file stem -> gloss that produced it, to detect name collisions.
    exported_names = {}

    for gloss, npz_paths in tqdm(gloss_to_npz_map.items(), desc="Exporting to JSON"):
        if not npz_paths:
            continue
        
        # Only the first recording of each gloss is exported.
        npz_relative_path = Path(npz_paths[0])
        npz_full_path = BASE_WORKING_DIR / npz_relative_path

        try:
            # The context manager closes the .npz file handle after the array is read.
            with np.load(npz_full_path) as data:
                raw_pose_data = data['poses']
        except Exception as e:
            print(f"Could not load {npz_full_path}: {e}")
            continue
            
        normalized_poses = normalize_pose_data(raw_pose_data)
        
        # <<< FIX #2: Convert keypoint data to standard Python floats >>>
        # tolist() yields plain Python floats; each keypoint becomes
        # [x, y, z, confidence] with z fixed at 0.0. Frames containing any NaN
        # (i.e. frames the normalizer rejected) are left out.
        json_frames = [
            [[x, y, 0.0, conf] for x, y, conf in frame.tolist()]
            for frame in normalized_poses
            if not np.isnan(frame).any()
        ]
            
        if not json_frames:
            print(f"Skipping '{gloss}': No valid frames after normalization.")
            continue

        output_data = {
            "sign_name": gloss,
            # NOTE(review): the skeleton-animation cell renders its videos with
            # FPS = 30 while this export declares 60 — confirm which rate matches
            # the source clips.
            "fps": 60,
            "total_frames": len(json_frames),
            "keypoint_names": keypoint_names,
            "skeleton_links": skeleton_links,
            "frames": json_frames
        }

        safe_gloss_name = re.sub(r'[^a-zA-Z0-9_]', '', gloss)
        json_output_path = JSON_OUTPUT_DIR / f"{safe_gloss_name}.json"

        # Different glosses can collapse to the same sanitized name; the later
        # one still overwrites the earlier file, but no longer silently.
        if safe_gloss_name in exported_names:
            print(f"Warning: '{gloss}' overwrites the export of '{exported_names[safe_gloss_name]}' ({json_output_path.name}).")
        exported_names[safe_gloss_name] = gloss
        
        with open(json_output_path, 'w') as f:
            json.dump(output_data, f)

    print(f"\nFinished! All JSON files saved in: {JSON_OUTPUT_DIR}")



Starting JSON export for 10 unique glosses...


Exporting to JSON:   0%|          | 0/10 [00:00<?, ?it/s]


Finished! All JSON files saved in: /kaggle/working/json_for_animator
