# Debug Labels
This notebook investigates why the dataset is yielding all-zero labels. We will inspect `train.csv`, annotation files, and the label creation logic.

In [1]:
import pandas as pd
import numpy as np
import os
import json
import ast
from pathlib import Path

# Resolve the data directory: prefer '../data' and fall back to 'data'
# (the latter applies when the notebook is launched from the project root).
data_dir = Path('../data')
data_dir = data_dir if data_dir.exists() else Path('data')

print(f"Data Directory: {data_dir.resolve()}")

# Read the training metadata table and preview its first rows.
train_csv_path = data_dir.joinpath('train.csv')
df_train = pd.read_csv(train_csv_path)
print(f"Loaded train.csv with {df_train.shape[0]} rows.")
df_train.head()

Data Directory: C:\Users\Windows11\Downloads\mice_social_action_new\data
Loaded train.csv with 8789 rows.


Unnamed: 0,lab_id,video_id,mouse1_strain,mouse1_color,mouse1_sex,mouse1_id,mouse1_age,mouse1_condition,mouse2_strain,mouse2_color,...,pix_per_cm_approx,video_width_pix,video_height_pix,arena_width_cm,arena_height_cm,arena_shape,arena_type,body_parts_tracked,behaviors_labeled,tracking_method
0,AdaptableSnail,44566106,CD-1 (ICR),white,male,10.0,8-12 weeks,wireless device,CD-1 (ICR),white,...,16.0,1228,1068,60.0,60.0,square,familiar,"[""body_center"", ""ear_left"", ""ear_right"", ""head...","[""mouse1,mouse2,approach"", ""mouse1,mouse2,atta...",DeepLabCut
1,AdaptableSnail,143861384,CD-1 (ICR),white,male,3.0,8-12 weeks,,CD-1 (ICR),white,...,9.7,968,608,60.0,60.0,square,familiar,"[""body_center"", ""ear_left"", ""ear_right"", ""late...","[""mouse1,mouse2,approach"", ""mouse1,mouse2,atta...",DeepLabCut
2,AdaptableSnail,209576908,CD-1 (ICR),white,male,7.0,8-12 weeks,,CD-1 (ICR),white,...,16.0,1266,1100,60.0,60.0,square,familiar,"[""body_center"", ""ear_left"", ""ear_right"", ""late...","[""mouse1,mouse2,approach"", ""mouse1,mouse2,atta...",DeepLabCut
3,AdaptableSnail,278643799,CD-1 (ICR),white,male,11.0,8-12 weeks,wireless device,CD-1 (ICR),white,...,16.0,1224,1100,60.0,60.0,square,familiar,"[""body_center"", ""ear_left"", ""ear_right"", ""head...","[""mouse1,mouse2,approach"", ""mouse1,mouse2,atta...",DeepLabCut
4,AdaptableSnail,351967631,CD-1 (ICR),white,male,14.0,8-12 weeks,,CD-1 (ICR),white,...,16.0,1204,1068,60.0,60.0,square,familiar,"[""body_center"", ""ear_left"", ""ear_right"", ""late...","[""mouse1,mouse2,approach"", ""mouse1,mouse2,atta...",DeepLabCut


In [2]:
# Select a sample video (the first row of train.csv) and compare the classes it
# declares against the actions actually present in its annotation file.
row = df_train.iloc[0]
lab_id = row['lab_id']
video_id = str(row['video_id'])
behaviors_str = row['behaviors_labeled']

# `behaviors_labeled` is a stringified list. Try strict JSON first, then a
# Python literal. Only parsing errors are caught so that unrelated failures
# (e.g. a keyboard interrupt) are not silently turned into an empty class list.
try:
    classes = json.loads(behaviors_str)
except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError subclass
    try:
        classes = ast.literal_eval(behaviors_str)
    except (TypeError, ValueError, SyntaxError):
        print(f"WARNING: Could not parse behaviors_labeled: {behaviors_str!r}")
        classes = []

print(f"Lab ID: {lab_id}")
print(f"Video ID: {video_id}")
print(f"Classes ({len(classes)}): {classes}")

# Construct annotation path
anno_path = data_dir / 'train_annotation' / lab_id / f'{video_id}.parquet'
print(f"Annotation Path: {anno_path}")

if not anno_path.exists():
    print("ERROR: Annotation file does not exist!")
    # Always define df_anno so later cells that test `df_anno.empty` neither
    # raise NameError nor silently reuse a frame left over from a previous run.
    df_anno = pd.DataFrame()
else:
    df_anno = pd.read_parquet(anno_path)
    print(f"Loaded annotation with {len(df_anno)} rows.")
    print("Columns:", df_anno.columns.tolist())
    if not df_anno.empty:
        print("\nSample Rows:")
        print(df_anno.head())

        print("\nUnique Actions in Annotation:")
        unique_actions = df_anno['action'].unique()
        print(unique_actions)

        # Raw comparison: annotation 'action' values vs. the class strings as-is.
        common = set(unique_actions) & set(classes)
        print(f"\nCommon Actions (Annotation vs Train.csv): {common}")

        # The class strings may be 'agent,target,action' triples while the
        # annotation stores the bare action name, in which case the raw
        # intersection above is empty even though the data agree. Compare the
        # trailing component of each class as well.
        class_actions = {str(c).rsplit(',', 1)[-1] for c in classes}
        common_by_action = set(unique_actions) & class_actions
        print(f"Common Actions (by action component): {common_by_action}")

        if not common and not common_by_action:
            print("WARNING: No overlap between annotation actions and train.csv classes!")
        elif not common:
            print("NOTE: Classes are composite 'agent,target,action' strings; "
                  "annotation actions only match after combining agent/target ids.")

Lab ID: AdaptableSnail
Video ID: 44566106
Classes (76): ['mouse1,mouse2,approach', 'mouse1,mouse2,attack', 'mouse1,mouse2,avoid', 'mouse1,mouse2,chase', 'mouse1,mouse2,chaseattack', 'mouse1,mouse2,submit', 'mouse1,mouse3,approach', 'mouse1,mouse3,attack', 'mouse1,mouse3,avoid', 'mouse1,mouse3,chase', 'mouse1,mouse3,chaseattack', 'mouse1,mouse3,submit', 'mouse1,mouse4,approach', 'mouse1,mouse4,attack', 'mouse1,mouse4,avoid', 'mouse1,mouse4,chase', 'mouse1,mouse4,chaseattack', 'mouse1,mouse4,submit', 'mouse1,self,rear', 'mouse2,mouse1,approach', 'mouse2,mouse1,attack', 'mouse2,mouse1,avoid', 'mouse2,mouse1,chase', 'mouse2,mouse1,chaseattack', 'mouse2,mouse1,submit', 'mouse2,mouse3,approach', 'mouse2,mouse3,attack', 'mouse2,mouse3,avoid', 'mouse2,mouse3,chase', 'mouse2,mouse3,chaseattack', 'mouse2,mouse3,submit', 'mouse2,mouse4,approach', 'mouse2,mouse4,attack', 'mouse2,mouse4,avoid', 'mouse2,mouse4,chase', 'mouse2,mouse4,chaseattack', 'mouse2,mouse4,submit', 'mouse2,self,rear', 'mouse3,m

In [8]:
# Simulate Label Creation Logic
import torch

def create_label_tensor(df_anno, start_frame, end_frame, num_frames, classes, verbose=True):
    """Build a per-frame multi-hot label tensor for one window of a video.

    Args:
        df_anno: Annotation frame with columns 'agent_id', 'target_id',
            'action', 'start_frame' and 'stop_frame'.
        start_frame: First frame of the window (inclusive).
        end_frame: End of the window; annotations starting at or after this
            frame are ignored.
        num_frames: Number of rows in the returned tensor. Marked ranges are
            clipped to ``[0, num_frames)``.
        classes: Sequence of class names; the position of a name is its
            column index in the returned tensor.
        verbose: When True (default) print the diagnostics used for
            debugging; when False the function is silent.

    Returns:
        A float32 tensor of shape ``(num_frames, len(classes))`` holding 1.0
        for every frame/class covered by an annotation and 0.0 elsewhere.

    Notes:
        Each annotation is first looked up under the composite name
        '<subject>,<object>,<action>' and, if that is not a known class, under
        the bare action name. Ids are shifted by +1 when building the
        composite (id 0 -> 'mouse1').
        NOTE(review): this assumes zero-based ids in the annotation file; if
        they are already one-based the offset mislabels agents - confirm
        against the data specification.
        NOTE(review): 'stop_frame' is treated as exclusive by the slice
        below - confirm that matches the annotation convention.
    """
    class_to_idx = {cls: i for i, cls in enumerate(classes)}
    labels = torch.zeros((num_frames, len(classes)), dtype=torch.float32)

    if df_anno.empty:
        return labels

    # Keep only annotations that overlap the half-open window [start_frame, end_frame).
    overlaps = (df_anno['start_frame'] < end_frame) & (df_anno['stop_frame'] > start_frame)
    df_slice = df_anno[overlaps]

    if verbose:
        print(f"Slice ({start_frame}-{end_frame}): Found {len(df_slice)} annotations.")

    for _, r in df_slice.iterrows():
        action = r['action']

        # Build the composite class name. Only failures caused by missing or
        # non-integer ids trigger the fallback to the bare action; anything
        # else is allowed to propagate instead of being swallowed.
        try:
            agent_id = int(r['agent_id'])
            target_id = int(r['target_id'])
        except (KeyError, TypeError, ValueError, OverflowError):
            composite_action = action
        else:
            subject = f"mouse{agent_id + 1}"
            obj = "self" if agent_id == target_id else f"mouse{target_id + 1}"
            composite_action = f"{subject},{obj},{action}"

        # Prefer the composite name, then the bare action name.
        if composite_action in class_to_idx:
            target_class = composite_action
        elif action in class_to_idx:
            target_class = action
        else:
            target_class = None

        if target_class is None:
            if verbose:
                print(f"  WARNING: Action '{composite_action}' (or '{action}') not in classes list!")
            continue

        idx = class_to_idx[target_class]
        # Convert absolute frame numbers to window-relative ones and clip to the window.
        s_w = max(0, int(r['start_frame'] - start_frame))
        e_w = min(num_frames, int(r['stop_frame'] - start_frame))

        if s_w < e_w:
            labels[s_w:e_w, idx] = 1.0
            if verbose:
                print(f"  Marked {target_class} at {s_w}:{e_w} (idx: {idx})")

    return labels

# Exercise the label logic on a window that is expected to contain labels.
if df_anno.empty:
    print("No annotations to test.")
else:
    # The first annotation row serves as the probe.
    first_anno = df_anno.iloc[0]
    action, start_f, stop_f = (
        first_anno[col] for col in ('action', 'start_frame', 'stop_frame')
    )

    print(f"\nTesting Window around frame {start_f} for action '{action}'...")

    # Snap the window start down to a multiple of the stride (equal to
    # window_size here); per the original note this mirrors the Dataset.
    window_size = 512
    w_idx = int(start_f // window_size)
    w_start = window_size * w_idx
    w_end = w_start + window_size

    print(f"Window: {w_start} - {w_end}")

    labels = create_label_tensor(df_anno, w_start, w_end, window_size, classes)

    # A non-zero sum / max of 1.0 means at least one frame was labelled.
    print(f"Label Tensor Sum: {labels.sum().item()}")
    print(f"Label Tensor Max: {labels.max().item()}")


Testing Window around frame 4 for action 'rear'...
Window: 0 - 512
Slice (0-512): Found 10 annotations.
  Marked mouse3,self,rear at 4:139 (idx: 56)
  Marked mouse4,self,rear at 156:213 (idx: 75)
  Marked mouse4,self,rear at 263:332 (idx: 75)
  Marked mouse2,self,rear at 299:308 (idx: 37)
  Marked mouse2,self,rear at 322:364 (idx: 37)
  Marked mouse4,self,rear at 375:393 (idx: 75)
  Marked mouse2,self,rear at 502:512 (idx: 37)
Label Tensor Sum: 340.0
Label Tensor Max: 1.0


In [7]:
# Show the annotation schema and one example record; the column names are
# what is needed to map a bare action such as 'rear' to 'mouse1,self,rear'.
if not df_anno.empty:
    print("Annotation Columns:", list(df_anno.columns))
    print("First Annotation Row:", df_anno.iloc[0].to_dict())

Annotation Columns: ['agent_id', 'target_id', 'action', 'start_frame', 'stop_frame']
First Annotation Row: {'agent_id': 2, 'target_id': 2, 'action': 'rear', 'start_frame': 4, 'stop_frame': 139}
