In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree. The joined path is computed but neither printed
# nor stored, so this loop produces no visible output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import itertools
import warnings
import optuna
import joblib
import gc
import os
import json
from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score
from xgboost import XGBClassifier
from tqdm import tqdm

# Keep the run output quiet: show Optuna messages only at WARNING level or
# above, and suppress every Python warning for the rest of the session.
optuna.logging.set_verbosity(optuna.logging.WARNING)
warnings.filterwarnings('ignore')

In [None]:
class CFG:
    """Central configuration: dataset locations, model output folders and training settings."""

    # Competition input files and directories.
    train_path = "/kaggle/input/MABe-mouse-behavior-detection/train.csv"
    test_path = "/kaggle/input/MABe-mouse-behavior-detection/test.csv"
    train_annotation_path = "/kaggle/input/MABe-mouse-behavior-detection/train_annotation"
    train_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/train_tracking"
    test_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/test_tracking"

    # Root folder for saved models, with one sub-folder name per model variant.
    model_path = "models"
    model_name_xgb_base = "xgboost_unified_base"
    # Read when the model directories are created; without it that cell fails
    # with AttributeError.
    model_name_xgb_augmented = "xgboost_unified_augmented"

    # Number of GroupKFold splits used for cross-validation.
    n_splits = 3

    # A high confidence threshold to ensure only high-quality pseudo-labels are used.
    CONFIDENCE_THRESHOLD = 0.90

    # Define a canonical set of body parts to be used for all feature generation.
    CANONICAL_BODY_PARTS = [
        'nose', 'ear_left', 'ear_right', 'body_center', 'tail_base',
        'spine_middle', 'tail_tip'
    ]

    # XGBoost parameters
    xgb_params = {
        'n_estimators': 300,
        'learning_rate': 0.05,
        'max_depth': 6,
        'n_jobs': -1,
        'tree_method': 'hist',  # histogram-based tree construction
        'random_state': 42,
        # Requests GPU training.
        # NOTE(review): set to 'cpu' on machines without CUDA - confirm target hardware.
        'device': 'cuda'
    }

    # Rare behaviors that are merged into the single label 'others1'.
    OTHERS1_ACTIONS = [
        'climb', 'dominancegroom', 'allogroom', 'attemptmount',
        'reciprocalsniff', 'dominancemount', 'disengage', 'defend'
    ]

    # Rare behaviors that are merged into the single label 'others2'.
    OTHERS2_ACTIONS = [
        'ejaculate', 'dominance', 'freeze', 'huddle', 'shepherd',
        'genitalgroom', 'run',  'rest', 'flinch',
        'tussle', 'biteobject', 'exploreobject', 'follow'
    ]

In [None]:
# Create one output directory per model variant under CFG.model_path
# (no error if a directory already exists).
os.makedirs(f"{CFG.model_path}/{CFG.model_name_xgb_base}", exist_ok=True)
os.makedirs(f"{CFG.model_path}/{CFG.model_name_xgb_augmented}", exist_ok=True)

# Body-part names that generate_mouse_data filters out of a video's tracking
# data when that video has more than five distinct body parts.
drop_body_parts =  [
    'headpiece_bottombackleft', 'headpiece_bottombackright', 'headpiece_bottomfrontleft', 'headpiece_bottomfrontright', 
    'headpiece_topbackleft', 'headpiece_topbackright', 'headpiece_topfrontleft', 'headpiece_topfrontright', 
    'spine_1', 'spine_2', 'tail_middle_1', 'tail_middle_2', 'tail_midpoint'
]

In [4]:
# =============================================================================
# Data Loading and Feature Engineering Functions
# =============================================================================
def safe_rolling(series, window, func, min_periods=None):
    """Apply ``func`` over a centered rolling window of ``series``.

    Args:
        series: pandas Series to roll over.
        window: window length in rows.
        func: reducer that receives each window as a raw NumPy array.
        min_periods: minimum observations required per window; when omitted it
            defaults to a quarter of the window, but never less than 1.

    Returns:
        Series of the per-window results, aligned with ``series``.
    """
    required = max(1, window // 4) if min_periods is None else min_periods
    centered_windows = series.rolling(window, min_periods=required, center=True)
    return centered_windows.apply(func, raw=True)

def _scale(n_frames_at_30fps, fps, ref=30.0):
    return max(1, int(round(n_frames_at_30fps * float(fps) / ref)))

def _scale_signed(n_frames_at_30fps, fps, ref=30.0):
    if n_frames_at_30fps == 0:
        return 0
    s = 1 if n_frames_at_30fps > 0 else -1
    mag = max(1, int(round(abs(n_frames_at_30fps) * float(fps) / ref)))
    return s * mag

def compute_egocentric_transform(mouse_df, body_parts, target_points=None):
    """
    Express coordinates in a mouse-centred (egocentric) frame.

    Origin: body_center.
    Y-axis: direction from tail_base to body_center, rotated to an angle of pi/2.

    Args:
        mouse_df: frame indexed by video frame whose columns are addressed as
            ``mouse_df[part]['x']`` / ``mouse_df[part]['y']``.
        body_parts: names of the mouse's own parts to transform; names that are
            not present in ``mouse_df`` are skipped.
        target_points: optional mapping ``name -> (x, y)`` of extra points
            (e.g. another mouse's body parts) to express in this frame.

    Returns:
        DataFrame with ``ego_<part>_x/_y`` and ``ego_target_<name>_x/_y``
        columns, or None when body_center or tail_base is not tracked.
    """
    columns = mouse_df.columns
    if not ('body_center' in columns and 'tail_base' in columns):
        return None

    center = mouse_df['body_center']
    tail = mouse_df['tail_base']
    center_x = center['x']
    center_y = center['y']

    # Heading of the tail->center vector in global coordinates; rotating by
    # (pi/2 - heading) makes that vector point along +Y.
    heading = np.arctan2(center_y - tail['y'], center_x - tail['x'])
    turn = np.pi/2 - heading
    cos_t = np.cos(turn)
    sin_t = np.sin(turn)

    def _to_ego(point_x, point_y):
        # Translate to the body centre, then apply the 2-D rotation.
        off_x = point_x - center_x
        off_y = point_y - center_y
        return off_x * cos_t - off_y * sin_t, off_x * sin_t + off_y * cos_t

    features = {}

    # The mouse's own body parts.
    for part in body_parts:
        if part not in columns:
            continue
        ego_x, ego_y = _to_ego(mouse_df[part]['x'], mouse_df[part]['y'])
        features[f'ego_{part}_x'] = ego_x
        features[f'ego_{part}_y'] = ego_y

    # Optional external points, expressed in the same frame.
    if target_points is not None:
        for label, (target_x, target_y) in target_points.items():
            ego_x, ego_y = _to_ego(target_x, target_y)
            features[f'ego_target_{label}_x'] = ego_x
            features[f'ego_target_{label}_y'] = ego_y

    return pd.DataFrame(features, index=mouse_df.index)


def _fps_from_meta(meta_df, fallback_lookup, default_fps=30.0):
    if 'frames_per_second' in meta_df.columns and pd.notnull(meta_df['frames_per_second']).any():
        return float(meta_df['frames_per_second'].iloc[0])
    vid = meta_df['video_id'].iloc[0]
    return float(fallback_lookup.get(vid, default_fps))

def add_curvature_features(X, center_x, center_y, fps):
    """Add path-curvature and turning-rate features to ``X``.

    Args:
        X: feature frame that receives the new columns (modified in place).
        center_x, center_y: per-frame body-centre coordinates (pandas Series).
        fps: video frame rate, used to rescale the window lengths.

    Returns:
        ``X`` with ``curv_mean_{25,50,75}`` (rolling mean curvature) and
        ``turn_rate_30`` (rolling sum of absolute heading change) added.
    """
    vel_x = center_x.diff()
    vel_y = center_y.diff()
    acc_x = vel_x.diff()
    acc_y = vel_y.diff()

    # Curvature of a planar path: |v x a| / |v|^3 (epsilon avoids division by zero).
    cross_prod = vel_x * acc_y - vel_y * acc_x
    vel_mag = np.sqrt(vel_x**2 + vel_y**2)
    curvature = np.abs(cross_prod) / (vel_mag**3 + 1e-6)
    for w in [25, 50, 75]:
        ws = _scale(w, fps)
        X[f'curv_mean_{w}'] = curvature.rolling(ws, min_periods=max(1, ws // 5)).mean()

    angle = np.arctan2(vel_y, vel_x)
    # Wrap the frame-to-frame heading difference into [-pi, pi] before taking
    # its magnitude; otherwise a small turn across the +/-pi branch cut of
    # arctan2 is counted as a turn of almost 2*pi.
    raw_change = angle.diff()
    angle_change = np.abs(np.arctan2(np.sin(raw_change), np.cos(raw_change)))
    w = 30
    ws = _scale(w, fps)
    X[f'turn_rate_{w}'] = angle_change.rolling(ws, min_periods=max(1, ws // 5)).sum()
    return X

def add_multiscale_features(X, center_x, center_y, fps):
    """Add rolling mean/std of body-centre speed at several time scales.

    For each scale in 20/40/60/80 (rescaled by ``fps``) that fits within the
    series, adds ``sp_m<scale>`` and ``sp_s<scale>``. When both the shortest
    and longest means exist, also adds their ratio as ``sp_ratio``.
    Returns ``X``.
    """
    step_x = center_x.diff()
    step_y = center_y.diff()
    speed = np.sqrt(step_x**2 + step_y**2) * float(fps)

    scales = [20, 40, 60, 80]
    n_frames = len(speed)
    for scale in scales:
        ws = _scale(scale, fps)
        if n_frames < ws:
            continue
        windowed = speed.rolling(ws, min_periods=max(1, ws // 4))
        X[f'sp_m{scale}'] = windowed.mean()
        X[f'sp_s{scale}'] = windowed.std()

    short_col = f'sp_m{scales[0]}'
    long_col = f'sp_m{scales[-1]}'
    if len(scales) >= 2 and short_col in X.columns and long_col in X.columns:
        X['sp_ratio'] = X[short_col] / (X[long_col] + 1e-6)
    return X

def add_state_features(X, center_x, center_y, fps):
    """Add discretised speed-state occupancy and transition features.

    The smoothed body-centre speed is binned into four states (0-3). For each
    window in 20/40/60/80 (rescaled by ``fps``) that fits within the series,
    the function adds ``s<state>_<window>`` (fraction of frames in that state)
    and ``trans_<window>`` (number of state changes in the window).

    The block is best-effort: if binning fails, a warning is issued and ``X``
    is returned with whatever columns were added so far.
    """
    speed = np.sqrt(center_x.diff()**2 + center_y.diff()**2) * float(fps)
    w_ma = _scale(15, fps)
    speed_ma = speed.rolling(w_ma, min_periods=max(1, w_ma // 3)).mean()
    try:
        bins = [-np.inf, 0.5 * fps, 2.0 * fps, 5.0 * fps, np.inf]
        speed_states = pd.cut(speed_ma, bins=bins, labels=[0, 1, 2, 3]).astype(float)

        # A transition needs a known state on both frames. A plain `!=` treats
        # NaN != NaN as True, which would count every frame with an unknown
        # state (including the first one) as a state change.
        previous_states = speed_states.shift(1)
        state_changes = (
            speed_states.ne(previous_states)
            & speed_states.notna()
            & previous_states.notna()
        ).astype(float)

        for window in [20, 40, 60, 80]:
            ws = _scale(window, fps)
            if len(speed_states) >= ws:
                min_obs = max(1, ws // 5)
                for state in [0, 1, 2, 3]:
                    X[f's{state}_{window}'] = ((speed_states == state).astype(float).rolling(ws, min_periods=min_obs).mean())
                X[f'trans_{window}'] = state_changes.rolling(ws, min_periods=min_obs).sum()
    except Exception as exc:
        # Keep the best-effort behaviour, but do not swallow the failure silently.
        warnings.warn(f"add_state_features: speed-state features skipped ({exc!r})", RuntimeWarning)
    return X

def add_longrange_features(X, center_x, center_y, fps):
    """Add long-horizon position and speed-rank features.

    For windows/spans of 30/60/120 (rescaled by ``fps``) adds:
      * ``x_ml<w>`` / ``y_ml<w>``: rolling mean position,
      * ``x_e<span>`` / ``y_e<span>``: exponentially weighted mean position,
      * ``sp_pct<w>``: percentile rank of the current speed within the window.
    Rolling features are only added when the series is at least one window
    long. Returns ``X``.
    """
    def _min_obs(ws):
        # pandas raises ValueError when min_periods exceeds the window, which
        # would happen for scaled windows shorter than 5 frames; clamp to ws.
        return min(ws, max(5, ws // 6))

    for window in [30, 60, 120]:
        ws = _scale(window, fps)
        if len(center_x) >= ws:
            X[f'x_ml{window}'] = center_x.rolling(ws, min_periods=_min_obs(ws)).mean()
            X[f'y_ml{window}'] = center_y.rolling(ws, min_periods=_min_obs(ws)).mean()
    for span in [30, 60, 120]:
        s = _scale(span, fps)
        X[f'x_e{span}'] = center_x.ewm(span=s, min_periods=1).mean()
        X[f'y_e{span}'] = center_y.ewm(span=s, min_periods=1).mean()
    speed = np.sqrt(center_x.diff()**2 + center_y.diff()**2) * float(fps)
    for window in [30, 60, 120]:
        ws = _scale(window, fps)
        if len(speed) >= ws:
            X[f'sp_pct{window}'] = speed.rolling(ws, min_periods=_min_obs(ws)).rank(pct=True)
    return X

def add_interaction_features(X, mouse_pair, avail_A, avail_B, fps):
    """Add leading/chasing/speed-correlation features for a pair of mice.

    Requires ``body_center`` for both mice; otherwise ``X`` is returned as is.
    Adds ``A_ld<w>`` / ``B_ld<w>`` (w in 30, 60), ``chase_30`` and
    ``sp_cor<w>`` (w in 60, 120), with windows rescaled by ``fps``.
    """
    if not ('body_center' in avail_A and 'body_center' in avail_B):
        return X

    center_A = mouse_pair['A']['body_center']
    center_B = mouse_pair['B']['body_center']

    # Offset from B to A and the corresponding distance.
    rel_x = center_A['x'] - center_B['x']
    rel_y = center_A['y'] - center_B['y']
    rel_dist = np.sqrt(rel_x**2 + rel_y**2)

    # Per-frame displacement of each mouse.
    A_vx = center_A['x'].diff()
    A_vy = center_A['y'].diff()
    B_vx = center_B['x'].diff()
    B_vy = center_B['y'].diff()
    A_sp = np.sqrt(A_vx**2 + A_vy**2)
    B_sp = np.sqrt(B_vx**2 + B_vy**2)

    # Normalised projection of each mouse's motion onto the A-B axis.
    A_lead = (A_vx * rel_x + A_vy * rel_y) / (A_sp * rel_dist + 1e-6)
    B_lead = (B_vx * (-rel_x) + B_vy * (-rel_y)) / (B_sp * rel_dist + 1e-6)
    for window in [30, 60]:
        ws = _scale(window, fps)
        min_obs = max(1, ws // 6)
        X[f'A_ld{window}'] = A_lead.rolling(ws, min_periods=min_obs).mean()
        X[f'B_ld{window}'] = B_lead.rolling(ws, min_periods=min_obs).mean()

    # Closing speed (positive when the distance shrinks) weighted by B's lead.
    closing = -rel_dist.diff()
    chase_signal = closing * B_lead
    w = 30
    ws = _scale(w, fps)
    X[f'chase_{w}'] = chase_signal.rolling(ws, min_periods=max(1, ws // 6)).mean()

    # Rolling correlation between the two mice's speeds.
    for window in [60, 120]:
        ws = _scale(window, fps)
        X[f'sp_cor{window}'] = A_sp.rolling(ws, min_periods=max(1, ws // 6)).corr(B_sp)
    return X

In [5]:
def transform_single(single_mouse, body_parts_tracked, fps):
    """Build the per-frame feature table for a single mouse.

    Args:
        single_mouse: tracking frame for one mouse; columns are addressed as
            ``single_mouse[bodypart]['x' | 'y']``.
        body_parts_tracked: body-part names that define the feature layout.
        fps: video frame rate, used to rescale window and lag lengths.

    Returns:
        float32 DataFrame with one row per frame of ``single_mouse``.
    """
    available_body_parts = single_mouse.columns.get_level_values(0)
    # Squared distance between each unordered pair of available body parts.
    X = pd.DataFrame({f"{p1}+{p2}": np.square(single_mouse[p1] - single_mouse[p2]).sum(axis=1, skipna=False) for p1, p2 in itertools.combinations(body_parts_tracked, 2) if p1 in available_body_parts and p2 in available_body_parts})
    # Fix the column layout regardless of which parts are present.
    # NOTE(review): the layout is built from the full product while the data
    # comes from combinations, so self-pairs and reversed pairs become all-NaN
    # columns - confirm whether combinations was intended here.
    X = X.reindex(columns=[f"{p1}+{p2}" for p1, p2 in itertools.product(body_parts_tracked, repeat=2)], copy=False)
    if all(p in single_mouse.columns for p in ['ear_left', 'ear_right', 'tail_base']):
        # Squared displacement of each ear relative to its own and to the tail
        # base's position `lag` frames earlier.
        lag = _scale(10, fps)
        shifted = single_mouse[['ear_left', 'ear_right', 'tail_base']].shift(lag)
        speeds = pd.DataFrame({'sp_lf': np.square(single_mouse['ear_left'] - shifted['ear_left']).sum(axis=1, skipna=False), 'sp_rt': np.square(single_mouse['ear_right'] - shifted['ear_right']).sum(axis=1, skipna=False), 'sp_lf2': np.square(single_mouse['ear_left'] - shifted['tail_base']).sum(axis=1, skipna=False), 'sp_rt2': np.square(single_mouse['ear_right'] - shifted['tail_base']).sum(axis=1, skipna=False)})
        X = pd.concat([X, speeds], axis=1)
    if 'nose+tail_base' in X.columns and 'ear_left+ear_right' in X.columns:
        # Elongation: squared body length over squared ear separation.
        X['elong'] = X['nose+tail_base'] / (X['ear_left+ear_right'] + 1e-6)
    if all(p in available_body_parts for p in ['nose', 'body_center', 'tail_base']):
        # Cosine of the angle at body_center between the nose and tail_base directions.
        v1 = single_mouse['nose'] - single_mouse['body_center']
        v2 = single_mouse['tail_base'] - single_mouse['body_center']
        X['body_ang'] = (v1['x'] * v2['x'] + v1['y'] * v2['y']) / (np.sqrt(v1['x']**2 + v1['y']**2) * np.sqrt(v2['x']**2 + v2['y']**2) + 1e-6)
    if 'body_center' in available_body_parts:
        cx = single_mouse['body_center']['x']
        cy = single_mouse['body_center']['y']
        # Position statistics over centered windows, plus net displacement and
        # activity over trailing windows (window lengths rescaled by fps).
        for w in [5, 15, 30, 60]:
            ws = _scale(w, fps)
            roll = dict(min_periods=1, center=True)
            X[f'cx_m{w}'] = cx.rolling(ws, **roll).mean()
            X[f'cy_m{w}'] = cy.rolling(ws, **roll).mean()
            X[f'cx_s{w}'] = cx.rolling(ws, **roll).std()
            X[f'cy_s{w}'] = cy.rolling(ws, **roll).std()
            X[f'x_rng{w}'] = cx.rolling(ws, **roll).max() - cx.rolling(ws, **roll).min()
            X[f'y_rng{w}'] = cy.rolling(ws, **roll).max() - cy.rolling(ws, **roll).min()
            X[f'disp{w}'] = np.sqrt(cx.diff().rolling(ws, min_periods=1).sum()**2 + cy.diff().rolling(ws, min_periods=1).sum()**2)
            X[f'act{w}'] = np.sqrt(cx.diff().rolling(ws, min_periods=1).var() + cy.diff().rolling(ws, min_periods=1).var())
        X = add_curvature_features(X, cx, cy, fps)
        X = add_multiscale_features(X, cx, cy, fps)
        X = add_state_features(X, cx, cy, fps)
        X = add_longrange_features(X, cx, cy, fps)
    if all(p in available_body_parts for p in ['nose', 'tail_base']):
        # Nose-to-tail distance: lagged value and change since the lag.
        nt_dist = np.sqrt((single_mouse['nose']['x'] - single_mouse['tail_base']['x'])**2 + (single_mouse['nose']['y'] - single_mouse['tail_base']['y'])**2)
        for lag in [10, 20, 40]:
            l = _scale(lag, fps)
            X[f'nt_lg{lag}'] = nt_dist.shift(l)
            X[f'nt_df{lag}'] = nt_dist - nt_dist.shift(l)
    if all(p in available_body_parts for p in ['ear_left', 'ear_right']):
        # Ear-to-ear distance sampled at past and future offsets, plus its
        # coefficient of variation over a centered window.
        ear_d = np.sqrt((single_mouse['ear_left']['x'] - single_mouse['ear_right']['x'])**2 + (single_mouse['ear_left']['y'] - single_mouse['ear_right']['y'])**2)
        for off in [-30, -20, -10, 10, 20, 30]:
            o = _scale_signed(off, fps)
            # shift(-o): a positive offset reads the value o frames ahead.
            X[f'ear_o{off}'] = ear_d.shift(-o)
        w = _scale(30, fps)
        X['ear_con'] = ear_d.rolling(w, min_periods=1, center=True).std() / (ear_d.rolling(w, min_periods=1, center=True).mean() + 1e-6)
    
    # Add Egocentric Features
    ego_df = compute_egocentric_transform(single_mouse, body_parts_tracked)
    if ego_df is not None:
        X = pd.concat([X, ego_df], axis=1)

    return X.astype(np.float32, copy=False)

def transform_pair(mouse_pair, body_parts_tracked, fps):
    """Build the per-frame feature table for an (agent, target) pair of mice.

    Args:
        mouse_pair: tracking frame whose top column level is 'A' (agent) or
            'B' (target); below it columns are addressed as
            ``[bodypart]['x' | 'y']``.
        body_parts_tracked: body-part names that define the feature layout.
        fps: video frame rate, used to rescale window and lag lengths.

    Returns:
        float32 DataFrame with one row per frame of ``mouse_pair``.
    """
    avail_A = mouse_pair['A'].columns.get_level_values(0)
    avail_B = mouse_pair['B'].columns.get_level_values(0)
    # Squared distance from every available part of A to every available part of B.
    X = pd.DataFrame({f"12+{p1}+{p2}": np.square(mouse_pair['A'][p1] - mouse_pair['B'][p2]).sum(axis=1, skipna=False) for p1, p2 in itertools.product(body_parts_tracked, repeat=2) if p1 in avail_A and p2 in avail_B})
    # Fix the column layout; combinations with a missing part become NaN columns.
    X = X.reindex(columns=[f"12+{p1}+{p2}" for p1, p2 in itertools.product(body_parts_tracked, repeat=2)], copy=False)
    if ('A', 'ear_left') in mouse_pair.columns and ('B', 'ear_left') in mouse_pair.columns:
        # Squared displacement of the left ears relative to positions `lag` frames earlier.
        lag = _scale(10, fps)
        shA = mouse_pair['A']['ear_left'].shift(lag)
        shB = mouse_pair['B']['ear_left'].shift(lag)
        speeds = pd.DataFrame({'sp_A': np.square(mouse_pair['A']['ear_left'] - shA).sum(axis=1, skipna=False), 'sp_AB': np.square(mouse_pair['A']['ear_left'] - shB).sum(axis=1, skipna=False), 'sp_B': np.square(mouse_pair['B']['ear_left'] - shB).sum(axis=1, skipna=False)})
        X = pd.concat([X, speeds], axis=1)
    # NOTE(review): every distance column built above is prefixed with "12+",
    # so this condition appears never to hold and 'elong' is not added here - confirm.
    if 'nose+tail_base' in X.columns and 'ear_left+ear_right' in X.columns:
        X['elong'] = X['nose+tail_base'] / (X['ear_left+ear_right'] + 1e-6)
    if all(p in avail_A for p in ['nose', 'tail_base']) and all(p in avail_B for p in ['nose', 'tail_base']):
        # Cosine of the angle between the two tail->nose direction vectors.
        dir_A = mouse_pair['A']['nose'] - mouse_pair['A']['tail_base']
        dir_B = mouse_pair['B']['nose'] - mouse_pair['B']['tail_base']
        X['rel_ori'] = (dir_A['x'] * dir_B['x'] + dir_A['y'] * dir_B['y']) / (np.sqrt(dir_A['x']**2 + dir_A['y']**2) * np.sqrt(dir_B['x']**2 + dir_B['y']**2) + 1e-6)
    if all(p in avail_A for p in ['nose']) and all(p in avail_B for p in ['nose']):
        # Change in squared nose-to-nose distance over `lag` frames
        # (negative when the noses moved closer).
        cur = np.square(mouse_pair['A']['nose'] - mouse_pair['B']['nose']).sum(axis=1, skipna=False)
        lag = _scale(10, fps)
        shA_n = mouse_pair['A']['nose'].shift(lag)
        shB_n = mouse_pair['B']['nose'].shift(lag)
        past = np.square(shA_n - shB_n).sum(axis=1, skipna=False)
        X['appr'] = cur - past
    if 'body_center' in avail_A and 'body_center' in avail_B:
        # One-hot proximity bands on the centre-to-centre distance.
        cd = np.sqrt((mouse_pair['A']['body_center']['x'] - mouse_pair['B']['body_center']['x'])**2 + (mouse_pair['A']['body_center']['y'] - mouse_pair['B']['body_center']['y'])**2)
        X['v_cls'] = (cd < 5.0).astype(float)
        X['cls']   = ((cd >= 5.0) & (cd < 15.0)).astype(float)
        X['med']   = ((cd >= 15.0) & (cd < 30.0)).astype(float)
        X['far']   = (cd >= 30.0).astype(float)
    if 'body_center' in avail_A and 'body_center' in avail_B:
        # cd_full is the *squared* centre distance; it is reused further below for 'int_con'.
        cd_full = np.square(mouse_pair['A']['body_center'] - mouse_pair['B']['body_center']).sum(axis=1, skipna=False)
        for w in [5, 15, 30, 60]:
            ws = _scale(w, fps)
            roll = dict(min_periods=1, center=True)
            X[f'd_m{w}']  = cd_full.rolling(ws, **roll).mean()
            X[f'd_s{w}']  = cd_full.rolling(ws, **roll).std()
            X[f'd_mn{w}'] = cd_full.rolling(ws, **roll).min()
            X[f'd_mx{w}'] = cd_full.rolling(ws, **roll).max()
            d_var = cd_full.rolling(ws, **roll).var()
            # Approaches 1 when the distance is stable within the window.
            X[f'int{w}'] = 1 / (1 + d_var)
            # Dot product of the two per-frame displacement vectors
            # (does not depend on w; recomputed on each iteration).
            Axd = mouse_pair['A']['body_center']['x'].diff()
            Ayd = mouse_pair['A']['body_center']['y'].diff()
            Bxd = mouse_pair['B']['body_center']['x'].diff()
            Byd = mouse_pair['B']['body_center']['y'].diff()
            coord = Axd * Bxd + Ayd * Byd
            X[f'co_m{w}'] = coord.rolling(ws, **roll).mean()
            X[f'co_s{w}'] = coord.rolling(ws, **roll).std()
    if 'nose' in avail_A and 'nose' in avail_B:
        # Nose-to-nose distance: lagged value, change, and the fraction of
        # recent frames closer than 10.0.
        nn = np.sqrt((mouse_pair['A']['nose']['x'] - mouse_pair['B']['nose']['x'])**2 + (mouse_pair['A']['nose']['y'] - mouse_pair['B']['nose']['y'])**2)
        for lag in [10, 20, 40]:
            l = _scale(lag, fps)
            X[f'nn_lg{lag}']  = nn.shift(l)
            X[f'nn_ch{lag}']  = nn - nn.shift(l)
            is_cl = (nn < 10.0).astype(float)
            X[f'cl_ps{lag}']  = is_cl.rolling(l, min_periods=1).mean()
    if 'body_center' in avail_A and 'body_center' in avail_B:
        # Cosine similarity of the two displacement vectors, sampled at past
        # and future offsets.
        Avx = mouse_pair['A']['body_center']['x'].diff()
        Avy = mouse_pair['A']['body_center']['y'].diff()
        Bvx = mouse_pair['B']['body_center']['x'].diff()
        Bvy = mouse_pair['B']['body_center']['y'].diff()
        val = (Avx * Bvx + Avy * Bvy) / (np.sqrt(Avx**2 + Avy**2) * np.sqrt(Bvx**2 + Bvy**2) + 1e-6)
        for off in [-30, -20, -10, 0, 10, 20, 30]:
            o = _scale_signed(off, fps)
            # shift(-o): a positive offset reads the value o frames ahead.
            X[f'va_{off}'] = val.shift(-o)
        w = _scale(30, fps)
        # Coefficient of variation of the squared centre distance.
        X['int_con'] = cd_full.rolling(w, min_periods=1, center=True).std() / (cd_full.rolling(w, min_periods=1, center=True).mean() + 1e-6)
        X = add_interaction_features(X, mouse_pair, avail_A, avail_B, fps)
    
    # Add Egocentric Features for Pair
    # 1. A's ego features (A in A's frame)
    ego_A = compute_egocentric_transform(mouse_pair['A'], body_parts_tracked)
    if ego_A is not None:
        ego_A.columns = [f"A_{c}" for c in ego_A.columns]
        X = pd.concat([X, ego_A], axis=1)
        
    # 2. B's ego features (B in B's frame) - to know B's posture
    ego_B = compute_egocentric_transform(mouse_pair['B'], body_parts_tracked)
    if ego_B is not None:
        ego_B.columns = [f"B_{c}" for c in ego_B.columns]
        X = pd.concat([X, ego_B], axis=1)

    # 3. B in A's frame (Interaction context: where is B relative to A?)
    if 'body_center' in mouse_pair['B'].columns:
        # Prepare B's key points to transform into A's frame
        target_pts = {
            'body_center': (mouse_pair['B']['body_center']['x'], mouse_pair['B']['body_center']['y'])
        }
        if 'nose' in mouse_pair['B'].columns:
            target_pts['nose'] = (mouse_pair['B']['nose']['x'], mouse_pair['B']['nose']['y'])
            
        # An empty body-part list means only the target points are transformed.
        ego_B_in_A = compute_egocentric_transform(mouse_pair['A'], [], target_points=target_pts)
        if ego_B_in_A is not None:
             X = pd.concat([X, ego_B_in_A], axis=1)

    return X.astype(np.float32, copy=False)

In [None]:
def generate_mouse_data(dataset, traintest, traintest_directory=None, generate_single=True, generate_pair=True):
    """Yield per-mouse and per-pair tracking data (with labels when available).

    Args:
        dataset: metadata frame; each row is read for ``lab_id``, ``video_id``,
            ``behaviors_labeled`` and ``pix_per_cm_approx``.
        traintest: 'train' or 'test'; selects the default tracking directory
            and whether annotations are loaded.
        traintest_directory: optional override for the tracking directory.
        generate_single: yield one item per mouse.
        generate_pair: yield one item per ordered (agent, target) pair.

    Yields:
        Tuples ``(kind, data, meta, label)`` where ``kind`` is 'single' or
        'pair', ``data`` is the tracking frame, ``meta`` holds video/agent/
        target/frame identifiers, and ``label`` is a 0/1 frame-by-action table
        or None when no annotation file was loaded.
    """
    if traintest_directory is None:
        traintest_directory = f"/kaggle/input/MABe-mouse-behavior-detection/{traintest}_tracking"
    for _, row in tqdm(dataset.iterrows(), total=len(dataset), desc=f"Generating data for {traintest}"):
        lab_id = row.lab_id
        # Skip training videos without a behaviors_labeled string, except MABe22 labs.
        if type(row.behaviors_labeled) != str and traintest == 'train' and not lab_id.startswith('MABe22'):
             continue
        video_id = row.video_id
        path = f"{traintest_directory}/{lab_id}/{video_id}.parquet"
        try:
            vid = pd.read_parquet(path)
        except FileNotFoundError:
            # Missing tracking file: silently skip this video.
            continue
        # Only videos with more than five distinct body parts are filtered
        # against the module-level drop_body_parts list.
        if len(np.unique(vid.bodypart)) > 5:
            vid = vid.query("~ bodypart.isin(@drop_body_parts)")
        # Long -> wide: one row per frame, columns (coordinate, mouse_id, bodypart).
        pvid = vid.pivot(columns=['mouse_id', 'bodypart'], index='video_frame', values=['x', 'y'])
        del vid
        gc.collect()
        # Reorder the column levels to (mouse_id, bodypart, coordinate) and sort them.
        pvid = pvid.reorder_levels([1, 2, 0], axis=1).T.sort_index().T
        # Rescale coordinates by the video's approximate pixels-per-cm factor.
        pvid /= row.pix_per_cm_approx
        if type(row.behaviors_labeled) == str:
            # Each entry is "agent,target,action" once single quotes are stripped.
            vid_behaviors = json.loads(row.behaviors_labeled)
            vid_behaviors = sorted(list({b.replace("'", "") for b in vid_behaviors}))
            vid_behaviors = [b.split(',') for b in vid_behaviors]
            vid_behaviors = pd.DataFrame(vid_behaviors, columns=['agent', 'target', 'action'])
            
            # Remap actions to 'others1' and 'others2'
            vid_behaviors.loc[vid_behaviors['action'].isin(CFG.OTHERS1_ACTIONS), 'action'] = 'others1'
            vid_behaviors.loc[vid_behaviors['action'].isin(CFG.OTHERS2_ACTIONS), 'action'] = 'others2'
        else:
            vid_behaviors = pd.DataFrame(columns=['agent', 'target', 'action'])
        # Annotations are loaded only for non-MABe22 training videos; a missing
        # annotation file leaves annot as None (items are then yielded unlabeled).
        annot = None
        if traintest == 'train' and not lab_id.startswith('MABe22'):
            try:
                annot = pd.read_parquet(path.replace('train_tracking', 'train_annotation'))
            except FileNotFoundError:
                pass
        if generate_single:
            agents = np.unique(pvid.columns.get_level_values('mouse_id'))
            for mouse_id in agents:
                mouse_id_str = f"mouse{mouse_id}"
                try:
                    if annot is not None:
                        # Self-directed actions listed for this mouse define the label columns.
                        vid_behaviors_subset = vid_behaviors.query("target == 'self'")
                        vid_agent_actions = np.unique(vid_behaviors_subset.query("agent == @mouse_id_str").action)
                    else:
                        vid_agent_actions = []
                    single_mouse = pvid.loc[:, mouse_id]
                    assert len(single_mouse) == len(pvid)
                    single_mouse_meta = pd.DataFrame({'video_id': video_id, 'agent_id': mouse_id_str, 'target_id': 'self', 'video_frame': single_mouse.index})
                    if annot is not None:
                        single_mouse_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=single_mouse.index)
                        # Self-directed annotations have agent_id == target_id.
                        annot_subset = annot.query("(agent_id == @mouse_id) & (target_id == @mouse_id)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            action = annot_row.action
                            if action in CFG.OTHERS1_ACTIONS: action = 'others1'
                            elif action in CFG.OTHERS2_ACTIONS: action = 'others2'
                            if action in single_mouse_label.columns:
                                # .loc slicing is label-based and includes stop_frame itself.
                                single_mouse_label.loc[annot_row['start_frame']:annot_row['stop_frame'], action] = 1.0
                        yield 'single', single_mouse, single_mouse_meta, single_mouse_label
                    else:
                        yield 'single', single_mouse, single_mouse_meta, None
                except KeyError:
                    # A KeyError while preparing this mouse skips it silently.
                    pass
        if generate_pair:
            agents = np.unique(pvid.columns.get_level_values('mouse_id'))
            if len(agents) >= 2:
                # Ordered pairs: (a, b) and (b, a) are generated separately.
                for agent, target in itertools.permutations(agents, 2): 
                    agent_str = f"mouse{agent}"
                    target_str = f"mouse{target}"
                    if annot is not None:
                        # Social actions listed for this agent/target define the label columns.
                        vid_behaviors_subset = vid_behaviors.query("target != 'self'")
                        vid_agent_actions = np.unique(vid_behaviors_subset.query("(agent == @agent_str) & (target == @target_str)").action)
                    else:
                        vid_agent_actions = []
                    # Side-by-side frame with top-level keys 'A' (agent) and 'B' (target).
                    mouse_pair = pd.concat([pvid[agent], pvid[target]], axis=1, keys=['A', 'B'])
                    assert len(mouse_pair) == len(pvid)
                    mouse_pair_meta = pd.DataFrame({'video_id': video_id, 'agent_id': agent_str, 'target_id': target_str, 'video_frame': mouse_pair.index})
                    if annot is not None:
                        mouse_pair_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=mouse_pair.index)
                        annot_subset = annot.query("(agent_id == @agent) & (target_id == @target)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            action = annot_row.action
                            if action in CFG.OTHERS1_ACTIONS: action = 'others1'
                            elif action in CFG.OTHERS2_ACTIONS: action = 'others2'
                            if action in mouse_pair_label.columns:
                                # .loc slicing is label-based and includes stop_frame itself.
                                mouse_pair_label.loc[annot_row['start_frame']:annot_row['stop_frame'], action] = 1.0
                        yield 'pair', mouse_pair, mouse_pair_meta, mouse_pair_label
                    else:
                        yield 'pair', mouse_pair, mouse_pair_meta, None

In [7]:
# =============================================================================
# Training and Prediction Functions
# =============================================================================

def train_xgboost(X, y, groups, model_dir):
    """Train one XGBoost classifier per GroupKFold split and collect OOF predictions.

    Args:
        X: feature DataFrame (indexed positionally with ``.iloc``).
        y: binary target Series aligned with ``X``.
        groups: group labels passed to GroupKFold so that a group never
            appears in both the training and validation part of a fold.
        model_dir: directory where the list of fitted models is pickled.

    Returns:
        ``(oof_preds, models)``: temporally smoothed out-of-fold probabilities
        and the list of fitted models (one per fold).
    """
    os.makedirs(model_dir, exist_ok=True)
    splitter = GroupKFold(n_splits=CFG.n_splits)

    # Class-imbalance weight: negatives per positive. The +1 avoids a division
    # by zero when there are no positives; no lower bound is applied.
    n_pos = y.sum()
    n_neg = len(y) - n_pos
    scale = n_neg / (n_pos + 1)
    print(f"  Class ratio - Positive: {n_pos}, Negative: {n_neg}, Scale: {scale:.2f}")

    oof_preds = np.zeros(len(y))
    models = []
    for train_idx, valid_idx in splitter.split(X, y, groups):
        X_fit, y_fit = X.iloc[train_idx], y.iloc[train_idx]
        X_hold, y_hold = X.iloc[valid_idx], y.iloc[valid_idx]

        fold_params = dict(CFG.xgb_params, scale_pos_weight=scale)
        model = XGBClassifier(**fold_params)
        model.fit(X_fit, y_fit, eval_set=[(X_hold, y_hold)], verbose=False)

        # Positive-class probabilities for the held-out rows, smoothed over
        # the fold's row order.
        # NOTE(review): smoothing runs over the whole fold at once, so it can
        # blend across sequence boundaries if several sequences share a fold - confirm.
        hold_probs = model.predict_proba(X_hold)[:, 1]
        oof_preds[valid_idx] = apply_temporal_smoothing(hold_probs)

        models.append(model)

    joblib.dump(models, f"{model_dir}/xgb_models.pkl")
    return oof_preds, models

def tune_threshold(y_true, y_pred_proba):
    """Search for the probability cut-off that maximises F1 using Optuna.

    Runs 10 trials over thresholds in [0.1, 0.9] (step 0.01) and returns the
    best threshold found.
    """
    def _f1_at(trial):
        cutoff = trial.suggest_float("threshold", 0.1, 0.9, step=0.01)
        predicted = (y_pred_proba >= cutoff)
        return f1_score(y_true, predicted, zero_division=0)

    search = optuna.create_study(direction="maximize")
    search.optimize(_f1_at, n_trials=10, n_jobs=-1)
    return search.best_params["threshold"]

def apply_temporal_smoothing(probs, window_size=15):
    """
    Smooth a probability sequence with a centered rolling mean.

    Sequences shorter than ``window_size`` are returned unchanged. Otherwise a
    NumPy array of the same length is returned; windows are centered (no phase
    shift) and shrink at the edges (``min_periods=1``).
    """
    if len(probs) < window_size:
        return probs

    centered = pd.Series(probs).rolling(window=window_size, min_periods=1, center=True)
    return centered.mean().values

def save_feature_importance(models, feature_names, output_dir, prefix):
    """
    Average gain-based feature importance over several XGBoost models and save it.

    Writes ``<output_dir>/feature_importance_<prefix>.csv`` (one column per
    model plus ``mean_importance``, sorted by the mean in descending order) and
    prints the ten highest-ranked features. Does nothing when ``models`` is empty.
    """
    if not models:
        return

    table = pd.DataFrame(index=feature_names)
    for idx, fitted in enumerate(models):
        # 'gain' importance; features missing from the booster's scores get 0.
        gains = fitted.get_booster().get_score(importance_type='gain')
        table[f'model_{idx}'] = pd.Series(gains).reindex(feature_names, fill_value=0)

    table['mean_importance'] = table.mean(axis=1)
    table = table.sort_values('mean_importance', ascending=False)

    # Persist the full table.
    os.makedirs(output_dir, exist_ok=True)
    table.to_csv(f"{output_dir}/feature_importance_{prefix}.csv")

    print(f"\n    Top 10 Features ({prefix}):")
    print(table['mean_importance'].head(10))


In [None]:
# =============================================================================
# MABe F1 Score Evaluation Functions
# =============================================================================
from collections import defaultdict

def segments_to_frames(segments_df):
    """Expand segment rows into per-key sets of frame numbers.

    Keys have the form ``<video_id>_<agent_id>_<target_id>_<action>``. Each
    segment contributes frames ``start_frame`` .. ``stop_frame - 1`` (the stop
    frame is exclusive); segments sharing a key are merged.
    """
    frames_by_key = defaultdict(set)

    for _, segment in segments_df.iterrows():
        video = segment['video_id']
        agent = segment['agent_id']
        target = segment['target_id']
        action = segment['action']
        covered = range(segment['start_frame'], segment['stop_frame'])
        frames_by_key[f"{video}_{agent}_{target}_{action}"].update(covered)

    return frames_by_key

def calculate_mabe_f1(ground_truth_df, prediction_df, beta=1.0):
    """Compute a frame-level, action-averaged F-beta score.

    Both inputs are segment tables accepted by ``segments_to_frames``. True
    positives, false positives and false negatives are counted per frame and
    pooled per action; the overall score is the unweighted mean of the
    per-action scores.

    Returns:
        ``(overall_f1, results_detail)`` where ``results_detail`` maps each
        action to its TP/FP/FN counts, Precision, Recall and F1.
    """
    truth = segments_to_frames(ground_truth_df)
    predicted = segments_to_frames(prediction_df)

    # action -> [tp, fp, fn], accumulated over every key seen on either side.
    counts = {}
    for key in set(truth.keys()) | set(predicted.keys()):
        # The action is the last underscore-separated field of the key.
        action = key.split('_')[-1]
        tally = counts.setdefault(action, [0, 0, 0])

        gt_set = truth.get(key, set())
        pred_set = predicted.get(key, set())

        tally[0] += len(gt_set & pred_set)   # in both
        tally[1] += len(pred_set - gt_set)   # predicted only
        tally[2] += len(gt_set - pred_set)   # ground truth only

    action_f1s = []
    results_detail = {}
    for action in sorted(counts):
        tp, fp, fn = counts[action]

        if tp + fp + fn == 0:
            f1 = 0.0
        else:
            f1 = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp)

        action_f1s.append(f1)
        results_detail[action] = {
            'TP': tp, 'FP': fp, 'FN': fn,
            'Precision': tp / (tp + fp) if (tp + fp) > 0 else 0,
            'Recall': tp / (tp + fn) if (tp + fn) > 0 else 0,
            'F1': f1
        }

    if action_f1s:
        overall_f1 = sum(action_f1s) / len(action_f1s)
    else:
        overall_f1 = 0.0

    return overall_f1, results_detail

def probabilities_to_segments(probs, threshold, video_id, agent, target, action):
    """Turn a per-frame probability array into a list of segment dicts.

    Entries with ``probs >= threshold`` are positive.  Every maximal run of
    positives becomes one dict whose ``start_frame``/``stop_frame`` form a
    half-open range expressed as positions within ``probs``.
    """
    is_on = (probs >= threshold).astype(int)
    # Zero sentinels on both sides make runs touching either end produce edges too.
    edges = np.diff(np.concatenate(([0], is_on, [0])))
    rising = np.where(edges == 1)[0]
    falling = np.where(edges == -1)[0]

    return [
        {
            'video_id': video_id,
            'agent_id': agent,
            'target_id': target,
            'action': action,
            'start_frame': int(first),
            'stop_frame': int(past_last)
        }
        for first, past_last in zip(rising, falling)
    ]

# Log line confirming that the evaluation helpers defined in this cell are available.
print("✅ MABe F1 evaluation functions loaded")

In [None]:
# =============================================================================
# Main Execution (Optimized + Smart Subsampling)
# =============================================================================

if __name__ == "__main__":
    # Segment dicts gathered across all modes/actions for the final global report:
    # out-of-fold predictions and the matching ground truth.
    global_oof_preds, global_ground_truth = [], []

    # =========================================================================
    # Phase 0: Setup
    # =========================================================================
    print("Loading train.csv...")
    train = pd.read_csv(CFG.train_path)

    # Rows whose lab_id starts with 'MABe22' form the "unlabeled" pool; the rest is labeled.
    is_mabe22 = train.lab_id.str.startswith('MABe22')
    train_labeled = train[~is_mabe22]

    # Shuffle the MABe22 pool reproducibly, then carve out two disjoint 10% slices.
    train_unlabeled = (
        train[is_mabe22]
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )
    n_unlabeled = len(train_unlabeled)
    n_subset = int(n_unlabeled * 0.1)

    train_unlabeled_train = train_unlabeled.iloc[:n_subset]
    train_unlabeled_val = train_unlabeled.iloc[n_subset:2 * n_subset]

    print(f"Labeled videos: {len(train_labeled)}")
    print(f"Unlabeled (MABe22) videos: {len(train_unlabeled)}")
    print(f"  - Used for Pseudo-labeling (10%): {len(train_unlabeled_train)}")
    print(f"  - Used for Validation (10%): {len(train_unlabeled_val)}")

    # video_id -> frames_per_second for the labeled videos (first row wins per video).
    fps_by_video = train_labeled[['video_id', 'frames_per_second']].drop_duplicates('video_id')
    _fps_lookup = fps_by_video.set_index('video_id')['frames_per_second'].to_dict()
    
    # =========================================================================
    # Sequential Processing: Single -> Clear Memory -> Pair
    # =========================================================================
    def _positive_runs_as_frame_segments(frames, is_positive):
        """Return half-open ``(start_frame, stop_frame)`` runs of positive frames.

        ``frames`` holds frame numbers sorted ascending and ``is_positive`` the
        aligned 0/1 decision for each of them.  The training rows are
        subsampled, so neighbouring rows are not necessarily neighbouring
        frames; a run is therefore also cut wherever the frame number does not
        advance by exactly one.  This keeps every segment restricted to frames
        that were actually evaluated.
        """
        frames = np.asarray(frames)
        positive_frames = frames[np.asarray(is_positive).astype(bool)]
        if positive_frames.size == 0:
            return []
        cut_points = np.flatnonzero(np.diff(positive_frames) != 1) + 1
        run_starts = np.concatenate(([0], cut_points))
        run_stops = np.concatenate((cut_points, [positive_frames.size]))
        return [
            (int(positive_frames[lo]), int(positive_frames[hi - 1]) + 1)
            for lo, hi in zip(run_starts, run_stops)
        ]

    for mode in ['single', 'pair']:
        print(f"\n" + "="*60)
        print(f" PROCESSING MODE: {mode.upper()}")
        print("="*60)

        # --- 1. Generate Labeled Data for Current Mode ---
        print(f"Generating labeled data for {mode}...")
        gen_labeled = generate_mouse_data(
            train_labeled, 'train',
            generate_single=(mode=='single'),
            generate_pair=(mode=='pair')
        )

        data_storage = {'X': [], 'y': [], 'meta': []}
        all_actions = set()

        for switch, data, meta, label in gen_labeled:
            if switch != mode: continue
            if label is None or label.shape[1] == 0: continue

            fps_i = _fps_from_meta(meta, _fps_lookup, default_fps=30.0)
            if mode == 'single':
                X_i = transform_single(data, CFG.CANONICAL_BODY_PARTS, fps_i)
            else:
                X_i = transform_pair(data, CFG.CANONICAL_BODY_PARTS, fps_i)

            # --- SMART SUBSAMPLING (Both Modes) ---
            # Align meta index with data index to ensure loc works correctly.
            # NOTE(review): label is indexed with the same labels below, so its
            # index is assumed to match X_i.index as well - confirm in the generator.
            meta.index = X_i.index

            # Identify active frames (any behavior present)
            active_mask = label.sum(axis=1) > 0
            bg_mask = ~active_mask

            # Keep 100% of active frames
            active_indices = label[active_mask].index

            # Sample background frames at a 1:1 ratio with the active frames
            # (or up to 1000 when the track has no active frame at all).
            bg_per_active = 1
            n_background = int(bg_mask.sum())
            if n_background > 0:
                if len(active_indices) > 0:
                    sample_size = min(len(active_indices) * bg_per_active, n_background)
                else:
                    sample_size = min(1000, n_background)
                bg_indices = label[bg_mask].sample(n=sample_size, random_state=42).index
            else:
                bg_indices = pd.Index([])

            # Combine and sort indices
            keep_indices = active_indices.union(bg_indices).sort_values()

            # Filter data; float32 halves the memory of the feature matrix.
            X_i = X_i.loc[keep_indices].astype(np.float32)
            label = label.loc[keep_indices]
            meta = meta.loc[keep_indices]

            # Log data statistics
            print(f"    Video {meta['video_id'].iloc[0]}: Active={len(active_indices)}, Background={len(bg_indices)}, Ratio={len(active_indices)/(len(bg_indices)+1):.2f}")

            data_storage['X'].append(X_i)
            data_storage['y'].append(label)
            data_storage['meta'].append(meta)
            all_actions.update(label.columns)
            del data, meta, label, X_i

        gc.collect()

        if not data_storage['X']:
            print(f"No data found for mode {mode}. Skipping.")
            continue

        X_tr = pd.concat(data_storage['X'], axis=0, ignore_index=True)
        # Actions missing from a video's label frame become NaN on concat; treat them as negative.
        y_tr_df = pd.concat(data_storage['y'], axis=0, ignore_index=True).fillna(0.0)
        meta_tr = pd.concat(data_storage['meta'], axis=0, ignore_index=True)

        # Clear list storage immediately
        del data_storage
        gc.collect()

        # --- 2. Phase 1 Training ---
        print(f"\n--- Phase 1: Training Baseline ({mode}) ---")
        actions_in_mode = sorted([a for a in all_actions if a in y_tr_df.columns])
        y_tr_df = y_tr_df.reindex(columns=actions_in_mode, fill_value=0.0)

        phase1_models_mode = {}
        phase1_thresholds_mode = {}

        # Folds are grouped by video; the grouping is the same for every action.
        groups = meta_tr.video_id

        for action in actions_in_mode:
            print(f"  Action: {action}")
            y_action = y_tr_df[action].values
            if y_action.sum() < CFG.n_splits:
                print(f"    Skipping (not enough positive samples)")
                continue

            model_dir = f"{CFG.model_path}/{CFG.model_name_xgb_base}/{mode}/{action}"
            oof_preds, models = train_xgboost(X_tr, pd.Series(y_action), groups, model_dir)
            best_thresh = tune_threshold(y_action, oof_preds)

            f1 = f1_score(y_action, (oof_preds >= best_thresh))
            print(f"    OOF F1: {f1:.4f} (Thresh: {best_thresh:.2f})")

            phase1_models_mode[action] = models
            phase1_thresholds_mode[action] = best_thresh
            joblib.dump(best_thresh, f"{model_dir}/threshold.pkl")
            save_feature_importance(models, X_tr.columns, model_dir, "phase1")

            # Collect OOF predictions and labels as segments for the global report.
            oof_df = pd.DataFrame({
                'video_id': meta_tr['video_id'],
                'agent_id': meta_tr['agent_id'],
                'target_id': meta_tr['target_id'],
                'frame': meta_tr['video_frame'],
                'pred': oof_preds,
                'label': y_action
            })

            # One track per (video, agent, target): grouping by video alone would
            # interleave the frames of different animals/pairs and label all of
            # them with the ids of the first row.  dropna=False keeps tracks
            # whose target_id is missing.
            track_keys = ['video_id', 'agent_id', 'target_id']
            for (vid, agent_id, target_id), grp in oof_df.groupby(track_keys, sort=False, dropna=False):
                grp = grp.sort_values('frame')
                frames = grp['frame'].values

                # Predictions and ground truth go through the same helper so both
                # are expressed in real frame numbers over the same sampled frames.
                segment_sources = (
                    (grp['pred'].values >= best_thresh, global_oof_preds),
                    (grp['label'].values.astype(int).astype(bool), global_ground_truth),
                )
                for is_positive, sink in segment_sources:
                    for start_frame, stop_frame in _positive_runs_as_frame_segments(frames, is_positive):
                        sink.append({
                            'video_id': vid,
                            'agent_id': agent_id,
                            'target_id': target_id,
                            'action': action,
                            'start_frame': start_frame,
                            'stop_frame': stop_frame
                        })

            del oof_preds, models, y_action, oof_df
            gc.collect()

        # --- Phase 2: Pseudo-Labeling (REMOVED in v7) ---
        print(f"Cleaning up memory for mode {mode}...")
        del X_tr, y_tr_df, meta_tr, groups, phase1_models_mode
        gc.collect()
    
    
    # =========================================================================
    # Global F1 Score Calculation
    # =========================================================================
    print("\n" + "="*60)
    print(" GLOBAL F1 SCORE REPORT")

    # The global score needs both OOF predictions and ground-truth segments;
    # either list is empty when no action was trained in any mode.
    has_global_report = bool(global_oof_preds and global_ground_truth)

    if has_global_report:
        print("Calculating Global MABe F1 Score from OOF predictions...")
        oof_pred_df = pd.DataFrame(global_oof_preds)
        oof_gt_df = pd.DataFrame(global_ground_truth)

        global_f1, global_details = calculate_mabe_f1(oof_gt_df, oof_pred_df)

        print(f"\n{'='*60}")
        print(f" GLOBAL CV F1 SCORE: {global_f1:.4f}")
        print(f"{'='*60}")

        print("\nPer-Action CV Results:")
        print("-" * 80)
        print(f"{'Action':<20} {'Precision':<12} {'Recall':<12} {'F1':<12}")
        print("-" * 80)

        # Sort by F1 score descending
        sorted_actions = sorted(global_details.keys(), key=lambda x: global_details[x]['F1'], reverse=True)

        for action in sorted_actions:
            d = global_details[action]
            print(f"{action:<20} {d['Precision']:<12.4f} {d['Recall']:<12.4f} {d['F1']:<12.4f}")
        print("-" * 80)

    print("="*60)

    if not has_global_report:
        # Without a global report, the per-action OOF F1 lines printed during
        # training are the only scores available.
        print("\nSee individual action F1 scores above.")

    print("\nDone! All modes processed.")


Loading train.csv...
Labeled videos: 863
Unlabeled (MABe22) videos: 7926
  - Used for Pseudo-labeling (10%): 792
  - Used for Validation (10%): 792

--- Phase 1: Training Unified Baseline Model ---
Canonical body parts: ['nose', 'ear_left', 'ear_right', 'body_center', 'tail_base', 'spine_middle', 'tail_tip']
Generating features for all labeled data...


Generating data for train: 100%|██████████| 863/863 [05:20<00:00,  2.70it/s] 



--- Training Baseline for Mode: SINGLE ---
  Action: others1
    OOF F1: 0.4469 (Thresh: 0.16)

    Top 10 Features (phase1):
ear_right+body_center    4524.241862
ear_left+body_center     4024.168193
ear_left+tail_tip        2363.867757
tail_base+tail_tip       2026.913574
s0_20                    1959.736453
cx_m30                   1291.299255
nose+ear_right           1242.225505
ear_right+tail_tip       1026.902527
nose+body_center          835.080048
ego_tail_base_y           781.477132
Name: mean_importance, dtype: float64
  Action: others2
    OOF F1: 0.4808 (Thresh: 0.29)

    Top 10 Features (phase1):
s0_20             2038.813141
nose+ear_left     1742.871602
sp_lf2            1322.224772
sp_lf             1268.439412
nt_lg10           1226.905314
sp_rt             1173.466370
nose+tail_base    1140.424723
act5              1093.417887
sp_m20            1003.062960
nose+tail_tip      891.708008
Name: mean_importance, dtype: float64
  Action: rear
    OOF F1: 0.3994 (Thresh: 0

KeyboardInterrupt: 

In [None]:
# =============================================================================
# Phase 4: Inference & Submission Generation
# =============================================================================
import os
import joblib
import numpy as np
import pandas as pd
import gc

# [NEW] Post-processing: Gap Filling
def fill_gaps(binary_preds, max_gap=5):
    """Fill short runs of 0s that lie between two runs of 1s.

    Args:
        binary_preds: 1-D array-like of 0/1 frame decisions.
        max_gap: longest run of 0s (inclusive) that is filled with 1s.

    Returns:
        A new integer numpy array; the input is not modified.  Runs of 0s
        touching the start or the end of the array are never filled, because
        they are not enclosed by positives on both sides.
    """
    preds = np.asarray(binary_preds).astype(int)  # astype returns a copy
    n_frames = preds.size
    if n_frames == 0:
        return preds

    # Pad with 1s so every run of 0s, including those touching an edge, yields
    # both a start and a stop marker and the two arrays pair up one-to-one.
    padded = np.concatenate(([1], preds, [1]))
    diff = np.diff(padded)
    starts = np.where(diff == -1)[0]
    stops = np.where(diff == 1)[0]

    for start, stop in zip(starts, stops):
        # The padding is only a pairing device: a run at either edge is bounded
        # by an artificial 1, not a real one, so it is not a gap.
        if start == 0 or stop == n_frames:
            continue
        gap_len = stop - start
        if gap_len <= max_gap:
            preds[start:stop] = 1
    return preds

# [NEW] Post-processing: Min Duration
def remove_short_duration(binary_preds, min_len=5):
    """Zero out short runs of 1s.

    Every maximal run of 1s whose length is ``<= min_len`` (the bound is
    inclusive, so a run of exactly ``min_len`` frames is removed too) is set
    to 0.  Works on an integer copy of ``binary_preds`` and returns it.
    """
    cleaned = binary_preds.astype(int)
    # Zero sentinels turn runs that touch either end into ordinary rising/falling edges.
    edges = np.diff(np.concatenate(([0], cleaned, [0])))
    run_begin = np.flatnonzero(edges == 1)
    run_end = np.flatnonzero(edges == -1)

    too_short = (run_end - run_begin) <= min_len
    for lo, hi in zip(run_begin[too_short], run_end[too_short]):
        cleaned[lo:hi] = 0
    return cleaned

# [Helper Function] Convert probabilities -> segments
def probabilities_to_segments(probs, threshold, video_id, agent, target, action):
    """Convert a per-frame probability (or 0/1) sequence into segment dicts.

    This redefines the helper of the same name from the evaluation cell and is
    kept behaviourally identical to it, including returning plain Python
    ``int`` frame bounds rather than numpy integers.

    Args:
        probs: 1-D array-like of per-frame scores.
        threshold: entries ``>= threshold`` count as positive.
        video_id, agent, target, action: copied verbatim into every segment.

    Returns:
        A list of dicts, one per maximal run of positives, whose
        ``start_frame``/``stop_frame`` form a half-open range of positions
        within ``probs``.
    """
    binary = (np.asarray(probs) >= threshold).astype(int)
    # Locate the 0->1 (segment start) and 1->0 (segment end) transitions;
    # the zero padding makes runs touching either end produce transitions too.
    diff = np.diff(np.concatenate(([0], binary, [0])))
    starts = np.where(diff == 1)[0]
    stops = np.where(diff == -1)[0]

    segments = []
    for start, stop in zip(starts, stops):
        segments.append({
            'video_id': video_id,
            'agent_id': agent,
            'target_id': target,
            'action': action,
            'start_frame': int(start),
            'stop_frame': int(stop)
        })
    return segments

submission_data = []

print("\n" + "="*60)
print(" PHASE 4: INFERENCE & SUBMISSION")
print("="*60)

# Run test-set prediction separately for the single and pair modes.
for mode in ['single', 'pair']:
    print(f"\n--- Inference for mode: {mode} ---")

    try:
        # Load the test manifest
        test = pd.read_csv(CFG.test_path)

        # Per-video fps lookup, built the same way as the training-time lookup,
        # so the fps passed to the feature transforms is resolved the same way
        # as during training instead of always falling back to the default.
        if {'video_id', 'frames_per_second'}.issubset(test.columns):
            test_fps_lookup = (
                test[['video_id', 'frames_per_second']]
                .dropna(subset=['frames_per_second'])
                .drop_duplicates('video_id')
                .set_index('video_id')['frames_per_second']
                .to_dict()
            )
        else:
            test_fps_lookup = {}

        # Find the saved model folders to learn which actions can be predicted.
        # This does not depend on the video, so it is done before any features
        # are generated.
        model_base_path = f"{CFG.model_path}/{CFG.model_name_xgb_base}/{mode}"

        if not os.path.exists(model_base_path):
            print(f"  No trained models found for {mode}. Skipping.")
            continue

        trained_actions = [d for d in os.listdir(model_base_path) if os.path.isdir(os.path.join(model_base_path, d))]

        # Load every action's fold models and threshold once per mode rather
        # than once per video.
        models_by_action = {}
        for action in trained_actions:
            model_dir_p1 = f"{CFG.model_path}/{CFG.model_name_xgb_base}/{mode}/{action}"

            if not os.path.exists(f"{model_dir_p1}/xgb_models.pkl"):
                continue

            final_models = joblib.load(f"{model_dir_p1}/xgb_models.pkl")
            # 0.5 is the fallback when no tuned threshold was saved.
            final_thresh = 0.5
            if os.path.exists(f"{model_dir_p1}/threshold.pkl"):
                final_thresh = joblib.load(f"{model_dir_p1}/threshold.pkl")
            models_by_action[action] = (final_models, final_thresh)

        if not models_by_action:
            print(f"  No trained models found for {mode}. Skipping.")
            continue

        # Create the test data generator
        gen_test = generate_mouse_data(
            test, 'test',
            generate_single=(mode=='single'),
            generate_pair=(mode=='pair')
        )

        for switch, data_t, meta_t, _ in gen_test:
            if switch != mode: continue

            # Feature engineering
            fps_t = _fps_from_meta(meta_t, test_fps_lookup, default_fps=30.0)
            if mode == 'single':
                X_t = transform_single(data_t, CFG.CANONICAL_BODY_PARTS, fps_t)
            else:
                X_t = transform_pair(data_t, CFG.CANONICAL_BODY_PARTS, fps_t)

            video_id = meta_t['video_id'].iloc[0]
            agent_id = meta_t['agent_id'].iloc[0]
            target_id = meta_t['target_id'].iloc[0]

            # The segment helper reports positions within the prediction array.
            # When the metadata carries frame numbers (the training cell reads
            # them from 'video_frame'), translate positions into them.
            # NOTE(review): assumes meta_t rows are aligned one-to-one with X_t rows - confirm.
            frame_numbers = None
            if 'video_frame' in meta_t.columns and len(meta_t) == len(X_t):
                frame_numbers = meta_t['video_frame'].to_numpy()

            # Predict each trained action
            for action, (final_models, final_thresh) in models_by_action.items():
                # Prediction (ensemble average over the fold models)
                prob = np.mean([m.predict_proba(X_t)[:, 1] for m in final_models], axis=0)

                # Post-processing (temporal smoothing)
                prob = apply_temporal_smoothing(prob)

                # Post-processing on the thresholded decisions (gap filling + min duration)
                binary_preds = (prob >= final_thresh).astype(int)
                binary_preds = fill_gaps(binary_preds, max_gap=5)
                binary_preds = remove_short_duration(binary_preds, min_len=5)

                # binary_preds is already 0/1, so 0.5 simply separates the two values.
                segments = probabilities_to_segments(binary_preds, 0.5, video_id, agent_id, target_id, action)

                if frame_numbers is not None:
                    for seg in segments:
                        first_pos, end_pos = seg['start_frame'], seg['stop_frame']
                        seg['start_frame'] = int(frame_numbers[first_pos])
                        # stop is exclusive: one past the last positive frame
                        seg['stop_frame'] = int(frame_numbers[end_pos - 1]) + 1

                submission_data.extend(segments)

            del X_t, data_t, meta_t
            gc.collect()

    except Exception as e:
        # Best effort: report the failure and carry on with the next mode.
        print(f"Error during inference for {mode}: {e}")
        import traceback
        traceback.print_exc()

# Build the CSV file
submission_columns = ['row_id', 'video_id', 'agent_id', 'target_id', 'action', 'start_frame', 'stop_frame']
if submission_data:
    print("\nCreating submission.csv...")
    submission_df = pd.DataFrame(submission_data)
    submission_df['row_id'] = range(len(submission_df))
    # Put the columns in submission order
    submission_df = submission_df[submission_columns]
    submission_df.to_csv("submission.csv", index=False)
    print(f"Submission saved to submission.csv with {len(submission_df)} segments.")
else:
    # Reset the frame so a submission_df left over from an earlier run of this
    # cell is not picked up by the summary cells further down.
    submission_df = pd.DataFrame(columns=submission_columns)
    print("\nNo submission data generated.")




In [None]:
# =============================================================================
# Validation Evaluation on MABe22 Split
# =============================================================================
print("\n" + "="*60)
print(" VALIDATION: MABe F1 Score Evaluation")
print("="*60)

# Generate predictions on validation set
validation_predictions = []
validation_ground_truth = []

# Only the first few validation videos are scored to keep this cell quick.
N_VALIDATION_VIDEOS = 10
validation_videos = train_unlabeled_val.head(N_VALIDATION_VIDEOS)

print(f"\nGenerating predictions on {len(validation_videos)} validation videos...")

# _fps_lookup was built from the labeled videos only; extend a copy with the
# validation videos so their fps can be resolved from the lookup as well.
val_fps_lookup = dict(_fps_lookup)
if {'video_id', 'frames_per_second'}.issubset(validation_videos.columns):
    val_fps_lookup.update(
        validation_videos[['video_id', 'frames_per_second']]
        .dropna(subset=['frames_per_second'])
        .drop_duplicates('video_id')
        .set_index('video_id')['frames_per_second']
        .to_dict()
    )

for mode in ['single', 'pair']:
    print(f"\n--- Mode: {mode} ---")

    # Check if models exist
    model_base_path = f"{CFG.model_path}/{CFG.model_name_xgb_base}/{mode}"
    if not os.path.exists(model_base_path):
        print(f"  No models found for {mode}. Skipping.")
        continue

    trained_actions = [d for d in os.listdir(model_base_path) if os.path.isdir(os.path.join(model_base_path, d))]
    print(f"  Trained actions: {', '.join(trained_actions)}")

    # Load each action's fold models and threshold once per mode instead of
    # re-reading them from disk for every video.
    models_by_action = {}
    for action in trained_actions:
        model_dir = f"{model_base_path}/{action}"
        model_path = f"{model_dir}/xgb_models.pkl"
        threshold_path = f"{model_dir}/threshold.pkl"

        if not os.path.exists(model_path) or not os.path.exists(threshold_path):
            continue

        models_by_action[action] = (joblib.load(model_path), joblib.load(threshold_path))

    # Generate data
    gen_val = generate_mouse_data(
        validation_videos,
        'train',
        generate_single=(mode=='single'),
        generate_pair=(mode=='pair')
    )

    for switch, data_v, meta_v, label_v in gen_val:
        if switch != mode: continue

        # Feature engineering
        fps_v = _fps_from_meta(meta_v, val_fps_lookup, default_fps=30.0)
        if mode == 'single':
            X_v = transform_single(data_v, CFG.CANONICAL_BODY_PARTS, fps_v)
        else:
            X_v = transform_pair(data_v, CFG.CANONICAL_BODY_PARTS, fps_v)

        video_id = meta_v['video_id'].iloc[0]
        agent_id = meta_v['agent_id'].iloc[0]
        target_id = meta_v['target_id'].iloc[0]

        # Ground truth below is expressed in label_v.index values, while the
        # segment helper reports array positions; translate positions through
        # the same index so both sides share one coordinate system.
        # NOTE(review): assumes label_v rows are aligned one-to-one with X_v rows - confirm.
        frame_index = None
        if label_v is not None and len(label_v) == len(X_v):
            frame_index = label_v.index.to_numpy()

        # Predict with each action's model
        for action, (models, threshold) in models_by_action.items():
            # Ensemble prediction
            prob = np.mean([m.predict_proba(X_v)[:, 1] for m in models], axis=0)
            prob = apply_temporal_smoothing(prob)

            # Convert to segments
            segments = probabilities_to_segments(prob, threshold, video_id, agent_id, target_id, action)
            if frame_index is not None:
                for seg in segments:
                    first_pos, end_pos = seg['start_frame'], seg['stop_frame']
                    seg['start_frame'] = int(frame_index[first_pos])
                    # stop is exclusive: one past the last positive frame
                    seg['stop_frame'] = int(frame_index[end_pos - 1]) + 1
            validation_predictions.extend(segments)

        # Ground truth (if available)
        if label_v is not None:
            for action_col in label_v.columns:
                frames_with_action = label_v[label_v[action_col] > 0].index.tolist()
                if frames_with_action:
                    # Group consecutive frames into half-open [start, stop) segments
                    segments = []
                    start = frames_with_action[0]
                    for i in range(1, len(frames_with_action)):
                        if frames_with_action[i] != frames_with_action[i-1] + 1:
                            segments.append({'start_frame': start, 'stop_frame': frames_with_action[i-1] + 1})
                            start = frames_with_action[i]
                    segments.append({'start_frame': start, 'stop_frame': frames_with_action[-1] + 1})

                    for seg in segments:
                        validation_ground_truth.append({
                            'video_id': video_id,
                            'agent_id': agent_id,
                            'target_id': target_id,
                            'action': action_col,
                            'start_frame': seg['start_frame'],
                            'stop_frame': seg['stop_frame']
                        })

        del X_v, data_v, meta_v, label_v
        gc.collect()

# Calculate F1 score
if validation_predictions and validation_ground_truth:
    pred_df = pd.DataFrame(validation_predictions)
    gt_df = pd.DataFrame(validation_ground_truth)

    overall_f1, action_details = calculate_mabe_f1(gt_df, pred_df)

    print("\n" + "="*60)
    print(f" Overall F1 Score: {overall_f1:.4f}")
    print("="*60)

    print("\nPer-Action Results:")
    print("-" * 80)
    print(f"{'Action':<20} {'Precision':<12} {'Recall':<12} {'F1':<12} {'TP':<8} {'FP':<8} {'FN':<8}")
    print("-" * 80)

    for action in sorted(action_details.keys()):
        d = action_details[action]
        print(f"{action:<20} {d['Precision']:<12.4f} {d['Recall']:<12.4f} {d['F1']:<12.4f} "
              f"{d['TP']:<8} {d['FP']:<8} {d['FN']:<8}")

    print("-" * 80)
else:
    print("\n⚠️ No predictions or ground truth available for evaluation")

print("\n✅ Validation evaluation complete!")

In [None]:
# =============================================================================
# Prediction Summary
# =============================================================================
# Summarize only when an earlier cell produced a non-empty submission frame.
_has_predictions = 'submission_df' in locals() and not submission_df.empty

if not _has_predictions:
    print("No submission data found to summarize.")
else:
    n_segments = len(submission_df)
    banner = "=" * 60

    print("\n" + banner)
    print(" PREDICTION DISTRIBUTION (MABe22 Test Set)")
    print(banner)

    # Number of predicted segments for each action, most frequent first
    action_counts = submission_df['action'].value_counts()

    print(f"Total Predicted Segments: {n_segments}")
    print("\nAction Counts:")
    print(action_counts)

    print("\nAction Proportions (%):")
    print(action_counts.div(n_segments).mul(100).round(2))

    # Bar chart of the same counts; skipped quietly when matplotlib is missing
    try:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(10, 6))
        ax = action_counts.plot(kind='bar')
        ax.set_title('Predicted Action Distribution on MABe22 Test Set')
        ax.set_xlabel('Action')
        ax.set_ylabel('Count')
        plt.tight_layout()
        plt.show()
    except ImportError:
        pass

In [None]:
# =============================================================================
# Prediction Summary
# =============================================================================
# NOTE(review): this cell prints the same summary as the "Prediction Summary"
# cell directly above it; one of the two can probably be removed.
# The summary runs only when an earlier cell left a non-empty submission_df.
if 'submission_df' in locals() and not submission_df.empty:
    print("\n" + "="*60)
    print(" PREDICTION DISTRIBUTION (MABe22 Test Set)")
    print("="*60)
    
    # Count segments per action
    action_counts = submission_df['action'].value_counts()
    
    print(f"Total Predicted Segments: {len(submission_df)}")
    print("\nAction Counts:")
    print(action_counts)
    
    # Share of all predicted segments that each action accounts for
    print("\nAction Proportions (%):")
    print((action_counts / len(submission_df) * 100).round(2))
    
    # Optional: Plot if matplotlib is available
    try:
        import matplotlib.pyplot as plt
        plt.figure(figsize=(10, 6))
        action_counts.plot(kind='bar')
        plt.title('Predicted Action Distribution on MABe22 Test Set')
        plt.xlabel('Action')
        plt.ylabel('Count')
        plt.tight_layout()
        plt.show()
    except ImportError:
        # matplotlib is not installed: the text summary above is all that is shown
        pass
else:
    print("No submission data found to summarize.")