In [1]:
# test77.ipynb
# - Feature Engineering: v7 egocentric transform (centering-based)
# - Post-processing: gap filling + short duration removal
# - Model: XGBoost with Optuna hyperparameter optimization
# - CV: GroupKFold (n_splits=2)

from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score
from sklearn.base import clone
from xgboost import XGBClassifier
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import itertools
import warnings
import optuna
import joblib
import glob
import gc
import os

optuna.logging.set_verbosity(optuna.logging.WARNING)
warnings.filterwarnings('ignore')

In [None]:
"""F Beta customized for the data format of the MABe challenge."""

import json

from collections import defaultdict

import pandas as pd
import polars as pl


class HostVisibleError(Exception):
    pass


def single_lab_f1(lab_solution: pl.DataFrame, lab_submission: pl.DataFrame, beta: float = 1) -> float:
    label_frames: defaultdict[str, set[int]] = defaultdict(set)
    prediction_frames: defaultdict[str, set[int]] = defaultdict(set)

    for row in lab_solution.to_dicts():
        label_frames[row['label_key']].update(range(row['start_frame'], row['stop_frame']))

    for video in lab_solution['video_id'].unique():
        active_labels: str = lab_solution.filter(pl.col('video_id') == video)['behaviors_labeled'].first()  # ty: ignore
        active_labels: set[str] = set(json.loads(active_labels))
        predicted_mouse_pairs: defaultdict[str, set[int]] = defaultdict(set)

        for row in lab_submission.filter(pl.col('video_id') == video).to_dicts():
            # Since the labels are sparse, we can't evaluate prediction keys not in the active labels.
            if ','.join([str(row['agent_id']), str(row['target_id']), row['action']]) not in active_labels:
                continue

            new_frames = set(range(row['start_frame'], row['stop_frame']))
            # Ignore truly redundant predictions.
            new_frames = new_frames.difference(prediction_frames[row['prediction_key']])
            prediction_pair = ','.join([str(row['agent_id']), str(row['target_id'])])
            if predicted_mouse_pairs[prediction_pair].intersection(new_frames):
                # A single agent can have multiple targets per frame (ex: evading all other mice) but only one action per target per frame.
                raise HostVisibleError('Multiple predictions for the same frame from one agent/target pair')
            prediction_frames[row['prediction_key']].update(new_frames)
            predicted_mouse_pairs[prediction_pair].update(new_frames)

    tps = defaultdict(int)
    fns = defaultdict(int)
    fps = defaultdict(int)
    for key, pred_frames in prediction_frames.items():
        action = key.split('_')[-1]
        matched_label_frames = label_frames[key]
        tps[action] += len(pred_frames.intersection(matched_label_frames))
        fns[action] += len(matched_label_frames.difference(pred_frames))
        fps[action] += len(pred_frames.difference(matched_label_frames))

    distinct_actions = set()
    for key, frames in label_frames.items():
        action = key.split('_')[-1]
        distinct_actions.add(action)
        if key not in prediction_frames:
            fns[action] += len(frames)

    action_f1s = []
    for action in distinct_actions:
        if tps[action] + fns[action] + fps[action] == 0:
            action_f1s.append(0)
        else:
            action_f1s.append((1 + beta**2) * tps[action] / ((1 + beta**2) * tps[action] + beta**2 * fns[action] + fps[action]))
    return sum(action_f1s) / len(action_f1s)


def mouse_fbeta(solution: pd.DataFrame, submission: pd.DataFrame, beta: float = 1) -> float:
    if len(solution) == 0 or len(submission) == 0:
        raise ValueError('Missing solution or submission data')

    expected_cols = ['video_id', 'agent_id', 'target_id', 'action', 'start_frame', 'stop_frame']

    for col in expected_cols:
        if col not in solution.columns:
            raise ValueError(f'Solution is missing column {col}')
        if col not in submission.columns:
            raise ValueError(f'Submission is missing column {col}')

    solution: pl.DataFrame = pl.DataFrame(solution)
    submission: pl.DataFrame = pl.DataFrame(submission)
    assert (solution['start_frame'] <= solution['stop_frame']).all()
    assert (submission['start_frame'] <= submission['stop_frame']).all()
    solution_videos = set(solution['video_id'].unique())
    # Need to align based on video IDs as we can't rely on the row IDs for handling public/private splits.
    submission = submission.filter(pl.col('video_id').is_in(solution_videos))

    solution = solution.with_columns(
        pl.concat_str(
            [
                pl.col('video_id').cast(pl.Utf8),
                pl.col('agent_id').cast(pl.Utf8),
                pl.col('target_id').cast(pl.Utf8),
                pl.col('action'),
            ],
            separator='_',
        ).alias('label_key'),
    )
    submission = submission.with_columns(
        pl.concat_str(
            [
                pl.col('video_id').cast(pl.Utf8),
                pl.col('agent_id').cast(pl.Utf8),
                pl.col('target_id').cast(pl.Utf8),
                pl.col('action'),
            ],
            separator='_',
        ).alias('prediction_key'),
    )

    lab_scores = []
    for lab in solution['lab_id'].unique():
        lab_solution = solution.filter(pl.col('lab_id') == lab).clone()
        lab_videos = set(lab_solution['video_id'].unique())
        lab_submission = submission.filter(pl.col('video_id').is_in(lab_videos)).clone()
        lab_scores.append(single_lab_f1(lab_solution, lab_submission, beta=beta))

    return sum(lab_scores) / len(lab_scores)


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, beta: float = 1) -> float:
    """
    F1 score for the MABe Challenge
    """
    solution = solution.drop(row_id_column_name, axis='columns', errors='ignore')
    submission = submission.drop(row_id_column_name, axis='columns', errors='ignore')
    return mouse_fbeta(solution, submission, beta=beta)


In [None]:
class CFG:
    train_path = "MABe-mouse-behavior-detection/train.csv"
    test_path = "MABe-mouse-behavior-detection/test.csv"
    train_annotation_path = "MABe-mouse-behavior-detection/train_annotation"
    train_tracking_path = "MABe-mouse-behavior-detection/train_tracking"
    test_tracking_path = "MABe-mouse-behavior-detection/test_tracking"

    model_path = "models"
    model_name = "xgboost" 
    
    mode = "validate"
    # mode = "submit"
    
    n_splits = 2
    cv = GroupKFold(n_splits=n_splits)

    xgb_params = {
        'n_estimators': 300,
        'learning_rate': 0.05,
        'max_depth': 6,
        'n_jobs': -1,
        'tree_method': 'hist',
        'random_state': 42,
        'eval_metric': 'logloss',
        'device': 'cuda'
    }


In [3]:
train = pd.read_csv(CFG.train_path)
train['n_mice'] = 4 - train[['mouse1_strain', 'mouse2_strain', 'mouse3_strain', 'mouse4_strain']].isna().sum(axis=1)
train_without_mabe22 = train.query("~lab_id.str.startswith('MABe22_')")

test = pd.read_csv(CFG.test_path)
body_parts_tracked_list = list(np.unique(train.body_parts_tracked))

body_parts_tracked_list

['["body_center", "ear_left", "ear_right", "forepaw_left", "forepaw_right", "hindpaw_left", "hindpaw_right", "neck", "nose", "tail_base", "tail_midpoint", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "headpiece_bottombackleft", "headpiece_bottombackright", "headpiece_bottomfrontleft", "headpiece_bottomfrontright", "headpiece_topbackleft", "headpiece_topbackright", "headpiece_topfrontleft", "headpiece_topfrontright", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_midpoint", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "hip_left", "hip_right", "lateral_left", "lateral_right", "nose", "spine_1", "spine_2", "tail_base", "tail_middle_1", "tail_middle_2", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "neck", "nose", "tail_base", "tail_midpoint", "tail_tip"]',
 '["body_center", "ear_left", "ear_right", "lateral_left", "lateral_right", "nose", "tail_base", "tail_tip"]',
 '["body_center", "ear_left", "ear_r

In [4]:
# (lab_id, video_id, behaviors_labeled, target_id, agent_id) --> Build dataset

def create_solution_df(dataset):
    solution = []
    for _, row in tqdm(dataset.iterrows(), total=len(dataset)):
    
        lab_id = row['lab_id']
        if lab_id.startswith('MABe22'): 
            continue
        
        video_id = row['video_id']
        path = f"{CFG.train_annotation_path}/{lab_id}/{video_id}.parquet"
        try:
            annot = pd.read_parquet(path)
        except FileNotFoundError:
            continue
    
        annot['lab_id'] = lab_id
        annot['video_id'] = video_id
        annot['behaviors_labeled'] = row['behaviors_labeled']
        annot['target_id'] = np.where(annot.target_id != annot.agent_id, annot['target_id'].apply(lambda s: f"mouse{s}"), 'self')
        annot['agent_id'] = annot['agent_id'].apply(lambda s: f"mouse{s}")
        solution.append(annot)
    
    solution = pd.concat(solution)
    
    return solution

# validate mode when thresholds를 initialize
if CFG.mode == "validate":
    thresholds = {
        "single": {},
        "pair": {}
    }
# train mode when previously saved thresholds를 load
else:
    try:
        thresholds = joblib.load(f"{CFG.model_path}/{CFG.model_name}/thresholds.pkl")
        print(f"load threshold file: {CFG.model_path}/{CFG.model_name}/thresholds.pkl")
    except Exception as e:
        print(f"load threshold file failed: {e}")
        thresholds = {
            "single": {"default": 0.5},
            "pair": {"default": 0.5}
        }

In [None]:
drop_body_parts =  [
    'headpiece_bottombackleft', 'headpiece_bottombackright', 'headpiece_bottomfrontleft', 'headpiece_bottomfrontright', 
    'headpiece_topbackleft', 'headpiece_topbackright', 'headpiece_topfrontleft', 'headpiece_topfrontright', 
    'spine_1', 'spine_2', 'tail_middle_1', 'tail_middle_2', 'tail_midpoint'
]

def generate_mouse_data(dataset, traintest, traintest_directory=None, generate_single=True, generate_pair=True):
    # tracking Use data
    if traintest_directory is None:
        traintest_directory = f"MABe-mouse-behavior-detection/{traintest}_tracking"
        
    for _, row in dataset.iterrows():
        lab_id = row.lab_id
        if lab_id.startswith('MABe22') or type(row.behaviors_labeled) != str: 
            continue
        
        video_id = row.video_id
        path = f"{traintest_directory}/{lab_id}/{video_id}.parquet" # tracking
        vid = pd.read_parquet(path)
        if len(np.unique(vid.bodypart)) > 5:
            vid = vid.query("~ bodypart.isin(@drop_body_parts)")
            
        pvid = vid.pivot(columns=['mouse_id', 'bodypart'], index='video_frame', values=['x', 'y'])

        idx = 1
        del vid
        gc.collect()
        
        pvid = pvid.reorder_levels([1, 2, 0], axis=1).T.sort_index().T
        pvid /= row.pix_per_cm_approx

        vid_behaviors = json.loads(row.behaviors_labeled)
        vid_behaviors = sorted(list({b.replace("'", "") for b in vid_behaviors}))
        vid_behaviors = [b.split(',') for b in vid_behaviors]
        vid_behaviors = pd.DataFrame(vid_behaviors, columns=['agent', 'target', 'action'])
        
        if traintest == 'train':
            try:
                annot = pd.read_parquet(path.replace('train_tracking', 'train_annotation'))
            except FileNotFoundError:
                continue

        if generate_single:
            vid_behaviors_subset = vid_behaviors.query("target == 'self'")
            for mouse_id_str in np.unique(vid_behaviors_subset.agent):
                try:
                    mouse_id = int(mouse_id_str[-1])
                    vid_agent_actions = np.unique(vid_behaviors_subset.query("agent == @mouse_id_str").action)
                    single_mouse = pvid.loc[:, mouse_id]
                    assert len(single_mouse) == len(pvid)
                    single_mouse_meta = pd.DataFrame({
                        'video_id': video_id,
                        'agent_id': mouse_id_str,
                        'target_id': 'self',
                        'video_frame': single_mouse.index
                    })
                    if traintest == 'train':
                        single_mouse_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=single_mouse.index)
                        annot_subset = annot.query("(agent_id == @mouse_id) & (target_id == @mouse_id)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            single_mouse_label.loc[annot_row['start_frame']:annot_row['stop_frame'], annot_row.action] = 1.0
                        yield 'single', single_mouse, single_mouse_meta, single_mouse_label
                    else:
                        yield 'single', single_mouse, single_mouse_meta, vid_agent_actions
                except KeyError:
                    pass

        if generate_pair:
            vid_behaviors_subset = vid_behaviors.query("target != 'self'")
            if len(vid_behaviors_subset) > 0:
                for agent, target in itertools.permutations(np.unique(pvid.columns.get_level_values('mouse_id')), 2): # int8
                    agent_str = f"mouse{agent}"
                    target_str = f"mouse{target}"
                    vid_agent_actions = np.unique(vid_behaviors_subset.query("(agent == @agent_str) & (target == @target_str)").action)
                    mouse_pair = pd.concat([pvid[agent], pvid[target]], axis=1, keys=['A', 'B'])
                    assert len(mouse_pair) == len(pvid)
                    mouse_pair_meta = pd.DataFrame({
                        'video_id': video_id,
                        'agent_id': agent_str,
                        'target_id': target_str,
                        'video_frame': mouse_pair.index
                    })
                    if traintest == 'train':
                        mouse_pair_label = pd.DataFrame(0.0, columns=vid_agent_actions, index=mouse_pair.index)
                        annot_subset = annot.query("(agent_id == @agent) & (target_id == @target)")
                        for i in range(len(annot_subset)):
                            annot_row = annot_subset.iloc[i]
                            mouse_pair_label.loc[annot_row['start_frame']:annot_row['stop_frame'], annot_row.action] = 1.0
                        yield 'pair', mouse_pair, mouse_pair_meta, mouse_pair_label
                    else:
                        yield 'pair', mouse_pair, mouse_pair_meta, vid_agent_actions

In [6]:
def safe_rolling(series, window, func, min_periods=None):
    if min_periods is None:
        min_periods = max(1, window // 4)
    return series.rolling(window, min_periods=min_periods, center=True).apply(func, raw=True)

def _scale(n_frames_at_30fps, fps, ref=30.0):
    return max(1, int(round(n_frames_at_30fps * float(fps) / ref)))

def _scale_signed(n_frames_at_30fps, fps, ref=30.0):
    if n_frames_at_30fps == 0:
        return 0
    s = 1 if n_frames_at_30fps > 0 else -1
    mag = max(1, int(round(abs(n_frames_at_30fps) * float(fps) / ref)))
    return s * mag

def _fps_from_meta(meta_df, fallback_lookup, default_fps=30.0):
    if 'frames_per_second' in meta_df.columns and pd.notnull(meta_df['frames_per_second']).any():
        return float(meta_df['frames_per_second'].iloc[0])
    vid = meta_df['video_id'].iloc[0]
    return float(fallback_lookup.get(vid, default_fps))

def add_curvature_features(X, center_x, center_y, fps):
    vel_x = center_x.diff()
    vel_y = center_y.diff()
    acc_x = vel_x.diff()
    acc_y = vel_y.diff()

    cross_prod = vel_x * acc_y - vel_y * acc_x
    vel_mag = np.sqrt(vel_x**2 + vel_y**2)
    curvature = np.abs(cross_prod) / (vel_mag**3 + 1e-6)

    for w in [25, 50, 75]:
        ws = _scale(w, fps)
        X[f'curv_mean_{w}'] = curvature.rolling(ws, min_periods=max(1, ws // 5)).mean()

    angle = np.arctan2(vel_y, vel_x)
    angle_change = np.abs(angle.diff())
    w = 30
    ws = _scale(w, fps)
    X[f'turn_rate_{w}'] = angle_change.rolling(ws, min_periods=max(1, ws // 5)).sum()

    return X

def add_multiscale_features(X, center_x, center_y, fps):
    speed = np.sqrt(center_x.diff()**2 + center_y.diff()**2) * float(fps)

    scales = [20, 40, 60, 80]
    for scale in scales:
        ws = _scale(scale, fps)
        if len(speed) >= ws:
            X[f'sp_m{scale}'] = speed.rolling(ws, min_periods=max(1, ws // 4)).mean()
            X[f'sp_s{scale}'] = speed.rolling(ws, min_periods=max(1, ws // 4)).std()

    if len(scales) >= 2 and f'sp_m{scales[0]}' in X.columns and f'sp_m{scales[-1]}' in X.columns:
        X['sp_ratio'] = X[f'sp_m{scales[0]}'] / (X[f'sp_m{scales[-1]}'] + 1e-6)

    return X

def add_state_features(X, center_x, center_y, fps):
    speed = np.sqrt(center_x.diff()**2 + center_y.diff()**2) * float(fps)
    w_ma = _scale(15, fps)
    speed_ma = speed.rolling(w_ma, min_periods=max(1, w_ma // 3)).mean()

    try:
        bins = [-np.inf, 0.5 * fps, 2.0 * fps, 5.0 * fps, np.inf]
        speed_states = pd.cut(speed_ma, bins=bins, labels=[0, 1, 2, 3]).astype(float)

        for window in [20, 40, 60, 80]:
            ws = _scale(window, fps)
            if len(speed_states) >= ws:
                for state in [0, 1, 2, 3]:
                    X[f's{state}_{window}'] = (
                        (speed_states == state).astype(float)
                        .rolling(ws, min_periods=max(1, ws // 5)).mean()
                    )
                state_changes = (speed_states != speed_states.shift(1)).astype(float)
                X[f'trans_{window}'] = state_changes.rolling(ws, min_periods=max(1, ws // 5)).sum()
    except Exception:
        pass

    return X

def add_longrange_features(X, center_x, center_y, fps):
    for window in [30, 60, 120]:
        ws = _scale(window, fps)
        if len(center_x) >= ws:
            X[f'x_ml{window}'] = center_x.rolling(ws, min_periods=max(5, ws // 6)).mean()
            X[f'y_ml{window}'] = center_y.rolling(ws, min_periods=max(5, ws // 6)).mean()

    for span in [30, 60, 120]:
        s = _scale(span, fps)
        X[f'x_e{span}'] = center_x.ewm(span=s, min_periods=1).mean()
        X[f'y_e{span}'] = center_y.ewm(span=s, min_periods=1).mean()

    speed = np.sqrt(center_x.diff()**2 + center_y.diff()**2) * float(fps)  # cm/s
    for window in [30, 60, 120]:
        ws = _scale(window, fps)
        if len(speed) >= ws:
            X[f'sp_pct{window}'] = speed.rolling(ws, min_periods=max(5, ws // 6)).rank(pct=True)

    return X

def add_interaction_features(X, mouse_pair, avail_A, avail_B, fps):
    if 'body_center' not in avail_A or 'body_center' not in avail_B:
        return X

    rel_x = mouse_pair['A']['body_center']['x'] - mouse_pair['B']['body_center']['x']
    rel_y = mouse_pair['A']['body_center']['y'] - mouse_pair['B']['body_center']['y']
    rel_dist = np.sqrt(rel_x**2 + rel_y**2)

    A_vx = mouse_pair['A']['body_center']['x'].diff()
    A_vy = mouse_pair['A']['body_center']['y'].diff()
    B_vx = mouse_pair['B']['body_center']['x'].diff()
    B_vy = mouse_pair['B']['body_center']['y'].diff()

    A_lead = (A_vx * rel_x + A_vy * rel_y) / (np.sqrt(A_vx**2 + A_vy**2) * rel_dist + 1e-6)
    B_lead = (B_vx * (-rel_x) + B_vy * (-rel_y)) / (np.sqrt(B_vx**2 + B_vy**2) * rel_dist + 1e-6)

    for window in [30, 60]:
        ws = _scale(window, fps)
        X[f'A_ld{window}'] = A_lead.rolling(ws, min_periods=max(1, ws // 6)).mean()
        X[f'B_ld{window}'] = B_lead.rolling(ws, min_periods=max(1, ws // 6)).mean()

    approach = -rel_dist.diff()
    chase = approach * B_lead
    w = 30
    ws = _scale(w, fps)
    X[f'chase_{w}'] = chase.rolling(ws, min_periods=max(1, ws // 6)).mean()

    for window in [60, 120]:
        ws = _scale(window, fps)
        A_sp = np.sqrt(A_vx**2 + A_vy**2)
        B_sp = np.sqrt(B_vx**2 + B_vy**2)
        X[f'sp_cor{window}'] = A_sp.rolling(ws, min_periods=max(1, ws // 6)).corr(B_sp)

    return X

In [7]:

def compute_egocentric_transform(mouse_df, body_parts, target_points=None):
    """
    Transforms coordinates to an egocentric frame based on body_center and tail_base.
    Origin: body_center
    Y-axis: vector from tail_base to body_center (spine points UP)
    """
    if 'body_center' not in mouse_df.columns or 'tail_base' not in mouse_df.columns:
        return None
    
    # 1. Translation: Center at body_center
    origin_x = mouse_df['body_center']['x']
    origin_y = mouse_df['body_center']['y']
    
    # 2. Rotation: Align spine (tail_base -> body_center) with Y-axis
    # Vector from tail to center
    spine_x = mouse_df['body_center']['x'] - mouse_df['tail_base']['x']
    spine_y = mouse_df['body_center']['y'] - mouse_df['tail_base']['y']
    
    # Angle of spine relative to Y-axis (0, 1)
    # We want to rotate so that (spine_x, spine_y) becomes (0, +mag)
    # Current angle of spine in global coords
    theta = np.arctan2(spine_y, spine_x)
    # We want this to be pi/2 (90 degrees, pointing up)
    # So we rotate by (pi/2 - theta)
    rotation_angle = np.pi/2 - theta
    
    cos_a = np.cos(rotation_angle)
    sin_a = np.sin(rotation_angle)
    
    ego_data = {}
    
    # Transform own body parts
    for part in body_parts:
        if part in mouse_df.columns:
            # Translate
            dx = mouse_df[part]['x'] - origin_x
            dy = mouse_df[part]['y'] - origin_y
            # Rotate
            ego_data[f'ego_{part}_x'] = dx * cos_a - dy * sin_a
            ego_data[f'ego_{part}_y'] = dx * sin_a + dy * cos_a
            
    # Transform target points (e.g., other mouse's body parts) if provided
    if target_points is not None:
        for col_name, (tx, ty) in target_points.items():
            dx = tx - origin_x
            dy = ty - origin_y
            ego_data[f'ego_target_{col_name}_x'] = dx * cos_a - dy * sin_a
            ego_data[f'ego_target_{col_name}_y'] = dx * sin_a + dy * cos_a
            
    return pd.DataFrame(ego_data, index=mouse_df.index)



def transform_single(single_mouse, body_parts_tracked, fps):
    available_body_parts = single_mouse.columns.get_level_values(0)

    X = pd.DataFrame({
        f"{p1}+{p2}": np.square(single_mouse[p1] - single_mouse[p2]).sum(axis=1, skipna=False)
        for p1, p2 in itertools.combinations(body_parts_tracked, 2)
        if p1 in available_body_parts and p2 in available_body_parts
    })
    X = X.reindex(columns=[f"{p1}+{p2}" for p1, p2 in itertools.combinations(body_parts_tracked, 2)], copy=False)

    if all(p in single_mouse.columns for p in ['ear_left', 'ear_right', 'tail_base']):
        lag = _scale(10, fps)
        shifted = single_mouse[['ear_left', 'ear_right', 'tail_base']].shift(lag)
        speeds = pd.DataFrame({
            'sp_lf': np.square(single_mouse['ear_left'] - shifted['ear_left']).sum(axis=1, skipna=False),
            'sp_rt': np.square(single_mouse['ear_right'] - shifted['ear_right']).sum(axis=1, skipna=False),
            'sp_lf2': np.square(single_mouse['ear_left'] - shifted['tail_base']).sum(axis=1, skipna=False),
            'sp_rt2': np.square(single_mouse['ear_right'] - shifted['tail_base']).sum(axis=1, skipna=False),
        })
        X = pd.concat([X, speeds], axis=1)

    if 'nose+tail_base' in X.columns and 'ear_left+ear_right' in X.columns:
        X['elong'] = X['nose+tail_base'] / (X['ear_left+ear_right'] + 1e-6)

    if all(p in available_body_parts for p in ['nose', 'body_center', 'tail_base']):
        v1 = single_mouse['nose'] - single_mouse['body_center']
        v2 = single_mouse['tail_base'] - single_mouse['body_center']
        X['body_ang'] = (v1['x'] * v2['x'] + v1['y'] * v2['y']) / (
            np.sqrt(v1['x']**2 + v1['y']**2) * np.sqrt(v2['x']**2 + v2['y']**2) + 1e-6)

    if 'body_center' in available_body_parts:
        cx = single_mouse['body_center']['x']
        cy = single_mouse['body_center']['y']

        for w in [5, 15, 30, 60]:
            ws = _scale(w, fps)
            roll = dict(min_periods=1, center=True)
            X[f'cx_m{w}'] = cx.rolling(ws, **roll).mean()
            X[f'cy_m{w}'] = cy.rolling(ws, **roll).mean()
            X[f'cx_s{w}'] = cx.rolling(ws, **roll).std()
            X[f'cy_s{w}'] = cy.rolling(ws, **roll).std()
            X[f'x_rng{w}'] = cx.rolling(ws, **roll).max() - cx.rolling(ws, **roll).min()
            X[f'y_rng{w}'] = cy.rolling(ws, **roll).max() - cy.rolling(ws, **roll).min()
            X[f'disp{w}'] = np.sqrt(cx.diff().rolling(ws, min_periods=1).sum()**2 +
                                     cy.diff().rolling(ws, min_periods=1).sum()**2)
            X[f'act{w}'] = np.sqrt(cx.diff().rolling(ws, min_periods=1).var() +
                                   cy.diff().rolling(ws, min_periods=1).var())

        X = add_curvature_features(X, cx, cy, fps)
        X = add_multiscale_features(X, cx, cy, fps)
        X = add_state_features(X, cx, cy, fps)
        X = add_longrange_features(X, cx, cy, fps)

    if all(p in available_body_parts for p in ['nose', 'tail_base']):
        nt_dist = np.sqrt((single_mouse['nose']['x'] - single_mouse['tail_base']['x'])**2 +
                          (single_mouse['nose']['y'] - single_mouse['tail_base']['y'])**2)
        for lag in [10, 20, 40]:
            l = _scale(lag, fps)
            X[f'nt_lg{lag}'] = nt_dist.shift(l)
            X[f'nt_df{lag}'] = nt_dist - nt_dist.shift(l)

    if all(p in available_body_parts for p in ['ear_left', 'ear_right']):
        ear_d = np.sqrt((single_mouse['ear_left']['x'] - single_mouse['ear_right']['x'])**2 +
                        (single_mouse['ear_left']['y'] - single_mouse['ear_right']['y'])**2)
        for off in [-30, -20, -10, 10, 20, 30]:
            o = _scale_signed(off, fps)
            X[f'ear_o{off}'] = ear_d.shift(-o)
        w = _scale(30, fps)
        X['ear_con'] = ear_d.rolling(w, min_periods=1, center=True).std() / \
                       (ear_d.rolling(w, min_periods=1, center=True).mean() + 1e-6)
    
    # [NEW] Add Egocentric Features
    ego_df = compute_egocentric_transform(single_mouse, body_parts_tracked)
    if ego_df is not None:
        X = pd.concat([X, ego_df], axis=1)

    return X.astype(np.float32, copy=False)



def transform_pair(mouse_pair, body_parts_tracked, fps):
    avail_A = mouse_pair['A'].columns.get_level_values(0)
    avail_B = mouse_pair['B'].columns.get_level_values(0)

    X = pd.DataFrame({
        f"12+{p1}+{p2}": np.square(mouse_pair['A'][p1] - mouse_pair['B'][p2]).sum(axis=1, skipna=False)
        for p1, p2 in itertools.product(body_parts_tracked, repeat=2)
        if p1 in avail_A and p2 in avail_B
    })
    X = X.reindex(columns=[f"12+{p1}+{p2}" for p1, p2 in itertools.product(body_parts_tracked, repeat=2)], copy=False)

    if ('A', 'ear_left') in mouse_pair.columns and ('B', 'ear_left') in mouse_pair.columns:
        lag = _scale(10, fps)
        shA = mouse_pair['A']['ear_left'].shift(lag)
        shB = mouse_pair['B']['ear_left'].shift(lag)
        speeds = pd.DataFrame({
            'sp_A': np.square(mouse_pair['A']['ear_left'] - shA).sum(axis=1, skipna=False),
            'sp_AB': np.square(mouse_pair['A']['ear_left'] - shB).sum(axis=1, skipna=False),
            'sp_B': np.square(mouse_pair['B']['ear_left'] - shB).sum(axis=1, skipna=False),
        })
        X = pd.concat([X, speeds], axis=1)

    if 'nose+tail_base' in X.columns and 'ear_left+ear_right' in X.columns:
        X['elong'] = X['nose+tail_base'] / (X['ear_left+ear_right'] + 1e-6)

    if all(p in avail_A for p in ['nose', 'tail_base']) and all(p in avail_B for p in ['nose', 'tail_base']):
        dir_A = mouse_pair['A']['nose'] - mouse_pair['A']['tail_base']
        dir_B = mouse_pair['B']['nose'] - mouse_pair['B']['tail_base']
        X['rel_ori'] = (dir_A['x'] * dir_B['x'] + dir_A['y'] * dir_B['y']) / (
            np.sqrt(dir_A['x']**2 + dir_A['y']**2) * np.sqrt(dir_B['x']**2 + dir_B['y']**2) + 1e-6)

    if all(p in avail_A for p in ['nose']) and all(p in avail_B for p in ['nose']):
        cur = np.square(mouse_pair['A']['nose'] - mouse_pair['B']['nose']).sum(axis=1, skipna=False)
        lag = _scale(10, fps)
        shA_n = mouse_pair['A']['nose'].shift(lag)
        shB_n = mouse_pair['B']['nose'].shift(lag)
        past = np.square(shA_n - shB_n).sum(axis=1, skipna=False)
        X['appr'] = cur - past

    if 'body_center' in avail_A and 'body_center' in avail_B:
        cd = np.sqrt((mouse_pair['A']['body_center']['x'] - mouse_pair['B']['body_center']['x'])**2 +
                     (mouse_pair['A']['body_center']['y'] - mouse_pair['B']['body_center']['y'])**2)
        X['v_cls'] = (cd < 5.0).astype(float)
        X['cls']   = ((cd >= 5.0) & (cd < 15.0)).astype(float)
        X['med']   = ((cd >= 15.0) & (cd < 30.0)).astype(float)
        X['far']   = (cd >= 30.0).astype(float)

    if 'body_center' in avail_A and 'body_center' in avail_B:
        cd_full = np.square(mouse_pair['A']['body_center'] - mouse_pair['B']['body_center']).sum(axis=1, skipna=False)

        for w in [5, 15, 30, 60]:
            ws = _scale(w, fps)
            roll = dict(min_periods=1, center=True)
            X[f'd_m{w}']  = cd_full.rolling(ws, **roll).mean()
            X[f'd_s{w}']  = cd_full.rolling(ws, **roll).std()
            X[f'd_mn{w}'] = cd_full.rolling(ws, **roll).min()
            X[f'd_mx{w}'] = cd_full.rolling(ws, **roll).max()

            d_var = cd_full.rolling(ws, **roll).var()
            X[f'int{w}'] = 1 / (1 + d_var)

            Axd = mouse_pair['A']['body_center']['x'].diff()
            Ayd = mouse_pair['A']['body_center']['y'].diff()
            Bxd = mouse_pair['B']['body_center']['x'].diff()
            Byd = mouse_pair['B']['body_center']['y'].diff()
            coord = Axd * Bxd + Ayd * Byd
            X[f'co_m{w}'] = coord.rolling(ws, **roll).mean()
            X[f'co_s{w}'] = coord.rolling(ws, **roll).std()

    if 'nose' in avail_A and 'nose' in avail_B:
        nn = np.sqrt((mouse_pair['A']['nose']['x'] - mouse_pair['B']['nose']['x'])**2 +
                     (mouse_pair['A']['nose']['y'] - mouse_pair['B']['nose']['y'])**2)
        for lag in [10, 20, 40]:
            l = _scale(lag, fps)
            X[f'nn_lg{lag}']  = nn.shift(l)
            X[f'nn_ch{lag}']  = nn - nn.shift(l)
            is_cl = (nn < 10.0).astype(float)
            X[f'cl_ps{lag}']  = is_cl.rolling(l, min_periods=1).mean()

    if 'body_center' in avail_A and 'body_center' in avail_B:
        Avx = mouse_pair['A']['body_center']['x'].diff()
        Avy = mouse_pair['A']['body_center']['y'].diff()
        Bvx = mouse_pair['B']['body_center']['x'].diff()
        Bvy = mouse_pair['B']['body_center']['y'].diff()
        val = (Avx * Bvx + Avy * Bvy) / (np.sqrt(Avx**2 + Avy**2) * np.sqrt(Bvx**2 + Bvy**2) + 1e-6)

        for off in [-30, -20, -10, 0, 10, 20, 30]:
            o = _scale_signed(off, fps)
            X[f'va_{off}'] = val.shift(-o)

        w = _scale(30, fps)
        X['int_con'] = cd_full.rolling(w, min_periods=1, center=True).std() / \
                       (cd_full.rolling(w, min_periods=1, center=True).mean() + 1e-6)

        X = add_interaction_features(X, mouse_pair, avail_A, avail_B, fps)
    
    # [NEW] Add Egocentric Features for Pair
    # 1. A's ego features (A in A's frame)
    ego_A = compute_egocentric_transform(mouse_pair['A'], body_parts_tracked)
    if ego_A is not None:
        ego_A.columns = [f"A_{c}" for c in ego_A.columns]
        X = pd.concat([X, ego_A], axis=1)
        
    # 2. B's ego features (B in B's frame) - to know B's posture
    ego_B = compute_egocentric_transform(mouse_pair['B'], body_parts_tracked)
    if ego_B is not None:
        ego_B.columns = [f"B_{c}" for c in ego_B.columns]
        X = pd.concat([X, ego_B], axis=1)

    # 3. B in A's frame (Interaction context: where is B relative to A?)
    if 'body_center' in mouse_pair['B'].columns:
        # Prepare B's key points to transform into A's frame
        target_pts = {
            'body_center': (mouse_pair['B']['body_center']['x'], mouse_pair['B']['body_center']['y'])
        }
        if 'nose' in mouse_pair['B'].columns:
            target_pts['nose'] = (mouse_pair['B']['nose']['x'], mouse_pair['B']['nose']['y'])
            
        ego_B_in_A = compute_egocentric_transform(mouse_pair['A'], [], target_points=target_pts)
        if ego_B_in_A is not None:
             X = pd.concat([X, ego_B_in_A], axis=1)

    return X.astype(np.float32, copy=False)


In [8]:
import numpy as np
import pandas as pd
import json

def robustify(submission, dataset, traintest, traintest_directory=None):
    if traintest_directory is None:
        traintest_directory = f"MABe-mouse-behavior-detection/{traintest}_tracking"
    
    old_submission = submission.copy()
    
    # --------------------------
    # Core fix: Convert start_frame/stop_frame to integers
    # --------------------------
    # Handle string to int conversion（Compatible "123" 这类String，Invalid值Convert to NaN 后Fill为 0）
    submission['start_frame'] = pd.to_numeric(submission['start_frame'], errors='coerce').fillna(0).astype(int)
    submission['stop_frame'] = pd.to_numeric(submission['stop_frame'], errors='coerce').fillna(0).astype(int)
    
    # Filter invalid rows where start >= stop（现在是Integer比较，无类型错误）
    submission = submission[submission.start_frame < submission.stop_frame]
    if len(submission) != len(old_submission):
        print("ERROR: Dropped frames with start >= stop")
    
    old_submission = submission.copy()
    group_list = []
    # Group by video_id + agent_id + target_id，避免跨主体的Frame冲突
    for _, group in submission.groupby(['video_id', 'agent_id', 'target_id']):
        group = group.sort_values('start_frame')  # Sort by start frame
        mask = np.ones(len(group), dtype=bool)
        last_stop_frame = 0
        for i, (_, row) in enumerate(group.iterrows()):
            # 现在 start_frame 是Integer，可正常比较
            if row['start_frame'] < last_stop_frame:
                mask[i] = False  # Filter overlapping actions（当前StartFrame < PreviousStopFrame）
            else:
                last_stop_frame = row['stop_frame']  # Update the stop frame of the previous action
        group_list.append(group[mask])
        
    submission = pd.concat(group_list)
    
    if len(submission) != len(old_submission):
        print("ERROR: Dropped duplicate frames")
        
    s_list = []
    # Handle videos with no predictions，FillDefaultActionFrame
    for idx, row in dataset.iterrows():
        lab_id = row['lab_id']
        if lab_id.startswith('MABe22'):
            continue
        
        video_id = row['video_id']
        if (submission.video_id == video_id).any():
            continue  # Predictions exist, skipping
        
        if type(row.behaviors_labeled) != str:
            continue

        print(f"Video {video_id} has no predictions.")
        
        path = f"{traintest_directory}/{lab_id}/{video_id}.parquet"
        vid = pd.read_parquet(path)
    
        vid_behaviors = json.loads(row['behaviors_labeled'])
        vid_behaviors = sorted(list({b.replace("'", "") for b in vid_behaviors}))
        vid_behaviors = [b.split(',') for b in vid_behaviors]
        vid_behaviors = pd.DataFrame(vid_behaviors, columns=['agent', 'target', 'action'])
    
        start_frame = vid.video_frame.min()
        stop_frame = vid.video_frame.max() + 1
    
        # Group by agent + target，均匀分配ActionFrame
        for (agent, target), actions in vid_behaviors.groupby(['agent', 'target']):
            batch_length = int(np.ceil((stop_frame - start_frame) / len(actions)))
            for i, (_, action_row) in enumerate(actions.iterrows()):
                batch_start = start_frame + i * batch_length
                batch_stop = min(batch_start + batch_length, stop_frame)
                s_list.append((video_id, agent, target, action_row['action'], batch_start, batch_stop))

    if len(s_list) > 0:
        # New filled rows，start_frame/stop_frame 本身是Integer，无需转换
        submission = pd.concat([
            submission,
            pd.DataFrame(s_list, columns=['video_id', 'agent_id', 'target_id', 'action', 'start_frame', 'stop_frame'])
        ])
        print("ERROR: Filled empty videos")

    submission = submission.reset_index(drop=True)
    
    return submission

In [9]:
def predict_multiclass(pred, meta, thresholds):
    ama = np.argmax(pred.values, axis=1)
    max_proba = pred.max(axis=1).values

    threshold_array = np.array([thresholds.get(col, 0.27) for col in pred.columns])
    action_thresholds = threshold_array[ama]

    ama = np.where(max_proba >= action_thresholds, ama, -1)
    ama = pd.Series(ama, index=meta.video_frame)
    
    changes_mask = (ama != ama.shift(1)).values
    ama_changes = ama[changes_mask]
    meta_changes = meta[changes_mask]
    
    mask = ama_changes.values >= 0
    mask[-1] = False
    
    submission_part = pd.DataFrame({
        'video_id': meta_changes['video_id'][mask].values,
        'agent_id': meta_changes['agent_id'][mask].values,
        'target_id': meta_changes['target_id'][mask].values,
        'action': pred.columns[ama_changes[mask].values],
        'start_frame': ama_changes.index[mask],
        'stop_frame': ama_changes.index[1:][mask[:-1]]
    })
    
    stop_video_id = meta_changes['video_id'][1:][mask[:-1]].values
    stop_agent_id = meta_changes['agent_id'][1:][mask[:-1]].values
    stop_target_id = meta_changes['target_id'][1:][mask[:-1]].values
    for i in range(len(submission_part)):
        video_id = submission_part.video_id.iloc[i]
        agent_id = submission_part.agent_id.iloc[i]
        target_id = submission_part.target_id.iloc[i]
        if stop_video_id[i] != video_id or stop_agent_id[i] != agent_id or stop_target_id[i] != target_id:
            new_stop_frame = meta.query("(video_id == @video_id)").video_frame.max() + 1
            submission_part.iat[i, submission_part.columns.get_loc('stop_frame')] = new_stop_frame

    return submission_part

In [10]:
def tune_threshold(oof_action, y_action):
    def objective(trial):
        threshold = trial.suggest_float("threshold", 0, 1, step=0.01)
        return f1_score(y_action, (oof_action >= threshold), zero_division=0)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=1000, n_jobs=-1)
    return study.best_params["threshold"]

In [None]:
def cross_validate_classifier_xgb(X, label, meta, body_parts_tracked_str, section):
    """
    Uses XGBoost for training and cross-validation.
    """
    oof = pd.DataFrame(index=meta.video_frame)
    f1_list = []
    submission_list = []
    thresholds = {}

    for action in label.columns:
        action_mask = ~label[action].isna().values
        y_action = label[action][action_mask].values.astype(int)
        X_action = X[action_mask]
        groups_action = meta.video_id[action_mask]

        if len(np.unique(groups_action)) < 2:
            continue

        if not (y_action == 0).all():
            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore')

                    # Use GroupKFold
                    cv = GroupKFold(n_splits=min(2, len(np.unique(groups_action))))
                    oof_action = np.zeros(len(y_action))
                    preds = []

                    for train_idx, valid_idx in cv.split(X_action, y_action, groups_action):
                        model = XGBClassifier(**CFG.xgb_params)
                        model.fit(
                            X_action.iloc[train_idx], y_action[train_idx],
                            eval_set=[(X_action.iloc[valid_idx], y_action[valid_idx])],
                            verbose=False
                        )

                        # Feature Importance
                        importances = pd.DataFrame({
                            'feature': X_action.columns,
                            'importance': model.feature_importances_
                        }).sort_values('importance', ascending=False)
                        # print(f"\n[Fold {len(preds)+1}] Top 20 Feature Importance:")
                        # print(importances.head(20))

                        oof_action[valid_idx] = model.predict_proba(X_action.iloc[valid_idx])[:, 1]
                        preds.append(model)

                    threshold = tune_threshold(oof_action, y_action)
                    thresholds[action] = threshold
                    f1 = f1_score(y_action, (oof_action >= threshold), zero_division=0)
                    f1_list.append((body_parts_tracked_str, action, f1))
                    print(f"\tF1: {f1:.4f} ({threshold:.2f}) Section: {section} Action: {action}")

                    model_dir = f"{CFG.model_path}/{CFG.model_name}/{section}/{action}"
                    os.makedirs(model_dir, exist_ok=True)

                    joblib.dump(preds, f"{model_dir}/xgb_trainer.pkl")

            except Exception as e:
                print(f"\tTraining failed: {e}")
                oof_action = np.zeros(len(y_action))
        else:
            oof_action = np.zeros(len(y_action))
            print(f"\tF1: 0.0000 (0.00) Section: {section} Action: {action}")

        oof_column = np.zeros(len(label))
        oof_column[action_mask] = oof_action
        oof[action] = oof_column

        del oof_action, action_mask, X_action, y_action, groups_action
        gc.collect()

    # submission_part = predict_multiclass_improved(oof, meta, thresholds)
    submission_part = predict_multiclass_hmm(oof, meta, thresholds)
    submission_list.append(submission_part)

    return submission_list, f1_list, thresholds


In [None]:

def cross_validate_classifier_xgb_optuna(X, label, meta, body_parts_tracked_str, section, n_trials=20):
    """
    XGBoost training with Optuna hyperparameter optimization (conservative range).
    """
    os.makedirs(f"{CFG.model_path}/{CFG.model_name}/{section}", exist_ok=True)

    oof = pd.DataFrame(index=meta.video_frame)
    f1_list = []
    submission_list = []
    thresholds = {}

    for action in label.columns:
        action_mask = ~label[action].isna().values
        y_action = label[action][action_mask].values.astype(int)
        X_action = X[action_mask]
        groups_action = meta.video_id[action_mask]

        if len(np.unique(groups_action)) < 2:
            continue

        if not (y_action == 0).all():
            try:
                # Optuna objective function
                def objective(trial):
                    params = {
                        'n_estimators': trial.suggest_int('n_estimators', 200, 400),
                        'learning_rate': trial.suggest_float('learning_rate', 0.03, 0.08),
                        'max_depth': trial.suggest_int('max_depth', 5, 7),
                        'subsample': trial.suggest_float('subsample', 0.8, 1.0),
                        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.8, 1.0),
                        'n_jobs': -1,
                        'tree_method': 'hist',
                        'random_state': 42,
                        'eval_metric': 'logloss',
                        'device': 'cuda'
                    }
                    
                    cv = GroupKFold(n_splits=2)
                    scores = []
                    
                    for train_idx, valid_idx in cv.split(X_action, y_action, groups_action):
                        model = XGBClassifier(**params)
                        model.fit(X_action.iloc[train_idx], y_action[train_idx])
                        pred = model.predict_proba(X_action.iloc[valid_idx])[:, 1]
                        score = f1_score(y_action[valid_idx], (pred >= 0.5), zero_division=0)
                        scores.append(score)
                    
                    return np.mean(scores)
                
                # Optuna optimization
                study = optuna.create_study(direction='maximize')
                study.optimize(objective, n_trials=n_trials)
                
                best_params = study.best_params
                best_params.update({
                    'n_jobs': -1,
                    'tree_method': 'hist',
                    'random_state': 42,
                    'eval_metric': 'logloss',
                    'device': 'cuda'
                })
                
                print(f"\tBest params for {action}: {best_params}")
                
                # Train final model with best params
                cv = GroupKFold(n_splits=2)
                oof_action = np.zeros(len(y_action))
                models = []

                for train_idx, valid_idx in cv.split(X_action, y_action, groups_action):
                    model = XGBClassifier(**best_params)
                    model.fit(X_action.iloc[train_idx], y_action[train_idx])
                    oof_action[valid_idx] = model.predict_proba(X_action.iloc[valid_idx])[:, 1]
                    models.append(model)

                threshold = tune_threshold(oof_action, y_action)
                thresholds[action] = threshold

                f1 = f1_score(y_action, (oof_action >= threshold), zero_division=0)
                f1_list.append((body_parts_tracked_str, action, f1))
                print(f"\tF1: {f1:.4f} (Threshold: {threshold:.3f}) - Action: {action}")

                model_dir = f"{CFG.model_path}/{CFG.model_name}/{section}/{action}"
                os.makedirs(model_dir, exist_ok=True)
                joblib.dump(models, f"{model_dir}/xgb_trainer.pkl")

            except Exception as e:
                print(f"\tTraining failed {action}: {e}")
                oof_action = np.zeros(len(y_action))
        else:
            oof_action = np.zeros(len(y_action))
            thresholds[action] = 0.5

        oof_column = np.zeros(len(label))
        oof_column[action_mask] = oof_action
        oof[action] = oof_column

        del oof_action, action_mask, X_action, y_action, groups_action
        gc.collect()

    # submission_part = predict_multiclass_improved(oof, meta, thresholds)
    submission_part = predict_multiclass_hmm(oof, meta, thresholds)
    submission_list.append(submission_part)

    return submission_list, f1_list, thresholds


In [None]:
import glob
import hashlib
import numpy as np
import pandas as pd
import json
import gc
from concurrent.futures import ProcessPoolExecutor, as_completed
import joblib

# [NEW] Post-processing: Gap Filling
def fill_gaps(binary_preds, max_gap=5):
    """Fills short gaps (0s) between 1s."""
    # Convert to int for processing
    preds = binary_preds.astype(int)
    # Find runs of 0s
    # Pad with 1s to handle edge cases
    padded = np.concatenate(([1], preds, [1]))
    diff = np.diff(padded)
    starts = np.where(diff == -1)[0]
    stops = np.where(diff == 1)[0]
    
    for start, stop in zip(starts, stops):
        gap_len = stop - start
        if gap_len <= max_gap:
            preds[start:stop] = 1
    return preds

# [NEW] Post-processing: Min Duration
def remove_short_duration(binary_preds, min_len=5):
    """Removes short bursts of 1s."""
    preds = binary_preds.astype(int)
    padded = np.concatenate(([0], preds, [0]))
    diff = np.diff(padded)
    starts = np.where(diff == 1)[0]
    stops = np.where(diff == -1)[0]
    
    for start, stop in zip(starts, stops):
        duration = stop - start
        if duration <= min_len:
            preds[start:stop] = 0
    return preds



def predict_multiclass_improved(pred, meta, thresholds, min_duration=5, max_gap=5):
    """
    Updated prediction function with Gap Filling and Min Duration filtering.
    """
    if pred.empty:
        return pd.DataFrame()
    
    # Ensure meta has correct index
    if not all(col in meta.columns for col in ['video_id', 'agent_id', 'target_id', 'video_frame']):
        print(f"\t    Meta data missing required columns: {meta.columns}")
        return pd.DataFrame()
    
    # 1. Apply Thresholds to get Binary Masks per Action
    binary_masks = {}
    for col in pred.columns:
        thresh = thresholds.get(col, 0.27)
        mask = (pred[col].values >= thresh).astype(int)
        
        # [NEW] Apply Post-processing per action
        mask = fill_gaps(mask, max_gap=max_gap)
        mask = remove_short_duration(mask, min_len=min_duration)
        
        binary_masks[col] = mask
        
    # 2. Combine to Multi-class (Argmax)
    # We need to handle conflicts. If multiple actions are 1, pick max prob?
    # Or just use the processed masks to filter the original probabilities?
    # Let's use the processed masks as a hard filter, then argmax on probs.
    
    valid_mask = np.zeros(len(pred), dtype=bool)
    for col, mask in binary_masks.items():
        valid_mask |= (mask == 1)
        
    # If no action is valid, it's background (-1)
    # If multiple are valid, pick max prob among them
    
    ama = np.full(len(pred), -1)
    
    # Only consider frames where at least one action passed the filters
    if np.any(valid_mask):
        # Mask out probabilities where binary_mask is 0
        masked_probs = pred.copy()
        for col in pred.columns:
            masked_probs.loc[binary_masks[col] == 0, col] = -1.0
            
        # Argmax on masked probs
        best_idx = np.argmax(masked_probs.values, axis=1)
        max_val = np.max(masked_probs.values, axis=1)
        
        # Assign where max_val > -1 (meaning at least one action was valid)
        ama = np.where(max_val > -1.0, best_idx, -1)

    # Construct Segments
    submission_parts = []
    
    if len(ama) == 0:
        return pd.DataFrame()
    
    current_action = ama[0]
    start_frame = meta.video_frame.iloc[0] if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[0]
    
    for i in range(1, len(ama)):
        if ama[i] != current_action:
            # End of current segment
            if current_action >= 0:
                try:
                    video_id = meta.video_id.iloc[i-1] if hasattr(meta.video_id, 'iloc') else meta.video_id.values[i-1]
                    agent_id = meta.agent_id.iloc[i-1] if hasattr(meta.agent_id, 'iloc') else meta.agent_id.values[i-1]
                    target_id = meta.target_id.iloc[i-1] if hasattr(meta.target_id, 'iloc') else meta.target_id.values[i-1]
                    stop_frame = meta.video_frame.iloc[i-1] + 1 if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[i-1] + 1
                    
                    submission_parts.append({
                        'video_id': video_id,
                        'agent_id': agent_id,
                        'target_id': target_id,
                        'action': pred.columns[current_action],
                        'start_frame': start_frame,
                        'stop_frame': stop_frame
                    })
                except Exception as e:
                    print(f"\t      Failed to create submission segment: {e}")
            
            # Start new segment
            current_action = ama[i]
            start_frame = meta.video_frame.iloc[i] if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[i]
            
    # Last segment
    if current_action >= 0:
        try:
            video_id = meta.video_id.iloc[-1] if hasattr(meta.video_id, 'iloc') else meta.video_id.values[-1]
            agent_id = meta.agent_id.iloc[-1] if hasattr(meta.agent_id, 'iloc') else meta.agent_id.values[-1]
            target_id = meta.target_id.iloc[-1] if hasattr(meta.target_id, 'iloc') else meta.target_id.values[-1]
            stop_frame = meta.video_frame.iloc[-1] + 1 if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[-1] + 1
            
            submission_parts.append({
                'video_id': video_id,
                'agent_id': agent_id,
                'target_id': target_id,
                'action': pred.columns[current_action],
                'start_frame': start_frame,
                'stop_frame': stop_frame
            })
        except Exception as e:
            print(f"\t      Failed to create last submission segment: {e}")
            
    return pd.DataFrame(submission_parts)

In [None]:
# ============================================================
# HMM Viterbi Algorithm for Post-processing (test88)
# 이 코드를 test88.ipynb의 predict_multiclass_improved 함수 앞에 추가하세요
# ============================================================

import numpy as np

def viterbi_decode(emission_probs, trans_mat, start_prob=None):
    """
    Viterbi algorithm for finding the most likely sequence of hidden states.
    
    Args:
        emission_probs: (n_frames, n_states) - Probability of each state at each frame
        trans_mat: (n_states, n_states) - Transition probabilities [from_state, to_state]
        start_prob: (n_states,) - Initial state probabilities (optional)
    
    Returns:
        path: (n_frames,) - Most likely sequence of states
    """
    n_frames, n_states = emission_probs.shape
    
    # Initialize start probabilities (uniform if not provided)
    if start_prob is None:
        start_prob = np.ones(n_states) / n_states
    
    # Log probabilities for numerical stability
    log_emission = np.log(emission_probs + 1e-10)
    log_trans = np.log(trans_mat + 1e-10)
    log_start = np.log(start_prob + 1e-10)
    
    # Viterbi tables
    viterbi = np.zeros((n_frames, n_states))
    backpointer = np.zeros((n_frames, n_states), dtype=int)
    
    # Initialization
    viterbi[0] = log_start + log_emission[0]
    
    # Forward pass
    for t in range(1, n_frames):
        for s in range(n_states):
            # Find the most likely previous state
            trans_probs = viterbi[t-1] + log_trans[:, s]
            backpointer[t, s] = np.argmax(trans_probs)
            viterbi[t, s] = trans_probs[backpointer[t, s]] + log_emission[t, s]
    
    # Backtracking
    path = np.zeros(n_frames, dtype=int)
    path[-1] = np.argmax(viterbi[-1])
    
    for t in range(n_frames - 2, -1, -1):
        path[t] = backpointer[t + 1, path[t + 1]]
    
    return path


def create_transition_matrix(n_states, self_prob=0.85):
    """
    Create a transition matrix favoring staying in the same state.
    
    Args:
        n_states: Number of states (including background state)
        self_prob: Probability of staying in the same state (higher = smoother)
    
    Returns:
        trans_mat: (n_states, n_states) transition matrix
    """
    trans_mat = np.ones((n_states, n_states)) * ((1 - self_prob) / (n_states - 1))
    np.fill_diagonal(trans_mat, self_prob)
    return trans_mat


def predict_multiclass_hmm(pred, meta, thresholds, self_prob=0.85):
    """
    [test88 NEW] HMM-based prediction function using Viterbi decoding.
    Replaces binary mask smoothing with probabilistic sequence modeling.
    """
    if pred.empty:
        return pd.DataFrame()
    
    if not all(col in meta.columns for col in ['video_id', 'agent_id', 'target_id', 'video_frame']):
        print(f"\t    Meta data missing required columns: {meta.columns}")
        return pd.DataFrame()
    
    n_actions = len(pred.columns)
    n_frames = len(pred)
    
    # 1. Create emission probabilities (n_frames, n_states)
    # Include background state (index 0), actions (index 1..n_actions)
    emission_probs = np.zeros((n_frames, n_actions + 1))
    
    for i, col in enumerate(pred.columns):
        thresh = thresholds.get(col, 0.27)
        # Action probabilities (after thresholding)
        action_prob = pred[col].values
        emission_probs[:, i + 1] = action_prob * (action_prob >= thresh)
    
    # Background probability (1 - sum of action probs)
    emission_probs[:, 0] = 1.0 - emission_probs[:, 1:].sum(axis=1)
    emission_probs = np.clip(emission_probs, 1e-10, 1.0)
    
    # Normalize to ensure valid probabilities
    row_sums = emission_probs.sum(axis=1, keepdims=True)
    emission_probs = emission_probs / row_sums
    
    # 2. Create transition matrix
    trans_mat = create_transition_matrix(n_actions + 1, self_prob=self_prob)
    
    # 3. Run Viterbi decoding
    path = viterbi_decode(emission_probs, trans_mat)
    
    # path[i] = 0 means background, path[i] = k (k>0) means action index (k-1)
    
    # 4. Convert Viterbi path to submission format
    submission_parts = []
    
    if len(path) == 0:
        return pd.DataFrame()
    
    current_action = path[0]
    start_frame = meta.video_frame.iloc[0] if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[0]
    
    for i in range(1, len(path)):
        if path[i] != current_action:
            # End of current segment
            if current_action > 0:  # Not background
                try:
                    video_id = meta.video_id.iloc[i-1] if hasattr(meta.video_id, 'iloc') else meta.video_id.values[i-1]
                    agent_id = meta.agent_id.iloc[i-1] if hasattr(meta.agent_id, 'iloc') else meta.agent_id.values[i-1]
                    target_id = meta.target_id.iloc[i-1] if hasattr(meta.target_id, 'iloc') else meta.target_id.values[i-1]
                    stop_frame = meta.video_frame.iloc[i-1] + 1 if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[i-1] + 1
                    
                    submission_parts.append({
                        'video_id': video_id,
                        'agent_id': agent_id,
                        'target_id': target_id,
                        'action': pred.columns[current_action - 1],  # -1 because state 0 is background
                        'start_frame': start_frame,
                        'stop_frame': stop_frame
                    })
                except Exception as e:
                    print(f"\t      Failed to create submission segment: {e}")
            
            # Start new segment
            current_action = path[i]
            start_frame = meta.video_frame.iloc[i] if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[i]
    
    # Last segment
    if current_action > 0:
        try:
            video_id = meta.video_id.iloc[-1] if hasattr(meta.video_id, 'iloc') else meta.video_id.values[-1]
            agent_id = meta.agent_id.iloc[-1] if hasattr(meta.agent_id, 'iloc') else meta.agent_id.values[-1]
            target_id = meta.target_id.iloc[-1] if hasattr(meta.target_id, 'iloc') else meta.target_id.values[-1]
            stop_frame = meta.video_frame.iloc[-1] + 1 if hasattr(meta.video_frame, 'iloc') else meta.video_frame.values[-1] + 1
            
            submission_parts.append({
                'video_id': video_id,
                'agent_id': agent_id,
                'target_id': target_id,
                'action': pred.columns[current_action - 1],
                'start_frame': start_frame,
                'stop_frame': stop_frame
            })
        except Exception as e:
            print(f"\t      Failed to create last submission segment: {e}")
    
    return pd.DataFrame(submission_parts)


In [None]:

def submit_xgb(body_parts_tracked_str, switch_tr, section, thresholds, max_workers=2):
    """
    Submit function using XGBoost models.
    """
    body_parts_tracked = json.loads(body_parts_tracked_str)
    if len(body_parts_tracked) > 5:
        body_parts_tracked = [b for b in body_parts_tracked if b not in drop_body_parts]

    test_subset = test[test.body_parts_tracked == body_parts_tracked_str]
    
    print(f"\tTest subset size: {len(test_subset)}")
    
    if len(test_subset) == 0:
        return []
    
    fps_lookup = (
        test_subset[['video_id', 'frames_per_second']]
        .drop_duplicates('video_id')
        .set_index('video_id')['frames_per_second']
        .to_dict()
    )
    
    models_cache = {}
    all_actions = set()
    
    for _, row in test_subset.iterrows():
        if type(row.behaviors_labeled) == str:
            try:
                behaviors = json.loads(row.behaviors_labeled)
                for behavior in behaviors:
                    if isinstance(behavior, str) and ',' in behavior:
                        parts = behavior.split(',')
                        if len(parts) >= 3:
                            action = parts[2]
                        else:
                            action = behavior
                    else:
                        action = str(behavior)
                    all_actions.add(action)
            except Exception as e:
                continue
    
    for action in all_actions:
        try:
            model_path_pattern = f"{CFG.model_path}/{CFG.model_name}/{section}/{action}/xgb_trainer.pkl"
            model_files = glob.glob(model_path_pattern)
            
            if model_files:
                models = joblib.load(model_files[0])
                models_cache[action] = models
                print(f"\t✓ Loaded model: {action}")
            else:
                print(f"\t✗ Model not found: {action}")
        except Exception as e:
            print(f"\t✗ Failed to load model {action}: {e}")
    
    def process_single_video(row):
        try:
            video_data = pd.DataFrame([row])
            generator = generate_mouse_data(
                video_data,
                'test',
                traintest_directory=CFG.test_tracking_path,
                generate_single=(switch_tr == 'single'),
                generate_pair=(switch_tr == 'pair')
            )
            
            video_submissions = []
            
            for switch_te, data_te, meta_te, actions_te in generator:
                try:
                    fps_i = _fps_from_meta(meta_te, fps_lookup, default_fps=30.0)
                    
                    if switch_te == 'single':
                        X_te = transform_single(data_te, body_parts_tracked, fps_i).astype(np.float32)
                    else:
                        X_te = transform_pair(data_te, body_parts_tracked, fps_i).astype(np.float32)
                    
                    del data_te
                    gc.collect()
                    
                    if X_te.shape[0] == 0:
                        continue
                    
                    pred = pd.DataFrame(index=meta_te.video_frame)
                    
                    for action in actions_te:
                        if action in models_cache:
                            try:
                                models = models_cache[action]
                                probas = []
                                for model in models:
                                    prob = model.predict_proba(X_te)[:, 1]
                                    probas.append(prob)
                                
                                if probas:
                                    mean_prob = np.mean(probas, axis=0)
                                    pred[action] = mean_prob
                            except Exception as e:
                                continue
                    
                    del X_te
                    gc.collect()
                    
                    if not pred.empty and pred.shape[1] > 0:
                        # submission_part = predict_multiclass_improved(pred, meta_te, thresholds)
                        submission_part = predict_multiclass_hmm(pred, meta_te, thresholds)
                        if len(submission_part) > 0:
                            video_submissions.append(submission_part)
                    
                    del pred
                    gc.collect()
                    
                except Exception as e:
                    continue
            
            return video_submissions
            
        except Exception as e:
            return []
    
    submission_list = []
    
    for idx, row in test_subset.iterrows():
        result = process_single_video(row)
        if result:
            submission_list.extend(result)
        if idx % 5 == 0:
            gc.collect()
    
    del models_cache
    gc.collect()
    
    return submission_list

submit = submit_xgb


In [14]:
if CFG.mode == "validate":
    thresholds = {
        "single": {},
        "pair": {}
    }
else:
    try:
        thresholds_path = f"{CFG.model_path}/thresholds.pkl"
        thresholds = joblib.load(thresholds_path)
        print(f"SuccessfullyLoadThresholdFile: {thresholds_path}")
    except Exception as e:
        print(f"LoadThresholdFileFailed: {e}")
        # 尝试其他可能的Path
        try:
            thresholds_path = f"/kaggle/input/kk-lightgbm/lightgbm/thresholds.pkl"
            thresholds = joblib.load(thresholds_path)
            print(f"SuccessfullyLoadThresholdFile: {thresholds_path}")
        except Exception as e2:
            print(f"再次LoadThresholdFileFailed: {e2}")
            thresholds = {
                "single": {"default": 0.5},
                "pair": {"default": 0.5}
            }

In [None]:

f1_list = []
submission_list = []

for section in range(1, len(body_parts_tracked_list)):
    body_parts_tracked_str = body_parts_tracked_list[section]
    try:
        body_parts_tracked = json.loads(body_parts_tracked_str)
        print(f"{section}/{len(body_parts_tracked_list)-1} Processing videos with: {body_parts_tracked}\n")
        
        if len(body_parts_tracked) > 5:
            body_parts_tracked = [b for b in body_parts_tracked if b not in drop_body_parts]
    
        train_subset = train[train.body_parts_tracked == body_parts_tracked_str]

        _fps_lookup = (
            train_subset[['video_id', 'frames_per_second']]
            .drop_duplicates('video_id')
            .set_index('video_id')['frames_per_second']
            .to_dict()
        )
        
        single_mouse_list = []
        single_mouse_label_list = []
        single_mouse_meta_list = []
        
        mouse_pair_list = []
        mouse_pair_label_list = []
        mouse_pair_meta_list = []
    
        for switch, data, meta, label in generate_mouse_data(train_subset, 'train'):
            if switch == 'single':
                single_mouse_list.append(data)
                single_mouse_meta_list.append(meta)
                single_mouse_label_list.append(label)
            else:
                mouse_pair_list.append(data)
                mouse_pair_meta_list.append(meta)
                mouse_pair_label_list.append(label)
            
            del data, meta, label
        gc.collect()
    
    
        if len(single_mouse_list) > 0:
            single_feats_parts = []
            for data_i, meta_i in zip(single_mouse_list, single_mouse_meta_list):
                fps_i = _fps_from_meta(meta_i, _fps_lookup, default_fps=30.0)
                X_i = transform_single(data_i, body_parts_tracked, fps_i).astype(np.float32)
                single_feats_parts.append(X_i)
                del X_i, fps_i
            gc.collect()

            X_tr = pd.concat(single_feats_parts, axis=0, ignore_index=True)
            single_mouse_label = pd.concat(single_mouse_label_list, axis=0, ignore_index=True)
            single_mouse_meta = pd.concat(single_mouse_meta_list, axis=0, ignore_index=True)
            
            del single_feats_parts, single_mouse_list, single_mouse_label_list, single_mouse_meta_list
            gc.collect()

            if CFG.mode == 'validate':
                temp_submission_list, temp_f1_list, temp_thresholds = cross_validate_classifier_xgb(X_tr, single_mouse_label, single_mouse_meta, body_parts_tracked_str, section)
                
                if f"{section}" not in thresholds["single"].keys():
                    thresholds["single"][f"{section}"] = {}
                for k, v in temp_thresholds.items():
                    thresholds["single"][f"{section}"][k] = v                  
                
                f1_list.extend(temp_f1_list)
                submission_list.extend(temp_submission_list)
                
                del temp_submission_list, temp_f1_list, temp_thresholds, X_tr
                gc.collect()
            else:
                temp_submission_list = submit(body_parts_tracked_str, 'single', section, thresholds["single"][f"{section}"])
                submission_list.extend(temp_submission_list)
                
                del temp_submission_list, X_tr
                gc.collect()
                
        if len(mouse_pair_list) > 0:
            pair_feats_parts = []
            for data_i, meta_i in zip(mouse_pair_list, mouse_pair_meta_list):
                fps_i = _fps_from_meta(meta_i, _fps_lookup, default_fps=30.0)
                X_i = transform_pair(data_i, body_parts_tracked, fps_i).astype(np.float32)
                pair_feats_parts.append(X_i)
                del X_i, fps_i
            gc.collect()

            X_tr = pd.concat(pair_feats_parts, axis=0, ignore_index=True)
            mouse_pair_label = pd.concat(mouse_pair_label_list, axis=0, ignore_index=True)
            mouse_pair_meta = pd.concat(mouse_pair_meta_list, axis=0, ignore_index=True)
            
            del pair_feats_parts, mouse_pair_list, mouse_pair_label_list, mouse_pair_meta_list
            gc.collect()

            if CFG.mode == 'validate':
                temp_submission_list, temp_f1_list, temp_thresholds = cross_validate_classifier_xgb(X_tr, mouse_pair_label, mouse_pair_meta, body_parts_tracked_str, section)

                if f"{section}" not in thresholds["pair"].keys():
                    thresholds["pair"][f"{section}"] = {}
                for k, v in temp_thresholds.items():
                    thresholds["pair"][f"{section}"][k] = v  
                    
                f1_list.extend(temp_f1_list)
                submission_list.extend(temp_submission_list)
                
                del temp_submission_list, temp_f1_list, temp_thresholds, X_tr
                gc.collect()
            else:
                temp_submission_list = submit(body_parts_tracked_str, 'pair', section, thresholds["pair"][f"{section}"])
                
                submission_list.extend(temp_submission_list)
                del temp_submission_list, X_tr
                gc.collect()
                
    except Exception as e:
        print(f"\t{e}")
    print()

1/9 Processing videos with: ['body_center', 'ear_left', 'ear_right', 'headpiece_bottombackleft', 'headpiece_bottombackright', 'headpiece_bottomfrontleft', 'headpiece_bottomfrontright', 'headpiece_topbackleft', 'headpiece_topbackright', 'headpiece_topfrontleft', 'headpiece_topfrontright', 'lateral_left', 'lateral_right', 'neck', 'nose', 'tail_base', 'tail_midpoint', 'tail_tip']

                       x                                                       \
mouse_id               1                                                        
bodypart     body_center    ear_right lateral_left lateral_right    tail_base   
video_frame                                                                     
0            1161.543945  1146.305054  1182.458984   1131.587036  1142.069946   
1            1162.682983  1139.979980  1184.682983   1129.592041  1141.159058   
2            1154.698975  1138.678955  1181.415039   1128.328003  1149.113037   
3            1161.491943  1138.991943  1184.022949  

In [None]:
if CFG.mode == 'validate':  
    submission = pd.concat(submission_list)
    submission_robust = robustify(submission, train, 'train')
    print(f"Competition metric: {score(solution, submission_robust, ''):.4f}")

    f1_df = pd.DataFrame(f1_list, columns=['body_parts_tracked_str', 'action', 'binary F1 score'])
    print(f"Mean F1:            {f1_df['binary F1 score'].mean():.4f}")
  
    joblib.dump(thresholds, f"{CFG.model_name}/thresholds.pkl")
    joblib.dump(f1_df, f"{CFG.model_name}/scores.pkl")

ValueError: No objects to concatenate

In [None]:
if CFG.mode == 'validate':  
    submission = pd.concat(submission_list)
    submission_robust = robustify(submission, train, 'train')
    solution = create_solution_df(train_without_mabe22)
    print(f"Competition metric: {score(solution, submission_robust, ''):.4f}")

    # 修改：根据新的f1_list格式调整DataFrameCreate
    # 新的f1_list格式可能是 (body_parts_tracked_str, action, f1, threshold)
    if f1_list and len(f1_list[0]) == 4:  # 四元组格式
        f1_df = pd.DataFrame(f1_list, columns=['body_parts_tracked_str', 'action', 'binary F1 score', 'threshold'])
    else:  # 三元组格式（Compatible旧版本）
        f1_df = pd.DataFrame(f1_list, columns=['body_parts_tracked_str', 'action', 'binary F1 score'])
    
    print(f"Mean F1:            {f1_df['binary F1 score'].mean():.4f}")
  
    # 修改：Ensure使用正确的Model名称Path
    os.makedirs(CFG.model_name, exist_ok=True)  # Ensuredirectory存在
    joblib.dump(thresholds, f"{CFG.model_name}/thresholds.pkl")
    joblib.dump(f1_df, f"{CFG.model_name}/scores.pkl")
    print(f"Model and thresholds saved to {CFG.model_name}/ directory")

if CFG.mode == 'submit':
    if len(submission_list) > 0:
        submission = pd.concat(submission_list)
    else:
        # 提供Default值以防万一
        submission = pd.DataFrame({
            'video_id': [438887472],
            'agent_id': ['mouse1'],
            'target_id': ['self'], 
            'action': ['rear'],
            'start_frame': [278],
            'stop_frame': [500]
        })
        
    submission_robust = robustify(submission, test, 'test')
    submission_robust.index.name = 'row_id'
    
    # SaveSubmissionFile
    submission_robust.to_csv('submission.csv', index=True)
    print(f"Submission file saved: submission.csv")
    print(f"Submission file shape: {submission_robust.shape}")
    print("Preview first few rows:")
    print(submission_robust.head())

In [None]:
if len(submission_list) > 0:
    submission = pd.concat(submission_list)
else:
    # 提供Default值以防万一
    submission = pd.DataFrame({
        'video_id': [438887472],
        'agent_id': ['mouse1'],
        'target_id': ['self'], 
        'action': ['rear'],
        'start_frame': [278],
        'stop_frame': [500]
    })
        
submission_robust = robustify(submission, test, 'test')
submission_robust.index.name = 'row_id'
    
# SaveSubmissionFile
submission_robust.to_csv('submission.csv', index=True)
print(f"Submission file saved: submission.csv")
print(f"Submission file shape: {submission_robust.shape}")
print("Preview first few rows:")
print(submission_robust.head())

In [None]:
'''
test22.ipynb코드 참고해서 boosting계열 쥐 행동분석 코드 짜서 새파일로 저장해줘

LightGBM을 바탕으로 학습한거로 MABe22 데이터중 10%를 예측하고 이걸 바탕으로 전체 증강된 데이터를 XGBoost모델로 새롭게 학습해서 결과 낼거야

'''