# Imports and configuration


In [1]:
!pip install --no-deps /kaggle/input/dependencies/koolbox-0.1.3-py3-none-any.whl


Processing /kaggle/input/dependencies/koolbox-0.1.3-py3-none-any.whl
Installing collected packages: koolbox
Successfully installed koolbox-0.1.3


In [2]:
import json

from collections import defaultdict

import pandas as pd
import polars as pl


class HostVisibleError(Exception):
    """Error raised by the metric code when a submission is invalid.

    In this file it is raised when one agent/target pair has more than one
    prediction covering the same frame.
    """

def single_lab_f1(lab_solution: pl.DataFrame, lab_submission: pl.DataFrame, beta: float = 1) -> float:
    """Frame-level F-beta for one lab, averaged over the lab's labelled actions.

    Args:
        lab_solution: Ground-truth rows for one lab. The code reads the columns
            'label_key', 'video_id', 'behaviors_labeled', 'start_frame' and
            'stop_frame'.
        lab_submission: Predicted rows for the same videos. The code reads
            'prediction_key', 'video_id', 'agent_id', 'target_id', 'action',
            'start_frame' and 'stop_frame'.
        beta: F-beta weight; 1 gives the F1 score.

    Returns:
        The unweighted mean of the per-action F-beta scores, taken over the
        actions that appear in the solution.

    Raises:
        HostVisibleError: if an agent/target pair has two predictions that
            cover the same frame of a video.
        ZeroDivisionError: if the solution contains no labelled action
            (the final mean divides by the number of distinct actions).
    """
    label_frames: defaultdict[str, set[int]] = defaultdict(set)
    prediction_frames: defaultdict[str, set[int]] = defaultdict(set)

    # Expand every labelled interval into its set of frames; stop_frame is exclusive.
    for row in lab_solution.to_dicts():
        label_frames[row['label_key']].update(range(row['start_frame'], row['stop_frame']))

    for video in lab_solution['video_id'].unique():
        # 'behaviors_labeled' is read from the first solution row of the video and
        # decoded as a JSON list of "agent,target,action" strings.
        active_labels: str = lab_solution.filter(pl.col('video_id') == video)['behaviors_labeled'].first()  # ty: ignore
        active_labels: set[str] = set(json.loads(active_labels))
        # Frames already claimed by each "agent,target" pair within this video.
        predicted_mouse_pairs: defaultdict[str, set[int]] = defaultdict(set)

        for row in lab_submission.filter(pl.col('video_id') == video).to_dicts():
            # Since the labels are sparse, we can't evaluate prediction keys not in the active labels.
            if ','.join([str(row['agent_id']), str(row['target_id']), row['action']]) not in active_labels:
                continue

            new_frames = set(range(row['start_frame'], row['stop_frame']))
            
            # Ignore truly redundant predictions (frames already predicted under the same key).
            new_frames = new_frames.difference(prediction_frames[row['prediction_key']])
            prediction_pair = ','.join([str(row['agent_id']), str(row['target_id'])])
            if predicted_mouse_pairs[prediction_pair].intersection(new_frames):
                # A single agent can have multiple targets per frame (ex: evading all other mice) but only one action per target per frame.
                raise HostVisibleError('Multiple predictions for the same frame from one agent/target pair')
            
            prediction_frames[row['prediction_key']].update(new_frames)
            predicted_mouse_pairs[prediction_pair].update(new_frames)

    # Per-action frame counts: true positives, false negatives, false positives.
    tps = defaultdict(int)
    fns = defaultdict(int)
    fps = defaultdict(int)
    for key, pred_frames in prediction_frames.items():
        # The action is the last '_'-separated piece of the key.
        # NOTE(review): an action name that itself contains '_' would be cut here — confirm none do.
        action = key.split('_')[-1]
        matched_label_frames = label_frames[key]
        tps[action] += len(pred_frames.intersection(matched_label_frames))
        fns[action] += len(matched_label_frames.difference(pred_frames))
        fps[action] += len(pred_frames.difference(matched_label_frames))

    # Label keys that were never predicted contribute all of their frames as false negatives.
    distinct_actions = set()
    for key, frames in label_frames.items():
        action = key.split('_')[-1]
        distinct_actions.add(action)
        if key not in prediction_frames:
            fns[action] += len(frames)

    action_f1s = []
    for action in distinct_actions:
        if tps[action] + fns[action] + fps[action] == 0:
            # No labelled and no predicted frames for this action: scored as 0.
            action_f1s.append(0)
        else:
            action_f1s.append((1 + beta**2) * tps[action] / ((1 + beta**2) * tps[action] + beta**2 * fns[action] + fps[action]))
    return sum(action_f1s) / len(action_f1s)

def mouse_fbeta(solution: pd.DataFrame, submission: pd.DataFrame, beta: float = 1) -> float:
    """Competition metric: mean over labs of the per-lab F-beta score.

    Args:
        solution: Ground-truth intervals. Must contain 'video_id', 'agent_id',
            'target_id', 'action', 'start_frame' and 'stop_frame'. The columns
            'lab_id' and 'behaviors_labeled' are also read further down but are
            not validated here.
        submission: Predicted intervals with the same six validated columns.
        beta: F-beta weight passed through to ``single_lab_f1``.

    Returns:
        The unweighted mean of the per-lab scores.

    Raises:
        ValueError: if either frame is empty or lacks a validated column.
        AssertionError: if any row has ``start_frame > stop_frame``.
    """
    if len(solution) == 0 or len(submission) == 0:
        raise ValueError('Missing solution or submission data')

    for required in ('video_id', 'agent_id', 'target_id', 'action', 'start_frame', 'stop_frame'):
        if required not in solution.columns:
            raise ValueError(f'Solution is missing column {required}')
        if required not in submission.columns:
            raise ValueError(f'Submission is missing column {required}')

    solution = pl.DataFrame(solution)
    submission = pl.DataFrame(submission)
    assert (solution['start_frame'] <= solution['stop_frame']).all()
    assert (submission['start_frame'] <= submission['stop_frame']).all()

    # Need to align based on video IDs as we can't rely on the row IDs for handling public/private splits.
    known_videos = set(solution['video_id'].unique())
    submission = submission.filter(pl.col('video_id').is_in(known_videos))

    def _key_expr(alias):
        # "<video>_<agent>_<target>_<action>" identifier used to match labels with predictions.
        return pl.concat_str(
            [
                pl.col('video_id').cast(pl.Utf8),
                pl.col('agent_id').cast(pl.Utf8),
                pl.col('target_id').cast(pl.Utf8),
                pl.col('action'),
            ],
            separator='_',
        ).alias(alias)

    solution = solution.with_columns(_key_expr('label_key'))
    submission = submission.with_columns(_key_expr('prediction_key'))

    per_lab_scores = []
    for lab in solution['lab_id'].unique():
        lab_solution = solution.filter(pl.col('lab_id') == lab).clone()
        videos_of_lab = set(lab_solution['video_id'].unique())
        lab_submission = submission.filter(pl.col('video_id').is_in(videos_of_lab)).clone()
        per_lab_scores.append(single_lab_f1(lab_solution, lab_submission, beta=beta))

    return sum(per_lab_scores) / len(per_lab_scores)

def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, beta: float = 1) -> float:
    """Metric entry point: remove the row-id column (when present) and score.

    Args:
        solution: Ground-truth frame.
        submission: Prediction frame.
        row_id_column_name: Column dropped from both frames; a frame without
            this column is passed through unchanged.
        beta: F-beta weight forwarded to ``mouse_fbeta``.

    Returns:
        The value returned by ``mouse_fbeta``.
    """
    def _without_row_id(frame):
        return frame.drop(row_id_column_name, axis='columns', errors='ignore')

    return mouse_fbeta(_without_row_id(solution), _without_row_id(submission), beta=beta)

In [3]:
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import f1_score
from sklearn.base import clone
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from tqdm.notebook import tqdm
from koolbox import Trainer
import numpy as np
import itertools
import optuna
import warnings
import joblib
import glob
import gc
import logging

# Suppress every Python warning for the rest of the notebook.
warnings.filterwarnings('ignore')
# Only let Optuna messages of level WARNING and above through.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [4]:

class CFG:
    # Competition metadata tables and the per-video annotation / tracking folders.
    train_path = "/kaggle/input/MABe-mouse-behavior-detection/train.csv"
    test_path = "/kaggle/input/MABe-mouse-behavior-detection/test.csv"
    train_annotation_path = "/kaggle/input/MABe-mouse-behavior-detection/train_annotation"
    train_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/train_tracking"
    test_tracking_path = "/kaggle/input/MABe-mouse-behavior-detection/test_tracking"

    # Input dataset directory for models (usage is outside this part of the notebook).
    model_path = "/kaggle/input/my-model"
    
    # Run mode. Set to "validate" to also build the ground-truth solution frame.
    mode = "submit"
    
    # Cross-validation splitter; constructed with n_splits only (no shuffle / random_state passed).
    n_splits = 3
    cv = StratifiedGroupKFold(n_splits)

# Loading and preprocessing

In [5]:
# Load the train / test metadata tables.
train = pd.read_csv(CFG.train_path)
test = pd.read_csv(CFG.test_path)

# Mice per video: 4 minus the number of missing mouse*_strain values in the row.
train["n_mice"] = 4 - train[["mouse1_strain", "mouse2_strain", "mouse3_strain", "mouse4_strain"]].isna().sum(axis=1)
# Keep only rows whose lab_id does not start with 'MABe22_'.
train_without_mbae = train.query("~lab_id.str.startswith('MABe22_')")


In [6]:
# Sorted list of the distinct raw `body_parts_tracked` values found in train.
body_parts_tracked_list = list(np.unique(train.body_parts_tracked))

## Creating label dataframe

In [7]:
def create_solution_df(dataset):
    """Assemble the ground-truth annotation frame for a set of videos.

    For every row of ``dataset`` whose lab is not a 'MABe22' lab, the
    annotation parquet ``<train_annotation_path>/<lab_id>/<video_id>.parquet``
    is read, tagged with 'lab_id', 'video_id' and 'behaviors_labeled', and its
    ids are rewritten: 'agent_id' becomes ``mouse<id>`` and 'target_id' becomes
    ``mouse<id>``, or ``'self'`` when the target equals the agent.

    Args:
        dataset: Metadata frame with 'lab_id', 'video_id' and
            'behaviors_labeled' columns.

    Returns:
        ``(solution, missing_file)``: the concatenated annotations and the
        list of annotation paths that could not be found.

    Raises:
        ValueError: from ``pd.concat`` when no annotation file was loaded.
    """
    loaded = []
    not_found = []

    def _mouse_name(raw_id):
        return f"mouse{raw_id}"

    for _, meta in tqdm(dataset.iterrows(), total=len(dataset)):
        lab_id = meta['lab_id']
        if lab_id.startswith('MABe22'):
            continue

        video_id = meta['video_id']
        path = f"{CFG.train_annotation_path}/{lab_id}/{video_id}.parquet"
        try:
            anno = pd.read_parquet(path)
        except FileNotFoundError:
            not_found.append(path)
            continue

        anno['lab_id'] = lab_id
        anno['video_id'] = video_id
        anno['behaviors_labeled'] = meta['behaviors_labeled']

        # Target must be rewritten before agent_id, since it compares the raw ids.
        targets_itself = anno.target_id == anno.agent_id
        anno['target_id'] = np.where(targets_itself, 'self', anno['target_id'].apply(_mouse_name))
        anno['agent_id'] = anno['agent_id'].apply(_mouse_name)

        loaded.append(anno)

    return pd.concat(loaded), not_found

# Build the ground-truth frame only when validating locally.
if CFG.mode == 'validate':
    solution, missing = create_solution_df(train_without_mbae)
    # Report missing annotation files in a single log record, and only when
    # there actually are some (previously the header was logged unconditionally
    # and the list was printed separately).
    if missing:
        logging.warning("Files not found: %s", missing)

In [8]:
# Body parts removed from a video's tracking data by generate_mouse_data
# when that video tracks more than 5 distinct body parts.
DROP_BODY_PARTS = [
    'headpiece_bottombackleft', 'headpiece_bottombackright',
    'headpiece_bottomfrontleft', 'headpiece_bottomfrontright',
    'headpiece_topbackleft', 'headpiece_topbackright',
    'headpiece_topfrontleft', 'headpiece_topfrontright',
    'spine_1', 'spine_2',
    'tail_middle_1', 'tail_middle_2', 'tail_midpoint'
]

def generate_mouse_data(dataset, mode=None, is_train=True):
    """Yield per-mouse and per-mouse-pair tracking data, one video at a time.

    Args:
        dataset: Metadata frame; the code reads 'lab_id', 'video_id',
            'behaviors_labeled' and 'pix_per_cm_approx' from each row.
        mode: ``'single'`` yields only self-targeted items, ``'pair'`` only
            agent/target pairs, ``None`` yields both.
        is_train: When True, tracking is read from ``CFG.train_tracking_path``
            and per-frame labels are built from the annotation parquet; when
            False, tracking is read from ``CFG.test_tracking_path`` and the
            list of candidate actions is yielded instead of labels.

    Yields:
        4-tuples ``(kind, tracking, meta, labels_or_actions)`` where ``kind``
        is ``"single"`` or ``"pair"``; ``tracking`` is the wide tracking frame
        of the mouse (or of both mice under the column keys "A" = agent and
        "B" = target); ``meta`` holds 'video_id', 'agent_id', 'target_id' and
        'video_frame'; and the last item is a frame x action label frame of
        0.0/1.0 (train) or an array of action names (otherwise).

    Rows are skipped when the lab id starts with "MABe22", when
    'behaviors_labeled' is not a string, or (train only) when the annotation
    file is missing.
    """
    if is_train:
        data_dir = CFG.train_tracking_path
    else:
        data_dir = CFG.test_tracking_path

    for _, row in dataset.iterrows():
        lab_id = row.lab_id

        # Skip MABe22 labs and rows whose behaviors_labeled is not a string (e.g. missing).
        if lab_id.startswith("MABe22") or not isinstance(row.behaviors_labeled, str):
            continue

        video_id = row.video_id
        tracking_path =  f"{data_dir}/{lab_id}/{video_id}.parquet";

        vid = pd.read_parquet(tracking_path)

        # Videos with more than 5 distinct body parts have the DROP_BODY_PARTS rows removed.
        if len(np.unique(vid.bodypart)) > 5:
            vid = vid.query("~bodypart.isin(@DROP_BODY_PARTS)")

        # Long -> wide: one row per video_frame, columns keyed by (coord, mouse_id, bodypart).
        pvid = vid.pivot(
            index="video_frame",
            columns=["mouse_id", "bodypart"],
            values=["x", "y"],
        )

        # Release the long-format frame before building features.
        del vid
        gc.collect()

        # Column levels (coord, mouse, bodypart) -> (mouse, bodypart, coord), then sort the columns.
        pvid = pvid.reorder_levels([1, 2, 0], axis=1).T.sort_index().T
        
        # Convert pixel coordinates to centimetres.
        pvid = pvid / row.pix_per_cm_approx
        
        # behaviors_labeled is a JSON list of "agent,target,action" strings.
        raw_behaviors = json.loads(row.behaviors_labeled)
        
        # Strip single quotes, de-duplicate through a set, then sort.
        cleaned = {b.replace("'", "") for b in raw_behaviors} 
        cleaned = sorted(list(cleaned))

        # Split each entry into its agent / target / action parts.
        behaviors_split = [b.split(",") for b in cleaned]
        vid_beh = pd.DataFrame(behaviors_split, columns=["agent", "target", "action"])

        if is_train:
            try: 
                # The annotation file mirrors the tracking path under train_annotation.
                anno_path = tracking_path.replace("train_tracking", "train_annotation")
                anno = pd.read_parquet(anno_path)
            except FileNotFoundError:
                # No annotations for this video: nothing to train on.
                continue
        
        # ---- SINGLE MOUSE ----
        if mode is None or mode == 'single' :
            # Behaviours whose target is the agent itself.
            vid_beh_single = vid_beh.query("target == 'self'")

            for agent_str in np.unique(vid_beh_single.agent):
                try:
                    # The numeric id is taken from the last character of the agent string,
                    # so only single-digit ids are parsed correctly.
                    mouse_id = int(agent_str[-1])
                    
                    # All self-targeted actions listed for this agent.
                    agent_actions = np.unique(vid_beh_single.query("agent == @agent_str").action)

                    # Tracking columns of this mouse; raises KeyError when the mouse is not tracked.
                    single_mouse = pvid.loc[:, mouse_id] 
                    assert len(single_mouse) == len(pvid)
                
                    single_meta = pd.DataFrame({
                        "video_id": video_id,
                        "agent_id": agent_str,
                        "target_id": "self",
                        "video_frame": single_mouse.index, # index by frames
                    })

                    if is_train:
                        single_label = pd.DataFrame(0.0, columns=agent_actions, index=single_mouse.index)
                        anno_single = anno.query("(agent_id == @mouse_id) & (target_id == @mouse_id)")

                        for _, anno_row in anno_single.iterrows():
                            start = anno_row['start_frame']
                            end = anno_row['stop_frame']
                            action = anno_row['action']
                            # NOTE(review): .loc slicing is label-based and includes `end`, while the
                            # metric in this file treats stop_frame as exclusive — confirm this is intended.
                            single_label.loc[start:end, action] = 1.0

                        yield "single", single_mouse, single_meta, single_label

                    else:
                        yield "single", single_mouse, single_meta, agent_actions
                    
                except KeyError:
                    # Skip agents for which a lookup above failed (e.g. mouse not in the tracking columns).
                    continue
        
        # ---- PAIR MOUSE ----
        if mode is None or mode == 'pair':
            # Behaviours directed at another mouse.
            vid_behaviors_pair = vid_beh.query("target != 'self'")

            if len(vid_behaviors_pair) == 0:
                continue

            # Mouse ids present in the tracking columns.
            mouse_ids = np.unique(pvid.columns.get_level_values("mouse_id"))

            # Every ordered (agent, target) pair with agent != target.
            for agent_id, target_id in itertools.permutations(mouse_ids, 2):
                agent_str = f"mouse{agent_id}"
                target_str = f"mouse{target_id}"

                # Actions listed for this ordered pair (may be empty).
                pair_actions = np.unique(
                    vid_behaviors_pair.query("(agent == @agent_str) & (target == @target_str)").action
                )

                # Tracking of both mice side by side under a new top column level.
                mouse_pair = pd.concat(
                    [pvid[agent_id], pvid[target_id]],
                    axis=1,
                    keys=["A", "B"],  # A = agent, B = target
                )
                assert len(mouse_pair) == len(pvid)

                # Per-frame metadata for this pair.
                pair_meta = pd.DataFrame({
                    "video_id": video_id,
                    "agent_id": agent_str,
                    "target_id": target_str,
                    "video_frame": mouse_pair.index,
                })

                if is_train:
                    # Label frame: one row per video frame, one column per action.
                    pair_label = pd.DataFrame(0.0, columns=pair_actions, index=mouse_pair.index)
                    anno_pair = anno.query(
                        "(agent_id == @agent_id) & (target_id == @target_id)"
                    )

                    for _, anno_row in anno_pair.iterrows():
                        start = anno_row["start_frame"]
                        end = anno_row["stop_frame"]
                        action = anno_row["action"]
                        # Same inclusive-end slicing as in the single-mouse branch.
                        pair_label.loc[start:end, action] = 1.0

                    yield "pair", mouse_pair, pair_meta, pair_label

                else:
                    # Test / validation: yield the candidate action names instead of labels.
                    yield "pair", mouse_pair, pair_meta, pair_actions
    

# Feature Engineering


In [9]:
# helper

def scaled_window(n_frames_30fps, fps, min_frac=0.2, min_abs=1):
    """Convert a window length given at 30 fps into frames at ``fps``.

    Args:
        n_frames_30fps: Window length expressed in frames of a 30 fps video.
        fps: Frame rate of the video being processed.
        min_frac: Fraction of the window required as valid observations.
        min_abs: Lower bound for the required number of observations.

    Returns:
        ``(window, min_periods)``: ``window`` is at least 1, and
        ``min_periods`` is ``max(min_abs, round(window * min_frac))`` capped
        at ``window``.
    """
    ws = max(1, int(round(n_frames_30fps * float(fps) / 30.0)))
    min_periods = max(min_abs, int(round(ws * min_frac)))
    # pandas rolling() rejects min_periods > window, which happened for short
    # windows combined with a large min_abs (e.g. low fps and min_abs=5).
    return ws, min(ws, min_periods)


def _fps_from_meta(meta_df, fallback_lookup: dict, default_fps: float = 30.0):
    if "frames_per_second" in meta_df.columns and pd.notnull(meta_df["frames_per_second"]).any():
        return float(meta_df["frames_per_second"].iloc[0])
    vid = meta_df["video_id"].iloc[0]
    return float(fallback_lookup.get(vid, default_fps))

def _scale(n_frames_at_30fps, fps, ref=30.0):
    return max(1, int(round(n_frames_at_30fps * float(fps) / ref)))

def _scale_signed(n_frames_at_30fps, fps, ref=30.0):
    if n_frames_at_30fps == 0:
        return 0
    s = 1 if n_frames_at_30fps > 0 else -1
    mag = max(1, int(round(abs(n_frames_at_30fps) * float(fps) / ref)))
    return s * mag

In [10]:
# feature for each mouse

def add_curvature_features(X, center_x, center_y, fps):
    """Add path-curvature and turning features of one trajectory to ``X``.

    Args:
        X: Feature frame; columns are added in place.
        center_x, center_y: Per-frame coordinates of the tracked point.
        fps: Frame rate, used to scale the 30 fps window lengths.

    Returns:
        ``X`` with 'curv_mean_25', 'curv_mean_50', 'curv_mean_75' and
        'turn_rate_30' added.
    """
    # Frame-to-frame velocity and acceleration.
    vel_x, vel_y = center_x.diff(), center_y.diff()
    acc_x, acc_y = vel_x.diff(), vel_y.diff()

    # Curvature ~ |v x a| / |v|^3, with a small epsilon against division by zero.
    speed = np.sqrt(vel_x**2 + vel_y**2)
    curvature = np.abs(vel_x * acc_y - vel_y * acc_x) / (speed**3 + 1e-6)

    # Mean curvature over several window lengths.
    for base_w in (25, 50, 75):
        window, min_obs = scaled_window(base_w, fps, min_frac=0.2)
        X[f"curv_mean_{base_w}"] = curvature.rolling(window, min_periods=min_obs).mean()

    # Summed absolute change of the heading angle (not wrapped at +/- pi).
    heading = np.arctan2(vel_y, vel_x)
    window, min_obs = scaled_window(30, fps, min_frac=0.2)
    X["turn_rate_30"] = heading.diff().abs().rolling(window, min_periods=min_obs).sum()

    return X

def add_multiscale_features(X, center_x, center_y, fps):
    """Add rolling mean / std of speed at several time scales to ``X``.

    For each base window (20, 40, 60, 80 frames at 30 fps) that fits into the
    series, 'sp_m<w>' and 'sp_s<w>' are added. 'sp_ratio' (shortest-window
    mean over longest-window mean) is added when both of those columns exist.

    Returns:
        ``X``, modified in place.
    """
    frame_step = np.sqrt(center_x.diff()**2 + center_y.diff()**2)
    speed = frame_step * float(fps)

    scales = (20, 40, 60, 80)
    for base_w in scales:
        window, min_obs = scaled_window(base_w, fps, min_frac=0.25)
        if len(speed) < window:
            # Series shorter than the window: leave these columns out.
            continue
        rolling_speed = speed.rolling(window, min_periods=min_obs)
        X[f"sp_m{base_w}"] = rolling_speed.mean()
        X[f"sp_s{base_w}"] = rolling_speed.std()

    short_col = f"sp_m{scales[0]}"
    long_col = f"sp_m{scales[-1]}"
    if short_col in X.columns and long_col in X.columns:
        X["sp_ratio"] = X[short_col] / (X[long_col] + 1e-6)

    return X

def add_state_features(X, center_x, center_y, fps):
    """Add speed-state occupancy and state-transition features to ``X``.

    The smoothed speed is binned into four states (edges at 0.5, 2 and 5
    times ``fps``). For each base window (20, 40, 60, 80) that fits into the
    series, the fraction of frames spent in each state ('s<state>_<w>') and
    the number of state changes ('trans_<w>') are added.

    The binning and rolling part is best-effort: any exception raised there
    is swallowed and ``X`` is returned with whatever columns were written.

    Returns:
        ``X``, modified in place.
    """
    frame_step = np.sqrt(center_x.diff()**2 + center_y.diff()**2)
    speed = frame_step * float(fps)

    smooth_ws, smooth_mp = scaled_window(15, fps, min_frac=1/3)
    smoothed = speed.rolling(smooth_ws, min_periods=smooth_mp).mean()

    try:
        edges = [-np.inf, 0.5 * fps, 2.0 * fps, 5.0 * fps, np.inf]
        state_of_frame = pd.cut(smoothed, bins=edges, labels=[0, 1, 2, 3]).astype(float)

        for base_w in (20, 40, 60, 80):
            window, min_obs = scaled_window(base_w, fps, min_frac=0.2)
            if len(state_of_frame) >= window:
                # Share of the window spent in each speed state.
                for level in range(4):
                    in_level = (state_of_frame == level).astype(float)
                    X[f"s{level}_{base_w}"] = in_level.rolling(window, min_periods=min_obs).mean()

                # Number of frames whose state differs from the previous frame.
                switched = (state_of_frame != state_of_frame.shift(1)).astype(float)
                X[f"trans_{base_w}"] = switched.rolling(window, min_periods=min_obs).sum()
    except Exception:
        pass

    return X

def add_longrange_features(X, center_x, center_y, fps):
    """Add long-horizon position and speed-rank features to ``X``.

    For the base windows 30, 60 and 120 the function adds rolling means of
    the position ('x_ml<w>', 'y_ml<w>', only when the series is long enough),
    exponentially weighted means ('x_e<w>', 'y_e<w>') and the rolling
    percentile rank of speed ('sp_pct<w>', only when the series is long
    enough).

    Returns:
        ``X``, modified in place.
    """
    horizons = (30, 60, 120)

    # Long-range moving averages of the position.
    for base_w in horizons:
        window, min_obs = scaled_window(base_w, fps, min_frac=1/6, min_abs=5)
        if len(center_x) < window:
            continue
        X[f"x_ml{base_w}"] = center_x.rolling(window, min_periods=min_obs).mean()
        X[f"y_ml{base_w}"] = center_y.rolling(window, min_periods=min_obs).mean()

    # Exponentially weighted means; only the scaled span is used here,
    # min_periods is set separately on the ewm call.
    for span in horizons:
        scaled_span = scaled_window(span, fps, min_frac=0.0)[0]
        X[f"x_e{span}"] = center_x.ewm(span=scaled_span, min_periods=1).mean()
        X[f"y_e{span}"] = center_y.ewm(span=scaled_span, min_periods=1).mean()

    # Percentile rank of the current speed within its rolling window.
    frame_step = np.sqrt(center_x.diff()**2 + center_y.diff()**2)
    speed = frame_step * float(fps)
    for base_w in horizons:
        window, min_obs = scaled_window(base_w, fps, min_frac=1 / 6, min_abs=5)
        if len(speed) < window:
            continue
        X[f"sp_pct{base_w}"] = speed.rolling(window, min_periods=min_obs).rank(pct=True)

    return X

In [11]:
def add_single_extra_features(X, single_mouse, available_parts, fps):
    """Add posture, nose-to-flank and ear-dynamics features for one mouse.

    Args:
        X: Feature frame; columns are added in place.
        single_mouse: Tracking frame indexed as ``single_mouse[part][coord]``
            with coordinates 'x' and 'y'.
        available_parts: Collection of body-part names present in
            ``single_mouse``.
        fps: Unused here; kept for a uniform feature-function signature.

    Returns:
        ``X`` with, depending on the available parts: 'pose_curv';
        'nose_lateral_dist' and 'nose_lateral_vel'; 'ear_vel' and 'ear_acc'.
        All added columns are float32.
    """
    def _tracked(*parts):
        return all(part in available_parts for part in parts)

    # Posture: cosine of the angle between tail_base->body_center and body_center->nose.
    if _tracked('nose', 'body_center', 'tail_base'):
        rear = single_mouse['body_center'] - single_mouse['tail_base']
        front = single_mouse['nose'] - single_mouse['body_center']

        dot = rear['x'] * front['x'] + rear['y'] * front['y']
        rear_len = np.sqrt(rear['x']**2 + rear['y']**2)
        front_len = np.sqrt(front['x']**2 + front['y']**2)

        X['pose_curv'] = (dot / (rear_len * front_len + 1e-6)).astype(np.float32)

    # Verticality / rearing proxy: distance from the nose to the midpoint of the flanks.
    if _tracked('nose', 'lateral_left', 'lateral_right'):
        nose = single_mouse['nose']
        left = single_mouse['lateral_left']
        right = single_mouse['lateral_right']
        mid_x = (left['x'] + right['x']) / 2.0
        mid_y = (left['y'] + right['y']) / 2.0

        nose_to_mid = np.sqrt((nose['x'] - mid_x)**2 + (nose['y'] - mid_y)**2)
        X["nose_lateral_dist"] = nose_to_mid.astype(np.float32)
        X["nose_lateral_vel"] = nose_to_mid.diff().astype(np.float32)

    # Ear dynamics: first and second difference of the ear-to-ear distance.
    if _tracked('ear_left', 'ear_right'):
        ear_l = single_mouse['ear_left']
        ear_r = single_mouse['ear_right']
        ear_gap = np.sqrt((ear_l['x'] - ear_r['x'])**2 + (ear_l['y'] - ear_r['y'])**2)
        gap_change = ear_gap.diff()
        X["ear_vel"] = gap_change.astype(np.float32)
        X["ear_acc"] = gap_change.diff().astype(np.float32)

    return X

In [12]:
def transform_single(single_mouse, body_parts_tracked, fps):
    """Build the per-frame feature matrix for a single mouse.

    Args:
        single_mouse: Tracking frame indexed as ``single_mouse[part][coord]``.
        body_parts_tracked: Full list of body-part names that defines the
            (fixed) set and order of the pairwise-distance columns.
        fps: Frame rate, used to scale lags and window lengths.

    Returns:
        A float32 frame. The leading columns are the squared distances of
        every body-part pair from ``body_parts_tracked`` (all-NaN columns for
        pairs not tracked in this video), followed by motion, posture and
        ear features that depend on which parts are available.
    """
    available_parts = single_mouse.columns.get_level_values(0)

    def _tracked(*parts):
        return all(part in available_parts for part in parts)

    def _sq_dist(frame_a, frame_b):
        # Row-wise squared Euclidean distance; NaN if any coordinate is NaN.
        return np.square(frame_a - frame_b).sum(axis=1, skipna=False)

    # Squared distance between every pair of tracked body parts.
    part_pairs = list(itertools.combinations(body_parts_tracked, 2))
    X = pd.DataFrame({
        f"{p1}+{p2}": _sq_dist(single_mouse[p1], single_mouse[p2])
        for p1, p2 in part_pairs
        if p1 in available_parts and p2 in available_parts
    })

    # Fix the column set and order, whatever parts this video tracks.
    X = X.reindex(columns=[f"{p1}+{p2}" for p1, p2 in part_pairs], copy=False)

    # Squared displacement of the ears against their own / the tail-base past position (~10 frames back).
    if _tracked('ear_left', 'ear_right', 'tail_base'):
        past = single_mouse[['ear_left', 'ear_right', 'tail_base']].shift(_scale(10, fps))
        X[['sp_lf', 'sp_rt', 'sp_lf2', 'sp_rt2']] = np.column_stack([
            _sq_dist(single_mouse['ear_left'], past['ear_left']),
            _sq_dist(single_mouse['ear_right'], past['ear_right']),
            _sq_dist(single_mouse['ear_left'], past['tail_base']),
            _sq_dist(single_mouse['ear_right'], past['tail_base']),
        ])

    # Elongation: nose-tail distance relative to the ear-ear distance.
    if 'nose+tail_base' in X.columns and 'ear_left+ear_right' in X.columns:
        X['elong'] = X['nose+tail_base'] / (X['ear_left+ear_right'] + 1e-6)

    # Body angle: cosine between body_center->nose and body_center->tail_base.
    if _tracked('nose', 'body_center', 'tail_base'):
        to_nose = single_mouse['nose'] - single_mouse['body_center']
        to_tail = single_mouse['tail_base'] - single_mouse['body_center']

        dot = to_nose['x'] * to_tail['x'] + to_nose['y'] * to_tail['y']
        len_nose = np.sqrt(to_nose['x']**2 + to_nose['y']**2)
        len_tail = np.sqrt(to_tail['x']**2 + to_tail['y']**2)
        body_ang = dot / (len_nose * len_tail + 1e-6)
        ang_change = body_ang.diff()

        X['body_ang'] = body_ang.astype(np.float32)
        X['body_ang_vel'] = ang_change.astype(np.float32)
        X['body_ang_acc'] = ang_change.diff().astype(np.float32)

    # Features derived from the body-centre trajectory.
    if 'body_center' in available_parts:
        cx = single_mouse['body_center']['x']
        cy = single_mouse['body_center']['y']
        step_x, step_y = cx.diff(), cy.diff()

        for base_w in (5, 15, 30, 60):
            ws = _scale(base_w, fps)
            centred_x = cx.rolling(window=ws, min_periods=1, center=True)
            centred_y = cy.rolling(window=ws, min_periods=1, center=True)

            X[f'cx_m{base_w}'] = centred_x.mean()
            X[f'cy_m{base_w}'] = centred_y.mean()
            X[f'cx_s{base_w}'] = centred_x.std()
            X[f'cy_s{base_w}'] = centred_y.std()

            X[f'x_rng{base_w}'] = centred_x.max() - centred_x.min()
            X[f'y_rng{base_w}'] = centred_y.max() - centred_y.min()

            # Net displacement and activity over a trailing window of frame steps.
            trailing_x = step_x.rolling(ws, min_periods=1)
            trailing_y = step_y.rolling(ws, min_periods=1)
            X[f'disp{base_w}'] = np.sqrt(trailing_x.sum()**2 + trailing_y.sum()**2)
            X[f'act{base_w}'] = np.sqrt(trailing_x.var() + trailing_y.var())

        # Advanced trajectory features.
        X = add_curvature_features(X, cx, cy, fps)
        X = add_multiscale_features(X, cx, cy, fps)
        X = add_state_features(X, cx, cy, fps)
        X = add_longrange_features(X, cx, cy, fps)

    # Nose-to-tail-base distance: lagged values and differences to them.
    if _tracked('nose', 'tail_base'):
        nt = np.sqrt(
            (single_mouse['nose']['x'] - single_mouse['tail_base']['x'])**2 +
            (single_mouse['nose']['y'] - single_mouse['tail_base']['y'])**2
        )
        for lag in (10, 20, 40):
            lagged = nt.shift(_scale(lag, fps))
            X[f'nt_lg{lag}'] = lagged
            X[f'nt_df{lag}'] = nt - lagged

    # Ear-to-ear distance at several time offsets, plus its coefficient of variation.
    if _tracked('ear_left', 'ear_right'):
        ear_d = np.sqrt(
            (single_mouse['ear_left']['x'] - single_mouse['ear_right']['x'])**2 +
            (single_mouse['ear_left']['y'] - single_mouse['ear_right']['y'])**2
        )
        for off in (-30, -20, -10, 10, 20, 30):
            X[f'ear_o{off}'] = ear_d.shift(-_scale_signed(off, fps))

        ear_roll = ear_d.rolling(window=_scale(30, fps), min_periods=1, center=True)
        X['ear_con'] = ear_roll.std() / (ear_roll.mean() + 1e-6)

    X = add_single_extra_features(X, single_mouse, available_parts, fps)

    return X.astype(np.float32, copy=False)

In [13]:
# feature for mice interaction
def add_interaction_features(X, mouse_pair, avail_A, avail_B, fps):
    """Add approach / lead / speed-correlation features for a mouse pair.

    Args:
        X: Feature frame; columns are added in place.
        mouse_pair: Tracking frame indexed as ``mouse_pair[side][part][coord]``
            with sides "A" and "B".
        avail_A, avail_B: Body parts available for each mouse.
        fps: Frame rate, used to scale window lengths.

    Returns:
        ``X`` unchanged when either mouse lacks 'body_center'; otherwise ``X``
        with 'A_ld<w>' and 'B_ld<w>' (w = 30, 60), 'chase_30' and
        'sp_cor<w>' (w = 60, 120) added.
    """
    if not ("body_center" in avail_A and "body_center" in avail_B):
        return X

    # Body-centre coordinates of both mice.
    centre_A = mouse_pair["A"]["body_center"]
    centre_B = mouse_pair["B"]["body_center"]
    Ax, Ay = centre_A["x"], centre_A["y"]
    Bx, By = centre_B["x"], centre_B["y"]

    # Position of A relative to B, and their distance.
    rel_x = Ax - Bx
    rel_y = Ay - By
    rel_dist = np.sqrt(rel_x**2 + rel_y**2)

    # Frame-to-frame velocities and speeds.
    A_vx, A_vy = Ax.diff(), Ay.diff()
    B_vx, B_vy = Bx.diff(), By.diff()
    A_sp = np.sqrt(A_vx**2 + A_vy**2)
    B_sp = np.sqrt(B_vx**2 + B_vy**2)

    # Cosine between each mouse's velocity and the relative-position vector.
    A_lead = (A_vx * rel_x + A_vy * rel_y) / (A_sp * rel_dist + 1e-6)
    B_lead = (B_vx * (-rel_x) + B_vy * (-rel_y)) / (B_sp * rel_dist + 1e-6)

    # Rolling means of both lead measures.
    for base_w in (30, 60):
        window, min_obs = scaled_window(base_w, fps, min_frac=1 / 6)
        X[f"A_ld{base_w}"] = A_lead.rolling(window, min_periods=min_obs).mean()
        X[f"B_ld{base_w}"] = B_lead.rolling(window, min_periods=min_obs).mean()

    # Approach rate (decrease in distance) weighted by B's lead measure.
    chase = (-rel_dist.diff()) * B_lead
    window, min_obs = scaled_window(30, fps, min_frac=1/6)
    X["chase_30"] = chase.rolling(window, min_periods=min_obs).mean()

    # Rolling correlation of the two speeds over long windows.
    for base_w in (60, 120):
        window, min_obs = scaled_window(base_w, fps, min_frac=1 / 6)
        X[f"sp_cor{base_w}"] = A_sp.rolling(window, min_periods=min_obs).corr(B_sp)

    return X


In [14]:
def add_egocentric_interaction_features(X, mouse_pair, avail_A, avail_B, fps):
    """Add bearing features: where each mouse is relative to the other's heading.

    The heading of a mouse is the vector tail_base -> nose. For each mouse the
    cosine and sine of the angle between its heading and the vector towards
    the other mouse's body centre are added, together with centred rolling
    means of those four columns over 15 and 30 base frames.

    Args:
        X: Feature frame; columns are added in place.
        mouse_pair: Tracking frame indexed as ``mouse_pair[side][part][coord]``.
        avail_A, avail_B: Body parts available for each mouse.
        fps: Frame rate, used to scale window lengths.

    Returns:
        ``X`` unchanged unless both mice have 'nose', 'tail_base' and
        'body_center'; otherwise ``X`` with the bearing columns added.
    """
    # Check that every required body part is available for both mice.
    needed = ('nose', 'tail_base', 'body_center')
    has_A = all(part in avail_A for part in needed)
    has_B = all(part in avail_B for part in needed)
    if not has_A or not has_B:
        return X

    def _heading(side):
        mouse = mouse_pair[side]
        return (
            mouse['nose']['x'] - mouse['tail_base']['x'],
            mouse['nose']['y'] - mouse['tail_base']['y'],
        )

    def _bearing(head_x, head_y, to_x, to_y):
        # Cosine via the dot product; sine via the cross product, whose sign
        # tells whether the other mouse is to the left or to the right.
        dot = head_x * to_x + head_y * to_y
        head_norm = np.sqrt(head_x**2 + head_y**2) + 1e-6
        to_norm = np.sqrt(to_x**2 + to_y**2) + 1e-6
        cosine = dot / (head_norm * to_norm)
        cross = head_x * to_y - head_y * to_x
        sine = cross / (head_norm * to_norm)
        return cosine, sine

    # Body-centre positions.
    Ax = mouse_pair['A']['body_center']['x']
    Ay = mouse_pair['A']['body_center']['y']
    Bx = mouse_pair['B']['body_center']['x']
    By = mouse_pair['B']['body_center']['y']

    headA_x, headA_y = _heading('A')
    headB_x, headB_y = _heading('B')

    # A's heading against the vector A -> B.
    cos_A, sin_A = _bearing(headA_x, headA_y, Bx - Ax, By - Ay)
    X['A_bearing_cos'] = cos_A.astype(np.float32)
    X['A_bearing_sin'] = sin_A.astype(np.float32)

    # B's heading against the vector B -> A.
    cos_B, sin_B = _bearing(headB_x, headB_y, Ax - Bx, Ay - By)
    X['B_bearing_cos'] = cos_B.astype(np.float32)
    X['B_bearing_sin'] = sin_B.astype(np.float32)

    # Centred rolling means, computed from the float32 columns stored above.
    bearing_cols = ('A_bearing_cos', 'A_bearing_sin', 'B_bearing_cos', 'B_bearing_sin')
    for base_w in (15, 30):
        ws = _scale(base_w, fps)
        for col in bearing_cols:
            X[f'{col}_m{base_w}'] = X[col].rolling(window=ws, min_periods=1, center=True).mean()

    return X


In [15]:
def add_asymmetry_features(X, mouse_pair, avail_A, avail_B, fps):
    """Add speed-asymmetry features (A versus B) for a mouse pair.

    Args:
        X: Feature frame; columns are added in place.
        mouse_pair: Tracking frame indexed as ``mouse_pair[side][part][coord]``.
        avail_A, avail_B: Body parts available for each mouse.
        fps: Frame rate, used to scale window lengths.

    Returns:
        ``X`` unchanged when either mouse lacks 'body_center'; otherwise ``X``
        with 'sp_diff_inst', 'sp_ratio_inst' and the centred rolling means
        'sp_diff_m<w>' / 'sp_ratio_m<w>' (w = 30, 60), all float32.
    """
    # Speeds are defined from the body centre, so both mice need it.
    if not ('body_center' in avail_A and 'body_center' in avail_B):
        return X

    def _frame_speed(side):
        # Per-frame speed from frame-wise differences of the body centre.
        centre = mouse_pair[side]['body_center']
        vel_x = centre['x'].diff()
        vel_y = centre['y'].diff()
        return np.sqrt(vel_x**2 + vel_y**2)

    A_sp = _frame_speed('A')
    B_sp = _frame_speed('B')

    # Asymmetry expressed as a difference and as a ratio.
    sp_diff = A_sp - B_sp
    sp_ratio = A_sp / (B_sp + 1e-6)

    X['sp_diff_inst'] = sp_diff.astype(np.float32)
    X['sp_ratio_inst'] = sp_ratio.astype(np.float32)

    # Centred rolling means over short / medium windows.
    for base_w in (30, 60):
        ws = _scale(base_w, fps)
        X[f'sp_diff_m{base_w}'] = (
            sp_diff.rolling(window=ws, min_periods=1, center=True).mean().astype(np.float32)
        )
        X[f'sp_ratio_m{base_w}'] = (
            sp_ratio.rolling(window=ws, min_periods=1, center=True).mean().astype(np.float32)
        )

    return X


In [16]:
def transform_pair(mouse_pair, body_parts_tracked, fps):
    """Build the per-frame feature matrix for one (A, B) mouse pair.

    Args:
        mouse_pair: tracking frame whose columns are addressed as
            ``mouse_pair['A'|'B'][body_part][coord]``.
        body_parts_tracked: body-part names; every (p1, p2) combination gets a
            ``12+{p1}+{p2}`` column, filled with NaN when a part is unavailable.
        fps: frame rate forwarded to ``_scale`` / ``_scale_signed``
            (presumably to rescale frame counts to this video's fps — confirm
            against those helpers).

    Returns:
        ``X`` cast to float32. Columns are appended in the order the statements
        below execute.
    """
    avail_A = mouse_pair['A'].columns.get_level_values(0)
    avail_B = mouse_pair['B'].columns.get_level_values(0)

    # pairwise distance between A[p1] and B[p2]
    # (sum of squared coordinate differences; NaN if any coordinate is NaN)
    features = {}
    for p1, p2 in itertools.product(body_parts_tracked, repeat=2):
        if p1 in avail_A and p2 in avail_B:
            diff = mouse_pair['A'][p1] - mouse_pair['B'][p2]
            dist2 = np.square(diff).sum(axis=1, skipna=False)
            features[f"12+{p1}+{p2}"] = dist2

    X = pd.DataFrame(features)
    # reindex so every (p1, p2) column exists, in a fixed order, even when a part is missing
    full_cols = [f"12+{p1}+{p2}" for p1, p2 in itertools.product(body_parts_tracked, repeat=2)]
    X = X.reindex(columns=full_cols, copy=False)

    # ear-left speed A/B (lag ~10 frame)
    # squared displacement of ear_left over `lag` frames: A vs its own past,
    # A vs B's past, and B vs its own past
    if ('A', 'ear_left') in mouse_pair.columns and ('B', 'ear_left') in mouse_pair.columns:
        lag = _scale(10, fps)
        shA = mouse_pair['A']['ear_left'].shift(lag)
        shB = mouse_pair['B']['ear_left'].shift(lag)

        sp_A  = np.square(mouse_pair['A']['ear_left'] - shA).sum(axis=1, skipna=False)
        sp_AB = np.square(mouse_pair['A']['ear_left'] - shB).sum(axis=1, skipna=False)
        sp_B  = np.square(mouse_pair['B']['ear_left'] - shB).sum(axis=1, skipna=False)

        X[['sp_A', 'sp_AB', 'sp_B']] = np.column_stack([sp_A, sp_AB, sp_B])

    # elong = dist(nose, tail_base) / dist(ear_left, ear_right)
    # (both distances are squared here, so this is a ratio of squared lengths)
    have_A_elong = all(p in avail_A for p in ['nose', 'tail_base', 'ear_left', 'ear_right'])
    have_B_elong = all(p in avail_B for p in ['nose', 'tail_base', 'ear_left', 'ear_right'])

    if have_A_elong:
        nose_A = mouse_pair['A']['nose']
        tail_A = mouse_pair['A']['tail_base']
        el_A_l = mouse_pair['A']['ear_left']
        el_A_r = mouse_pair['A']['ear_right']

        nose_tail_A = np.square(nose_A - tail_A).sum(axis=1, skipna=False)
        ear_dist_A  = np.square(el_A_l - el_A_r).sum(axis=1, skipna=False)
        X['elong_A'] = nose_tail_A / (ear_dist_A + 1e-6)

    if have_B_elong:
        nose_B = mouse_pair['B']['nose']
        tail_B = mouse_pair['B']['tail_base']
        el_B_l = mouse_pair['B']['ear_left']
        el_B_r = mouse_pair['B']['ear_right']

        nose_tail_B = np.square(nose_B - tail_B).sum(axis=1, skipna=False)
        ear_dist_B  = np.square(el_B_l - el_B_r).sum(axis=1, skipna=False)
        X['elong_B'] = nose_tail_B / (ear_dist_B + 1e-6)

    # diff and ratio
    if 'elong_A' in X.columns and 'elong_B' in X.columns:
        X['elong_diff']  = X['elong_A'] - X['elong_B']
        X['elong_ratio'] = X['elong_A'] / (X['elong_B'] + 1e-6)

    # relative body angle between A and B
    # (cosine between the tail_base->nose vectors of the two mice)
    if all(p in avail_A for p in ['nose', 'tail_base']) and all(p in avail_B for p in ['nose', 'tail_base']):
        dir_A = mouse_pair['A']['nose'] - mouse_pair['A']['tail_base']
        dir_B = mouse_pair['B']['nose'] - mouse_pair['B']['tail_base']

        dot = dir_A['x'] * dir_B['x'] + dir_A['y'] * dir_B['y']
        nA = np.sqrt(dir_A['x']**2 + dir_A['y']**2)
        nB = np.sqrt(dir_B['x']**2 + dir_B['y']**2)
        X['rel_ori'] = dot / (nA * nB + 1e-6)

    # nose-nose approach
    # (current squared nose-nose distance minus the same quantity `lag` frames ago;
    # negative values mean the noses got closer)
    if 'nose' in avail_A and 'nose' in avail_B:
        nn_cur = np.square(mouse_pair['A']['nose'] - mouse_pair['B']['nose']).sum(axis=1, skipna=False)
        lag = _scale(10, fps)
        shA_n = mouse_pair['A']['nose'].shift(lag)
        shB_n = mouse_pair['B']['nose'].shift(lag)
        nn_past = np.square(shA_n - shB_n).sum(axis=1, skipna=False)
        X['appr'] = nn_cur - nn_past

    # distance categories by body_center
    if 'body_center' in avail_A and 'body_center' in avail_B:
        Ax = mouse_pair['A']['body_center']['x']
        Ay = mouse_pair['A']['body_center']['y']
        Bx = mouse_pair['B']['body_center']['x']
        By = mouse_pair['B']['body_center']['y']

        cd = np.sqrt((Ax - Bx)**2 + (Ay - By)**2)

        # one-hot distance bins; the 5 / 15 / 30 cut-offs are in the tracking
        # coordinates' units (units not visible here — TODO confirm)
        X['v_cls'] = (cd < 5.0).astype(float)
        X['cls']   = ((cd >= 5.0)  & (cd < 15.0)).astype(float)
        X['med']   = ((cd >= 15.0) & (cd < 30.0)).astype(float)
        X['far']   = (cd >= 30.0).astype(float)

        # stats on squared distance
        cd2 = np.square(mouse_pair['A']['body_center'] - mouse_pair['B']['body_center']).sum(axis=1, skipna=False)
        
        # NOTE(review): everything down to the add_* calls is indented inside this
        # loop, so the va_* / int_con columns and the three add_* helpers are
        # recomputed for every window; only the d_*, int*, co_m* and co_s* columns
        # depend on base_w. Hoisting the rest would change the order in which
        # columns are first inserted into X — confirm that downstream models do
        # not depend on column order before restructuring.
        for base_w in [5, 15, 30, 60]:
            ws = _scale(base_w, fps)
            roll_c = dict(window=ws, min_periods=1, center=True)
    
            X[f'd_m{base_w}']  = cd2.rolling(**roll_c).mean()
            X[f'd_s{base_w}']  = cd2.rolling(**roll_c).std()
            X[f'd_mn{base_w}'] = cd2.rolling(**roll_c).min()
            X[f'd_mx{base_w}'] = cd2.rolling(**roll_c).max()
    
            # inverse of the rolling variance of the squared distance
            d_var = cd2.rolling(**roll_c).var()
            X[f'int{base_w}'] = 1.0 / (1.0 + d_var)
    
            # dot product of the body_center velocities
            Axd = Ax.diff()
            Ayd = Ay.diff()
            Bxd = Bx.diff()
            Byd = By.diff()
            coord = Axd * Bxd + Ayd * Byd
            X[f'co_m{base_w}'] = coord.rolling(**roll_c).mean()
            X[f'co_s{base_w}'] = coord.rolling(**roll_c).std()

            # cosine similarity A,B speed (offset) ---
            Avx = Ax.diff()
            Avy = Ay.diff()
            Bvx = Bx.diff()
            Bvy = By.diff()
            vel_cos = (Avx * Bvx + Avy * Bvy) / (
                np.sqrt(Avx**2 + Avy**2) * np.sqrt(Bvx**2 + Bvy**2) + 1e-6
            )
        
            # column names use the nominal offset; the shift uses the value
            # returned by _scale_signed
            for off in [-30, -20, -10, 0, 10, 20, 30]:
                o = _scale_signed(off, fps)
                X[f'va_{off}'] = vel_cos.shift(-o)
        
            # std / mean of the squared distance over a window scaled from 30
            w = _scale(30, fps)
            roll_c30 = dict(window=w, min_periods=1, center=True)
            cd2_mean = cd2.rolling(**roll_c30).mean()
            cd2_std  = cd2.rolling(**roll_c30).std()
            X['int_con'] = cd2_std / (cd2_mean + 1e-6)

            # advanced features
            X = add_asymmetry_features(X, mouse_pair, avail_A, avail_B, fps)
            X = add_egocentric_interaction_features(X, mouse_pair, avail_A, avail_B, fps)
            X = add_interaction_features(X, mouse_pair, avail_A, avail_B, fps)

    # nose-nose distance + close percentage
    if 'nose' in avail_A and 'nose' in avail_B:
        nn = np.sqrt(
            (mouse_pair['A']['nose']['x'] - mouse_pair['B']['nose']['x'])**2 +
            (mouse_pair['A']['nose']['y'] - mouse_pair['B']['nose']['y'])**2
        )
        for lag in [10, 20, 40]:
            l = _scale(lag, fps)
            X[f'nn_lg{lag}'] = nn.shift(l)
            X[f'nn_ch{lag}'] = nn - nn.shift(l)
            # fraction of the trailing `l` frames with nose-nose distance below 10
            # (this rolling window is not centered, unlike the ones above)
            is_close = (nn < 10.0).astype(float)
            X[f'cl_ps{lag}'] = is_close.rolling(l, min_periods=1).mean()
    

    return X.astype(np.float32, copy=False)

# Preparation for training

In [17]:
def clean_and_fill_submission(submission, meta_df, is_train=True):
    """Make a raw submission frame acceptable to the scorer.

    Three passes are applied:

    1. Rows with ``start_frame >= stop_frame`` are dropped.
    2. Within each (video, agent, target) group, intervals are visited in
       ``start_frame`` order and any interval that starts before the last kept
       interval stops is dropped (greedy overlap removal).
    3. Every eligible video in ``meta_df`` that has no prediction left gets
       dummy segments: for each (agent, target) pair in its
       ``behaviors_labeled`` list, the video's frame range is split evenly among
       that pair's actions.

    Args:
        submission: frame with columns ``video_id, agent_id, target_id, action,
            start_frame, stop_frame``. May be empty.
        meta_df: metadata with ``lab_id``, ``video_id`` and ``behaviors_labeled``
            (a JSON list of ``"agent,target,action"`` strings). Rows whose
            ``lab_id`` starts with ``"MABe22"`` or whose ``behaviors_labeled`` is
            not a string are ignored.
        is_train: selects ``CFG.train_tracking_path`` or
            ``CFG.test_tracking_path`` for reading tracking files.

    Returns:
        The cleaned submission with a fresh 0..n-1 index.
    """
    # 1) remove where start >= stop
    prev_len = len(submission)
    submission = submission[submission['start_frame'] < submission['stop_frame']].copy()
    if len(submission) != prev_len:
        print("Dropped rows with start_frame >= stop_frame")

    # 2) remove overlap (keep the earliest interval, drop whatever starts inside it)
    prev_len = len(submission)
    cleaned_groups = []

    for _, grp in submission.groupby(['video_id', 'agent_id', 'target_id']):
        grp = grp.sort_values('start_frame')
        keep_mask = np.ones(len(grp), dtype=bool)

        last_stop = -1
        starts = grp['start_frame'].to_numpy()
        stops = grp['stop_frame'].to_numpy()
        for i, (start, stop) in enumerate(zip(starts, stops)):
            if start < last_stop:
                keep_mask[i] = False
            else:
                last_stop = stop

        cleaned_groups.append(grp[keep_mask])

    if cleaned_groups:
        submission = pd.concat(cleaned_groups, ignore_index=True)
    else:
        # pd.concat([]) raises; an empty input simply stays empty
        submission = submission.iloc[0:0].reset_index(drop=True)
    if len(submission) != prev_len:
        print("Dropped rows with overlapped intervals")

    # 3) dummy prediction for videos that have no prediction
    predicted_videos = set(submission["video_id"].unique())
    dummy_rows = []

    for _, row in meta_df.iterrows():
        lab_id = row["lab_id"]

        # skip MABe22 videos
        if isinstance(lab_id, str) and lab_id.startswith("MABe22"):
            continue

        # skip videos without a behaviors_labeled string
        behaviors_labeled = row.get("behaviors_labeled", None)
        if not isinstance(behaviors_labeled, str):
            continue

        video_id = row["video_id"]

        # already has at least one prediction -> nothing to fill
        if video_id in predicted_videos:
            continue

        print(f"Video {video_id} has no predictions. Filling dummy segments...")

        # only the frame column is needed to get the video's frame range
        tracking_dir = CFG.train_tracking_path if is_train else CFG.test_tracking_path
        path = f"{tracking_dir}/{lab_id}/{video_id}.parquet"
        frames = pd.read_parquet(path, columns=["video_frame"])["video_frame"]

        # (agent, target, action) triplets from the metadata
        raw_behaviors = json.loads(behaviors_labeled)
        cleaned = {b.replace("'", "") for b in raw_behaviors}
        triplets = [b.split(",") for b in sorted(cleaned)]
        beh_df = pd.DataFrame(triplets, columns=["agent", "target", "action"])

        start_frame = frames.min()
        stop_frame = frames.max() + 1
        total_frames = stop_frame - start_frame

        # split the frame range evenly among each pair's actions
        for _, actions in beh_df.groupby(["agent", "target"]):
            batch_len = int(np.ceil(total_frames / len(actions)))

            for i, (_, act_row) in enumerate(actions.iterrows()):
                batch_start = start_frame + i * batch_len
                batch_stop = min(batch_start + batch_len, stop_frame)

                # more actions than frames: the remaining slots would be empty
                if batch_start >= batch_stop:
                    continue

                dummy_rows.append((
                    video_id,
                    act_row["agent"],
                    act_row["target"],
                    act_row["action"],
                    batch_start,
                    batch_stop,
                ))

    if dummy_rows:
        dummy_df = pd.DataFrame(
            dummy_rows,
            columns=["video_id", "agent_id", "target_id", "action", "start_frame", "stop_frame"],
        )
        submission = pd.concat([submission, dummy_df], ignore_index=True)
        print(f"Filled {len(dummy_rows)} dummy segments for empty videos")

    return submission


In [18]:
def predict_multiclass(pred, meta, thresholds, window=5, min_len=3, merge_gap=3, default_threshold=0.27):
    """Turn per-frame action scores into (start, stop) behavior segments.

    Steps:
        1. Smooth every action's score with a centered rolling mean.
        2. Per frame, pick the action with the largest margin over its
           threshold; frames where no action reaches its threshold get -1.
        3. Cut the frame sequence at every change of the chosen action and emit
           one segment per run of a real action.
        4. Drop segments shorter than ``min_len`` frames.
        5. Merge segments of the same (video, agent, target, action) that are at
           most ``merge_gap`` frames apart.

    Args:
        pred: frame of scores, one column per action, one row per row of ``meta``.
        meta: frame with ``video_id``, ``agent_id``, ``target_id`` and
            ``video_frame`` columns, row-aligned with ``pred``.
        thresholds: mapping action -> threshold.
        window: rolling-mean window; ``<= 1`` disables smoothing.
        min_len: minimum segment length kept in step 4.
        merge_gap: maximum gap bridged in step 5; ``<= 0`` disables merging.
        default_threshold: threshold used for actions missing from ``thresholds``.

    Returns:
        DataFrame with columns ``video_id, agent_id, target_id, action,
        start_frame, stop_frame`` (possibly empty).

    NOTE(review): the last change point is only ever used as a stop marker, so
    a segment that starts at the final change point (an action still running on
    the last row) is not emitted. A run of one action that continues across a
    video/agent/target boundary is cut at the end of the video it started in
    and its continuation is not emitted either.
    """
    out_columns = ["video_id", "agent_id", "target_id", "action", "start_frame", "stop_frame"]

    # no actions or no frames: argmax over an empty axis would raise
    if pred.shape[1] == 0 or len(pred) == 0:
        return pd.DataFrame(columns=out_columns)

    # 1) smoothing with a centered rolling mean
    if window > 1:
        pred_smoothed = pred.rolling(window=window, min_periods=1, center=True).mean()
    else:
        pred_smoothed = pred

    threshold_array = np.array([thresholds.get(col, default_threshold) for col in pred.columns])
    margins = pred_smoothed.values - threshold_array[None, :]

    # 2) pick the action with the largest margin
    ama = np.argmax(margins, axis=1)
    max_margin = margins[np.arange(len(ama)), ama]

    # max_margin < 0 => no action reaches its threshold -> -1
    ama = np.where(max_margin >= 0.0, ama, -1)
    ama = pd.Series(ama, index=meta.video_frame)

    # 3) detect change points
    changes_mask = (ama != ama.shift(1)).values
    ama_changes = ama[changes_mask]
    meta_changes = meta[changes_mask]

    mask = ama_changes.values >= 0
    # guard: no frame carries an action
    if mask.size == 0 or mask.sum() == 0:
        return pd.DataFrame(columns=out_columns)
    mask[-1] = False  # the last change point only marks the stop of the previous segment

    # rows where a segment starts, and the change point right after each of them
    start_meta = meta_changes[mask]
    stop_meta = meta_changes.iloc[1:][mask[:-1]]

    submission_part = pd.DataFrame({
        'video_id':   start_meta['video_id'].values,
        'agent_id':   start_meta['agent_id'].values,
        'target_id':  start_meta['target_id'].values,
        'action':     pred.columns[ama_changes[mask].values],
        'start_frame': ama_changes.index[mask],
        'stop_frame':  ama_changes.index[1:][mask[:-1]]
    })

    # A segment whose stop marker belongs to another video/agent/target ran to
    # the end of its own video: close it one past that video's last frame.
    crosses = np.zeros(len(submission_part), dtype=bool)
    for col in ('video_id', 'agent_id', 'target_id'):
        crosses |= np.asarray(start_meta[col]) != np.asarray(stop_meta[col])
    if crosses.any():
        video_end = meta.groupby('video_id')['video_frame'].max() + 1
        crossing_videos = submission_part.loc[crosses, 'video_id'].to_numpy()
        submission_part.loc[crosses, 'stop_frame'] = video_end.reindex(crossing_videos).to_numpy()

    # 4) filter first: drop segments that are too short (likely noise)
    if len(submission_part) == 0:
        return submission_part

    duration = submission_part.stop_frame - submission_part.start_frame
    submission_part = submission_part[duration >= min_len].reset_index(drop=True)

    if len(submission_part) == 0:
        return submission_part

    # 5) merge afterwards: join nearby segments of the same (video, agent, target, action)
    if merge_gap > 0:
        merged = []
        group_cols = ["video_id", "agent_id", "target_id", "action"]
        for _, grp in submission_part.groupby(group_cols):
            grp = grp.sort_values("start_frame").reset_index(drop=True)
            if len(grp) == 0:
                continue

            key = tuple(grp[col].iloc[0] for col in group_cols)
            starts = grp["start_frame"].to_numpy()
            stops = grp["stop_frame"].to_numpy()

            cur_start, cur_stop = starts[0], stops[0]
            for s, e in zip(starts[1:], stops[1:]):
                if s - cur_stop <= merge_gap:
                    cur_stop = max(cur_stop, e)
                else:
                    merged.append(key + (cur_start, cur_stop))
                    cur_start, cur_stop = s, e

            merged.append(key + (cur_start, cur_stop))

        submission_part = pd.DataFrame(merged, columns=out_columns)

    return submission_part

In [19]:
def optimize_ensemble_predictions(oof_pred_probs, y_action, n_trials=1000, n_jobs=-1):
    """Search blend weights and a decision threshold that maximise binary F1.

    Each Optuna trial samples one raw weight in [-1, 1] per model plus a
    threshold in [0, 1]. Raw weights are divided by their sum, the out-of-fold
    probabilities are blended with the normalised weights, and the blend is
    scored with ``f1_score`` against ``y_action``.

    Args:
        oof_pred_probs: mapping model name -> 1-D array of out-of-fold
            probabilities (all the same length as ``y_action``).
        y_action: binary ground-truth labels.
        n_trials: number of Optuna trials.
        n_jobs: parallelism passed to ``study.optimize``.

    Returns:
        ``{"threshold": float, "weight": ndarray}`` with the weights normalised
        to sum to 1, in the iteration order of ``oof_pred_probs``.

    NOTE(review): the study is unseeded and, by default, runs trials in
    parallel, so results are not reproducible between runs.
    """
    model_names = list(oof_pred_probs.keys())

    def _normalize(raw_weights):
        # Raw weights may be negative, so their sum can be (near) zero; dividing
        # by it would yield inf/NaN or absurdly large weights.
        w = np.asarray(raw_weights, dtype=float)
        total = w.sum()
        if not np.isfinite(total) or abs(total) < 1e-12:
            return None
        return w / total

    def objective(trial):
        weights = _normalize([trial.suggest_float(model, -1, 1) for model in model_names])
        threshold = trial.suggest_float("threshold", 0, 1)
        if weights is None:
            return 0.0  # degenerate weight vector: score it as useless

        pred_probs = np.zeros((oof_pred_probs[model_names[0]].shape[0], ))
        for model, weight in zip(model_names, weights):
            pred_probs += oof_pred_probs[model] * weight

        return f1_score(y_action, pred_probs >= threshold, zero_division=0)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)

    best_weights = _normalize([study.best_params[model] for model in model_names])
    if best_weights is None:
        # only reachable when every trial was degenerate; fall back to a plain average
        best_weights = np.full(len(model_names), 1.0 / len(model_names))

    return {
        "threshold": study.best_params["threshold"],
        "weight": best_weights
    }

In [20]:
import gc
import os
import warnings
import joblib
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import f1_score


def cross_validate_classifier(X, label, meta, body_parts_str, section):
    """Blend stored out-of-fold predictions per action and decode them into segments.

    For every action column in ``label`` the function loads the saved
    out-of-fold probabilities of each model from
    ``{CFG.model_path}/{model}/{section}/{action}/oof_pred_probs.pkl`` (zeros
    when a file is missing), tunes blend weights and a threshold with
    ``optimize_ensemble_predictions``, and stores the blended scores in an
    out-of-fold frame. That frame is then decoded by ``predict_multiclass``.

    Args:
        X: feature matrix. Not used here (no model is fitted); kept so existing
            callers keep working.
        label: frame with one column per action; NaN marks unlabeled frames.
        meta: per-row metadata with at least ``video_id`` (plus whatever
            ``predict_multiclass`` needs).
        body_parts_str: tag stored with each F1 entry.
        section: section id used in the model path and in log lines.

    Returns:
        ``(submission_list, f1_list, thresholds, weights)`` where
        ``submission_list`` holds at most one segment frame, ``f1_list`` holds
        ``(body_parts_str, action, f1)`` tuples, and ``thresholds`` / ``weights``
        map action -> tuned value.
    """
    model_names = [
        "xgboost3_gpu",
        "xgboost4_gpu",
        "xgboost5_gpu"
    ]

    oof = pd.DataFrame(index=meta.index)

    f1_list = []
    submission_list = []
    thresholds = {}
    weights = {}

    # iterate by action (binary problem per action)
    for action in label.columns:
        action_mask = ~ label[action].isna().values
        y_action = label[action][action_mask].values.astype(int)
        groups_action = meta.video_id[action_mask]

        # not enough videos for K-fold
        if len(np.unique(groups_action)) < CFG.n_splits:
            print(f"\tSkip (not enough groups). Section: {section} Action: {action}")
            continue

        # all labels are 0 -> nothing to tune
        if (y_action == 0).all():
            oof_action = np.zeros(len(y_action), dtype=float)
            print(f"\tF1: 0.0000 (0.00) Section: {section} Action: {action}")
        else:
            try:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=RuntimeWarning)

                    oof_pred_probs = {}
                    for model_name in model_names:
                        file_path = f"{CFG.model_path}/{model_name}/{section}/{action}/oof_pred_probs.pkl"
                        if os.path.exists(file_path):
                            oof_pred_probs[model_name] = joblib.load(file_path)
                        else:
                            # missing model: contributes an all-zero prediction
                            oof_pred_probs[model_name] = np.zeros(len(y_action))

                    res = optimize_ensemble_predictions(oof_pred_probs, y_action)

                    oof_action = np.zeros((oof_pred_probs[model_names[0]].shape[0], ))
                    for model, weight in zip(oof_pred_probs.keys(), res["weight"]):
                        oof_action += oof_pred_probs[model] * weight

                    threshold = res["threshold"]
                    weights[action] = res["weight"]
                    thresholds[action] = threshold

                    f1 = f1_score(y_action, (oof_action >= threshold).astype(int), zero_division=0)
                    f1_list.append((body_parts_str, action, f1))

                    print(f"\tF1: {f1:.4f} (thr={threshold:.2f}) Section: {section} Action: {action}")

                    del oof_pred_probs, res, threshold
                    gc.collect()

            except Exception as e:
                # best effort: a failing action falls back to all-zero scores
                oof_action = np.zeros(len(y_action), dtype=float)
                print(f"\tERROR: {e} -> F1: 0.0000 (0.00) Section: {section} Action: {action}")

        # unlabeled frames keep a score of 0
        oof_column = np.zeros(len(label), dtype=float)
        oof_column[action_mask] = oof_action
        oof[action] = oof_column

        del oof_action, action_mask, y_action, groups_action
        gc.collect()

    # every action may have been skipped; decoding a frame without columns would raise
    if oof.shape[1] > 0:
        submission_list.append(predict_multiclass(oof, meta, thresholds))

    return submission_list, f1_list, thresholds, weights

In [21]:
def submit(test_subset, fps_lookup, body_parts, mode, section, thresholds, weights=None):
    """Predict behavior segments for every sample of ``test_subset`` in one mode.

    For each sample yielded by ``generate_mouse_data`` the features are built
    with ``transform_single`` / ``transform_pair``, every action with stored
    weights is scored as a weighted sum of the saved trainers' predictions, and
    the score frame is decoded with ``predict_multiclass``.

    Args:
        test_subset: metadata rows of the videos to predict.
        fps_lookup: mapping used by ``_fps_from_meta`` to find the frame rate.
        body_parts: body parts passed to the feature transforms.
        mode: ``"single"`` or anything else for pair features.
        section: section id used in the model path.
        thresholds: mapping action -> threshold for ``predict_multiclass``.
        weights: mapping action -> per-model weights, in the order
            xgboost3_gpu, xgboost4_gpu, xgboost5_gpu. Actions without an entry
            are skipped; ``None`` means no action has weights.

    Returns:
        List of segment frames, one per sample that produced any score column.
    """
    if weights is None:
        weights = {}

    model_names = ("xgboost3_gpu", "xgboost4_gpu", "xgboost5_gpu")
    sample_gen = generate_mouse_data(test_subset, mode, is_train=False)
    submission_list = []

    for sample_mode, track_df, meta_df, actions in sample_gen:
        assert sample_mode == mode

        X_te = None
        try:
            fps = _fps_from_meta(meta_df, fps_lookup, default_fps=30)
            if sample_mode == "single":
                X_te = transform_single(track_df, body_parts, fps)
            else:
                X_te = transform_pair(track_df, body_parts, fps)

            # raw tracking data is no longer needed once features exist
            track_df = None
            gc.collect()

            pred = pd.DataFrame(index=meta_df.video_frame)

            for action in actions:
                action_weights = weights.get(action)
                if action_weights is None:
                    continue  # no tuned ensemble for this action

                model_preds = []
                for model_name in model_names:
                    model_path = f"{CFG.model_path}/{model_name}/{section}/{action}"
                    model_file = glob.glob(f"{model_path}/*_trainer_*.pkl")
                    if len(model_file) == 1:
                        trainer = joblib.load(model_file[0])
                        model_preds.append(trainer.predict(X_te))
                        del trainer
                        gc.collect()
                    else:
                        # zero or several candidate files: contribute zeros
                        model_preds.append(np.zeros(X_te.shape[0]))

                # weighted sum
                pred[action] = sum(w * p for w, p in zip(action_weights, model_preds))

            X_te = None
            gc.collect()

            if pred.shape[1] > 0:
                submission_list.append(predict_multiclass(pred, meta_df, thresholds))

        except KeyError as err:
            # Best effort: skip the sample, but say so. Names are released in
            # `finally` by rebinding, because a second `del` of an
            # already-deleted local would itself raise.
            print(f"[WARN] Skipping sample (mode={mode}, section={section}) after KeyError: {err}")
        finally:
            track_df = None
            X_te = None
            gc.collect()

    return submission_list


In [22]:
# Outside validation, the per-mode threshold and weight tables are read back
# from disk (presumably written by an earlier validation run — confirm);
# validation starts from empty per-mode tables that are filled while tuning.
if CFG.mode != "validate":
    thresholds = joblib.load(f"{CFG.model_path}/ensemble_345/thresholds.pkl")
    weights = joblib.load(f"{CFG.model_path}/ensemble_345/weights.pkl")
else:
    thresholds = dict(single={}, pair={})
    weights = dict(single={}, pair={})

In [23]:
def process_mode(mode, subset, body_parts, fps_lookup, section, thresholds, weights, f1_list, submission_list):
    """Run one section in one mode ("single" or "pair"), for validation or inference.

    When ``CFG.mode == "validate"``: features are built for every sample of the
    requested mode, ``cross_validate_classifier`` tunes thresholds and weights,
    and the results are stored under ``thresholds[mode][str(section)]`` /
    ``weights[mode][str(section)]``.

    Otherwise: the stored thresholds and weights of this mode and section are
    sanity-checked and passed to ``submit``.

    Args:
        mode: ``"single"`` or ``"pair"``.
        subset: metadata rows of the videos in this section.
        body_parts: body parts used for feature building.
        fps_lookup: mapping used by ``_fps_from_meta``.
        section: section id.
        thresholds, weights: nested dicts ``{mode: {str(section): {action: value}}}``;
            updated in place.
        f1_list, submission_list: accumulators extended in place.

    Returns:
        None.
    """

    def _weight_is_invalid(value):
        # submit() zips the weights with one prediction per model, so a usable
        # entry must be a non-empty sequence of finite numbers.
        if value is None:
            return True
        try:
            arr = np.asarray(value, dtype=float)
        except (TypeError, ValueError):
            return True
        return arr.ndim == 0 or arr.size == 0 or not np.all(np.isfinite(arr))

    # validate or test
    if CFG.mode == "validate":
        data_list, label_list, meta_list = [], [], []

        for switch, data, meta, label in generate_mouse_data(subset):
            if switch != mode:
                continue
            data_list.append(data)
            meta_list.append(meta)
            label_list.append(label)
            del data, meta, label
        gc.collect()

        if len(data_list) == 0:
            return  # no sample for this mode

        # features for each sample
        feats_parts = []
        for data_i, meta_i in zip(data_list, meta_list):
            fps_i = _fps_from_meta(meta_i, fps_lookup, default_fps=30.0)
            if mode == "single":
                X_i = transform_single(data_i, body_parts, fps_i)
            else:
                X_i = transform_pair(data_i, body_parts, fps_i)

            # copy=False: no second copy when the transform already returned float32
            feats_parts.append(X_i.astype(np.float32, copy=False))
            del X_i, fps_i
        gc.collect()

        X_tr = pd.concat(feats_parts, axis=0, ignore_index=True)
        y_tr = pd.concat(label_list, axis=0, ignore_index=True)
        meta_tr = pd.concat(meta_list, axis=0, ignore_index=True)

        del feats_parts, data_list, label_list, meta_list
        gc.collect()

        temp_sub_list, temp_f1_list, temp_thr, temp_w = cross_validate_classifier(X_tr, y_tr, meta_tr, body_parts_str=str(body_parts), section=section)

        # save thresholds and weights for this mode/section
        thresholds[mode].setdefault(str(section), {}).update(temp_thr)
        weights[mode].setdefault(str(section), {}).update(temp_w)

        f1_list.extend(temp_f1_list)
        submission_list.extend(temp_sub_list)

        del temp_sub_list, temp_f1_list, temp_thr, temp_w, X_tr, y_tr, meta_tr
        gc.collect()

    else:
        section_thresholds = thresholds[mode].get(str(section), {})
        section_weights = weights[mode].get(str(section), {})

        print(f"[INFO] section={section} | thresholds len = {len(section_thresholds)} | weights len = {len(section_weights)}")

        if not section_thresholds:
            print(f"[WARN] Missing thresholds for mode={mode}, section={section}. Using default=empty dict.")
            section_thresholds = {}

        if not section_weights:
            print(f"[WARN] Missing weights for mode={mode}, section={section}. Using default=empty dict.")
            section_weights = {}

        for k, v in list(section_thresholds.items()):
            if v is None or (isinstance(v, float) and (v != v)):  # NaN check
                print(f"[WARN] Threshold '{k}' in section {section} is invalid ({v}). Setting to 0.")
                section_thresholds[k] = 0.0
            else:
                print(f"[INFO] Threshold '{k}' in section {section} has value of ({v}).")

        for k, v in list(section_weights.items()):
            if _weight_is_invalid(v):
                # A scalar replacement cannot be zipped with the per-model
                # predictions in submit(); dropping the entry makes submit()
                # skip the action instead of failing the whole section.
                print(f"[WARN] Weight '{k}' in section {section} is invalid ({v}). Skipping this action.")
                del section_weights[k]
            else:
                print(f"[INFO] Weight '{k}' in section {section} has value of ({v}).")

        temp_sub_list = submit(test_subset=subset, fps_lookup=fps_lookup, body_parts=body_parts, mode=mode, section=section, thresholds=section_thresholds, weights=section_weights)
        submission_list.extend(temp_sub_list)

        del temp_sub_list
        gc.collect()


In [24]:
import traceback

# Accumulators filled in place by process_mode across all sections.
f1_list = []
submission_list = []

# One pass per distinct body-part configuration; index 0 is skipped
# (reason not visible in this cell — TODO confirm).
for section in range(1, len(body_parts_tracked_list)):
    body_parts_tracked_str = body_parts_tracked_list[section]

    try:
        body_parts = json.loads(body_parts_tracked_str)
        print(f"{section}/{len(body_parts_tracked_list)-1} Processing videos with: {body_parts}\n")

        # large configurations are trimmed to the parts outside DROP_BODY_PARTS
        if len(body_parts) > 5:
            body_parts = [b for b in body_parts if b not in DROP_BODY_PARTS]

        if CFG.mode == "validate":
            subset =  train[train.body_parts_tracked == body_parts_tracked_str]
        else:
            subset = test[test.body_parts_tracked == body_parts_tracked_str]

        # lookup FPS
        fps_lookup = (
            subset[["video_id", "frames_per_second"]]
            .drop_duplicates("video_id")
            .set_index("video_id")["frames_per_second"]
            .to_dict()
        )

        # single
        process_mode(
            mode="single",
            subset=subset,
            body_parts=body_parts,
            fps_lookup=fps_lookup,
            section=section,
            thresholds=thresholds,
            weights=weights,
            f1_list=f1_list,
            submission_list=submission_list,
        )

        # pair
        process_mode(
            mode="pair",
            subset=subset,
            body_parts=body_parts,
            fps_lookup=fps_lookup,
            section=section,
            thresholds=thresholds,
            weights=weights,
            f1_list=f1_list,
            submission_list=submission_list,
        )

        print(f"Length of submission_list: {len(submission_list)}\n")

    except Exception as e:
        # Keep going with the next section, but show where the failure came
        # from: the message alone does not identify the failing call.
        print(f"\tError: {e}")
        traceback.print_exc()



1/9 Processing videos with: ['body_center', 'ear_left', 'ear_right', 'headpiece_bottombackleft', 'headpiece_bottombackright', 'headpiece_bottomfrontleft', 'headpiece_bottomfrontright', 'headpiece_topbackleft', 'headpiece_topbackright', 'headpiece_topfrontleft', 'headpiece_topfrontright', 'lateral_left', 'lateral_right', 'neck', 'nose', 'tail_base', 'tail_midpoint', 'tail_tip']

[INFO] section=1 | thresholds len = 1 | weights len = 1
[INFO] Threshold 'rear' in section 1 has value of (0.18124751788379836).
[INFO] Weight 'rear' in section 1 has value of ([ 0.39281996  0.72423724 -0.1170572 ]).
[INFO] section=1 | thresholds len = 6 | weights len = 6
[INFO] Threshold 'approach' in section 1 has value of (0.15224701027015639).
[INFO] Threshold 'attack' in section 1 has value of (0.07090925320060291).
[INFO] Threshold 'avoid' in section 1 has value of (0.24324612257373307).
[INFO] Threshold 'chase' in section 1 has value of (0.17321824146917764).
[INFO] Threshold 'chaseattack' in section 1 ha

In [25]:
if CFG.mode == 'validate':
    # Score the out-of-fold segments and persist the tuned ensemble parameters.
    submission = pd.concat(submission_list)
    cleaned_submission = clean_and_fill_submission(submission, train)
    print(f"Competition metric: {score(solution, cleaned_submission, ''):.4f}")

    f1_df = pd.DataFrame(f1_list, columns=['body_parts_tracked_str', 'action', 'binary F1 score'])
    print(f"Mean F1:            {f1_df['binary F1 score'].mean():.4f}")

    os.makedirs("ensemble", exist_ok=True)
    joblib.dump(thresholds, "ensemble/thresholds.pkl")
    joblib.dump(weights, "ensemble/weights.pkl")
    joblib.dump(f1_df, "ensemble/scores.pkl")

elif CFG.mode == 'submit':
    if len(submission_list) > 0:
        submission = pd.concat(submission_list)
    else:
        # nothing was predicted: start from a single placeholder row
        submission = pd.DataFrame(
            dict(
                video_id=438887472,
                agent_id='mouse1',
                target_id='self',
                action='rear',
                start_frame=278,
                stop_frame=500
            ), index=[44])

    cleaned_submission = clean_and_fill_submission(submission, test, is_train=False)
    cleaned_submission.index.name = 'row_id'
    cleaned_submission.to_csv('submission.csv')
    # A bare `.head()` inside a branch is discarded and never rendered, so print
    # the preview explicitly; show the frame that was actually written.
    print(cleaned_submission.head())