# Config

In [1]:
from pathlib import Path
from pathlib import Path
from tqdm.auto import tqdm

import os
import sys
import gc
import re
import ast
import itertools

import numpy as np
import polars as pl
import xgboost as xgb

In [2]:
MABE_PKG_DIR = Path("/kaggle/input/mabe-package")
STARTER_DIR = Path("/kaggle/input/mabe-artifacts")
COMP_DIR = Path("/kaggle/input/MABe-mouse-behavior-detection")
INPUT_DIR = COMP_DIR
TRAIN_TRACKING_DIR = INPUT_DIR / "train_tracking"
TRAIN_ANNOTATION_DIR = INPUT_DIR / "train_annotation"
TEST_TRACKING_DIR = INPUT_DIR / "test_tracking"

WORKING_DIR = Path("/kaggle/working")
WORKING_DIR.mkdir(parents=True, exist_ok=True)

SELF_FEATURE_DIR = WORKING_DIR / "self_features"
PAIR_FEATURE_DIR = WORKING_DIR / "pair_features"
SELF_FEATURE_DIR.mkdir(parents=True, exist_ok=True)
PAIR_FEATURE_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
!cp /kaggle/input/mabe-artifacts/self_features.py .
!cp /kaggle/input/mabe-artifacts/pair_features.py .
!cp /kaggle/input/mabe-artifacts/robustify.py .
!cp -r /kaggle/input/mabe-artifacts/results .

%run -i self_features.py
%run -i pair_features.py
%run -i robustify.py

In [4]:
INDEX_COLS = [
    "video_id",
    "agent_mouse_id",
    "target_mouse_id",
    "video_frame",
]

BODY_PARTS = [
    "ear_left",
    "ear_right",
    "nose",
    "neck",
    "body_center",
    "lateral_left",
    "lateral_right",
    "hip_left",
    "hip_right",
    "tail_base",
    "tail_tip",
]

SELF_BEHAVIORS = [
    "biteobject",
    "climb",
    "dig",
    "exploreobject",
    "freeze",
    "genitalgroom",
    "huddle",
    "rear",
    "rest",
    "run",
    "selfgroom",
]

PAIR_BEHAVIORS = [
    "allogroom",
    "approach",
    "attack",
    "attemptmount",
    "avoid",
    "chase",
    "chaseattack",
    "defend",
    "disengage",
    "dominance",
    "dominancegroom",
    "dominancemount",
    "ejaculate",
    "escape",
    "flinch",
    "follow",
    "intromit",
    "mount",
    "reciprocalsniff",
    "shepherd",
    "sniff",
    "sniffbody",
    "sniffface",
    "sniffgenital",
    "submit",
    "tussle",
]

# Helper Function

In [5]:
def parse_behaviors_column(behaviors_str: str):
    """
    behaviors_labeled is stored as a Python like list of tuples.
    Use ast.literal_eval for safety instead of eval.
    Example:
      "[('mouse1','mouse2','sniff'), ('mouse2','mouse1','sniff')]"
    """
    if behaviors_str is None:
        return []
    return ast.literal_eval(behaviors_str)


def build_behavior_dataframe(test_df: pl.DataFrame) -> pl.DataFrame:
    """
    Expand behaviors_labeled into one row per (lab, video, agent, target, behavior).
    """
    behavior_df = (
        test_df
        .filter(pl.col("behaviors_labeled").is_not_null())
        .select(["lab_id", "video_id", "behaviors_labeled"])
        .with_columns(
            pl.col("behaviors_labeled")
            .map_elements(
                parse_behaviors_column,
                return_dtype=pl.List(pl.Utf8),
            )
            .alias("behaviors_labeled_list")
        )
        .explode("behaviors_labeled_list")
        .rename({"behaviors_labeled_list": "behaviors_labeled_element"})
        .with_columns(
            pl.col("behaviors_labeled_element").str.split(",").list.get(0)
            .str.replace_all("[()' ]", "")
            .alias("agent"),
            pl.col("behaviors_labeled_element").str.split(",").list.get(1)
            .str.replace_all("[()' ]", "")
            .alias("target"),
            pl.col("behaviors_labeled_element").str.split(",").list.get(2)
            .str.replace_all("[()' ]", "")
            .alias("behavior"),
        )
        .select(["lab_id", "video_id", "agent", "target", "behavior"])
    )
    return behavior_df


def extract_mouse_id(mouse_str: str) -> int:
    """
    Convert 'mouse1' to 1, 'mouse2' to 2, 'self' to -1.
    """
    if mouse_str == "self":
        return -1
    m = re.search(r"mouse(\d+)", mouse_str)
    if m:
        return int(m.group(1))
    raise ValueError(f"Unexpected mouse id format: {mouse_str}")

In [6]:
def load_features_for_group(
    lab_id: str,
    video_id: int,
    agent: str,
    target: str,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """
    Load per frame features for a given (lab, video, agent, target) group.
    Returns:
      index_df   - DataFrame with INDEX_COLS
      feature_df - DataFrame with feature columns only
    """
    agent_mouse_id = extract_mouse_id(agent)
    target_mouse_id = extract_mouse_id(target)

    if target == "self":
        feature_path = SELF_FEATURE_DIR / f"{video_id}.parquet"
        scan = pl.scan_parquet(feature_path).filter(
            pl.col("agent_mouse_id") == agent_mouse_id
        )
    else:
        feature_path = PAIR_FEATURE_DIR / f"{video_id}.parquet"
        scan = pl.scan_parquet(feature_path).filter(
            (pl.col("agent_mouse_id") == agent_mouse_id)
            & (pl.col("target_mouse_id") == target_mouse_id)
        )

    full_df = scan.collect()
    if full_df.height == 0:
        return full_df, full_df

    index_df = full_df.select(INDEX_COLS)
    feature_df = full_df.select(pl.exclude(INDEX_COLS))
    return index_df, feature_df

In [7]:
def load_models_for_behavior(lab_id: str, behavior: str):
    """
    Load all fold models and thresholds for a given (lab, behavior).
    Returns a list of (model, threshold).
    """
    behavior_dir = WORKING_DIR / "results" / lab_id / behavior
    fold_dirs = sorted(behavior_dir.glob("fold_*"))
    models = []
    for fold_dir in fold_dirs:
        model_file = fold_dir / "model.json"
        thr_file = fold_dir / "threshold.txt"
        if not model_file.exists() or not thr_file.exists():
            continue
        with open(thr_file, "r") as f:
            threshold = float(f.read().strip())
        model = xgb.Booster(model_file=str(model_file))
        models.append((model, threshold))
    return models

In [8]:
def smooth_probs_submit(
    index_df: pl.DataFrame,
    probs: np.ndarray,
    window: int = 5,
) -> np.ndarray:
    """
    Smooth prob cho 1 group (1 video, 1 agent, 1 target) ở bước submit.
    Giống logic train: rolling_mean theo thời gian rồi trả về đúng thứ tự ban đầu.
    """
    n = len(probs)
    df = pl.DataFrame(
        {
            "row_idx": np.arange(n, dtype=np.int32),
            "video_frame": index_df.get_column("video_frame"),
            "prob": probs,
        }
    )

    df = (
        df
        .sort("video_frame")
        .with_columns(
            pl.col("prob")
            .rolling_mean(window_size=window, center=True, min_samples=1)
            .alias("prob_smooth")
        )
        .sort("row_idx")  # trả về đúng thứ tự ban đầu
    )

    return df.get_column("prob_smooth").to_numpy()

In [9]:
def predict_for_group(
    lab_id: str,
    video_id: int,
    agent: str,
    target: str,
    group_behaviors: pl.DataFrame,
) -> pl.DataFrame | None:
    """
    Run inference for one group of (lab_id, video_id, agent, target).
    Returns a Polars DataFrame close to submission format for that group.
    """
    index_df, feature_df = load_features_for_group(lab_id, video_id, agent, target)

    if feature_df.height == 0:
        return None

    # Create XGBoost DMatrix once per group and reuse across behaviors
    dtest = xgb.DMatrix(feature_df.to_pandas(), feature_names=feature_df.columns)

    prediction_df = index_df.clone()
    used_cols = []

    # Unique behaviors for this group
    unique_behaviors = (
        group_behaviors.select("behavior").unique()["behavior"].to_list()
    )

    for behavior in unique_behaviors:
        models = load_models_for_behavior(lab_id, behavior)
        if not models:
            # No trained model for this (lab, behavior) in the starter models
            continue

        fold_predictions = []
        fold_labels = []

        for model, threshold in models:
            probs = model.predict(dtest)

            #---------------------smoothing--------------------
            probs = smooth_probs_submit(index_df, probs, window=5)
            #---------------------end--------------------------
            
            labels = (probs >= threshold).astype(np.int8)

            # Mask probabilities by labels: 0 when label is 0
            fold_predictions.append(probs * labels)
            fold_labels.append(labels)

        for fold_idx, probs_masked in enumerate(fold_predictions):
            col_name = f"{behavior}_fold{fold_idx}"
            prediction_df = prediction_df.with_columns(
                pl.Series(name=col_name, values=probs_masked.astype(np.float32))
            )
            used_cols.append(col_name)

    if not used_cols:
        return None

    # Pick best behavior per frame
    cols = prediction_df.select(pl.exclude(INDEX_COLS)).columns

    prediction_labels_df = (
        prediction_df
        .with_columns(
            pl.struct(pl.col(cols))
            .map_elements(
                lambda row: (
                    "none"
                    if sum(row.values()) == 0
                    else cols[int(np.argmax(list(row.values())))].split("_")[0]
                ),
                return_dtype=pl.String,
            )
            .alias("prediction")
        )
        .select(INDEX_COLS + ["prediction"])
    )

    # Convert per frame labels into time segments
    agent_mouse_id = extract_mouse_id(agent)
    target_mouse_id = extract_mouse_id(target)

    group_submission = (
        prediction_labels_df
        .filter(pl.col("prediction") != pl.col("prediction").shift(1))
        .with_columns(
            pl.col("video_frame").shift(-1).alias("stop_frame")
        )
        .filter(pl.col("prediction") != "none")
        .select(
            pl.col("video_id"),
            (pl.lit("mouse") + pl.lit(agent_mouse_id).cast(pl.Utf8)).alias("agent_id"),
            pl.when(pl.lit(target_mouse_id) == -1)
            .then(pl.lit("self"))
            .otherwise(pl.lit("mouse") + pl.lit(target_mouse_id).cast(pl.Utf8))
            .alias("target_id"),
            pl.col("prediction").alias("action"),
            pl.col("video_frame").alias("start_frame"),
            pl.col("stop_frame"),
        )
    )

    return group_submission

# Load metadata and build behavior table

In [10]:
print("Loading test metadata...")
test_df = pl.read_csv(INPUT_DIR / "test.csv")

print("Building behavior table from behaviors_labeled...")
behavior_df = build_behavior_dataframe(test_df)

groups = list(
    behavior_df.group_by("lab_id", "video_id", "agent", "target", maintain_order=True)
)
print(f"Number of (lab, video, agent, target) groups: {len(groups)}")

Loading test metadata...
Building behavior table from behaviors_labeled...
Number of (lab, video, agent, target) groups: 16


# Pre compute features for all videos

In [11]:
print("Generating self and pair features for all test videos...")

rows = test_df.rows(named=True)

for row in tqdm(rows, total=len(rows)):
    lab_id = row["lab_id"]
    video_id = row["video_id"]

    tracking_path = TEST_TRACKING_DIR / f"{lab_id}/{video_id}.parquet"
    tracking = pl.read_parquet(tracking_path)

    self_feat = make_self_features(metadata=row, tracking=tracking)
    pair_feat = make_pair_features(metadata=row, tracking=tracking)

    self_feat.write_parquet(SELF_FEATURE_DIR / f"{video_id}.parquet")
    pair_feat.write_parquet(PAIR_FEATURE_DIR / f"{video_id}.parquet")

    del self_feat, pair_feat, tracking
    gc.collect()

Generating self and pair features for all test videos...


  0%|          | 0/1 [00:00<?, ?it/s]

# Inference by group and segment construction

In [12]:
print("Running inference and building group submissions...")

group_submissions = []

for (lab_id, video_id, agent, target), group in tqdm(groups, total=len(groups)):
    group_submission = predict_for_group(
        lab_id=lab_id,
        video_id=video_id,
        agent=agent,
        target=target,
        group_behaviors=group,
    )

    if group_submission is not None and group_submission.height > 0:
        group_submissions.append(group_submission)

if not group_submissions:
    raise RuntimeError(
        "No submissions were generated. "
        "Check that starter models exist under /kaggle/working/results."
    )

submission = pl.concat(group_submissions, how="vertical").sort(
    "video_id",
    "agent_id",
    "target_id",
    "action",
    "start_frame",
    "stop_frame",
)

print("Initial submission rows:", submission.height)

Running inference and building group submissions...


  0%|          | 0/16 [00:00<?, ?it/s]

Initial submission rows: 3451


# Robustify and final clean up

In [13]:
print("Running robustify on submission...")
submission = robustify(submission, test_df, train_test="test")

# Safety filter: keep only intervals with start_frame < stop_frame
submission = submission.filter(pl.col("start_frame") < pl.col("stop_frame"))

print("Rows after robustify and filtering:", submission.height)

# Add row_id and save as submission.csv
final_submission = submission.with_row_index("row_id")
final_path = WORKING_DIR / "submission.csv"
final_submission.write_csv(final_path)

print("Saved submission to:", final_path)

# Quick head check
!head -n 10 /kaggle/working/submission.csv

Running robustify on submission...
ERROR: Dropped frames with start >= stop
INFO: Merged small gaps, events: 3440 -> 3172
INFO: Dropped 512 short events (action-dependent)
Rows after robustify and filtering: 2660
Saved submission to: /kaggle/working/submission.csv
row_id,video_id,agent_id,target_id,action,start_frame,stop_frame
0,438887472,mouse4,mouse3,attack,10,17
1,438887472,mouse4,mouse3,avoid,17,33
2,438887472,mouse4,mouse3,attack,33,38
3,438887472,mouse4,mouse3,approach,194,197
4,438887472,mouse4,mouse3,avoid,197,214
5,438887472,mouse4,mouse3,approach,214,238
6,438887472,mouse4,mouse3,attack,238,283
7,438887472,mouse4,mouse3,chase,283,293
8,438887472,mouse4,mouse3,chase,294,299
