In [4]:
# imports

%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append("..")

import pandas as pd
import numpy as np

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV
import pandas as pd
import numpy as np

from utils import visual_tools as visualTools
from utils import gt_and_modeling_dfs as prepare_df
from utils import evaluation_tools as evalTools

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# Cell 2 — Episode config (as in SIM1)
# NOTE(review): FPS_TO_SAVE is not referenced by any visible cell of this notebook;
# presumably it is kept for parity with SIM1 or read by helper modules — confirm.
FPS_TO_SAVE = 25

# Per-episode settings: path of the raw video, the timestamp at which the episode
# is split into a train part and a test part, and the path of its ground-truth file.
EPISODES = {
    "Muppets-02-01-01": {
        "path": "../data/raw/Muppets-02-01-01.avi",
        "train_split_timestamp": "19:30",
        "ground_truth_path": "../data/muppets-gt-2025wt/Ground_Truth_New_01.xlsx"
    },
    "Muppets-02-04-04": {
        "path": "../data/raw/Muppets-02-04-04.avi",
        "train_split_timestamp": "19:52",
        "ground_truth_path": "../data/muppets-gt-2025wt/Ground_Truth_New_04.xlsx"
    },
    "Muppets-03-04-03": {
        "path": "../data/raw/Muppets-03-04-03.avi",
        "train_split_timestamp": "19:54",
        "ground_truth_path": "../data/muppets-gt-2025wt/Ground_Truth_New_03.xlsx"
    }
}

# Episode name -> numeric id; the cells below match this id against the "Video"
# column of the ground-truth table.
EPISODE_NAME_TO_VIDEO_ID = {
    "Muppets-02-01-01": 211,
    "Muppets-02-04-04": 244,
    "Muppets-03-04-03": 343
}


In [None]:
# Sanity check: show the configured train/test split timestamp of every episode.
print({k: v["train_split_timestamp"] for k, v in EPISODES.items()})

In [32]:
# Load the combined ground-truth table for all episodes.
gt_path = "../data/processed/all_ep_gt.csv"
all_ep_gt_df = pd.read_csv(gt_path)

# Aliases for SIM2
# "Piggy" and "Chef" are integer copies of the "Miss Piggy" and "Cook" columns.
all_ep_gt_df["Piggy"] = all_ep_gt_df["Miss Piggy"].astype(int)
all_ep_gt_df["Chef"] = all_ep_gt_df["Cook"].astype(int)
# "OtherPigs" is 1 only when "Pigs" == 1 and "Piggy" == 0.
# NOTE(review): under this rule a frame that shows Miss Piggy together with other
# pigs gets OtherPigs = 0 — confirm that this is the intended label definition.
all_ep_gt_df["OtherPigs"] = ((all_ep_gt_df["Pigs"] == 1) & (all_ep_gt_df["Piggy"] == 0)).astype(int)

# Target columns used by every modelling / evaluation cell below.
SIM2_CHARACTER_LABEL_COLS = ["Piggy", "OtherPigs", "Chef"]

# Preview: frame identifiers plus the three SIM2 targets.
all_ep_gt_df[["Video", "Frame_number", "Timestamp"] + SIM2_CHARACTER_LABEL_COLS].head()


Unnamed: 0,Video,Frame_number,Timestamp,Piggy,OtherPigs,Chef
0,211,0,00:00.00,0,0,0
1,211,1,00:00.04,0,0,0
2,211,2,00:00.08,0,0,0
3,211,3,00:00.12,0,0,0
4,211,4,00:00.16,0,0,0


In [30]:
def split_presence_report(gt_df, EPISODES, EPISODE_NAME_TO_VIDEO_ID, targets):
    """Count positive frames per target on each side of every episode's split.

    For each episode in ``EPISODES`` the rows of ``gt_df`` whose "Video" equals the
    episode's id are divided by timestamp: rows at or before
    ``train_split_timestamp`` count as train, rows after it as test. Timestamps
    are converted with ``visualTools.parse_timestamp``.

    Returns a DataFrame with one row per (episode, target) holding the number of
    positive frames and the total number of frames in the train and test parts.
    """
    records = []
    for episode_name, episode_cfg in EPISODES.items():
        split_ts = episode_cfg["train_split_timestamp"]
        split_sec = visualTools.parse_timestamp(split_ts)
        video_id = EPISODE_NAME_TO_VIDEO_ID[episode_name]

        # Rows of this episode, with the timestamp converted for comparison.
        episode_rows = gt_df.loc[gt_df["Video"] == video_id]
        episode_rows = episode_rows.assign(
            _ts_sec=episode_rows["Timestamp"].apply(visualTools.parse_timestamp)
        )

        # The split instant itself belongs to the train part.
        train_rows = episode_rows.loc[episode_rows["_ts_sec"] <= split_sec]
        test_rows = episode_rows.loc[episode_rows["_ts_sec"] > split_sec]

        records.extend(
            {
                "episode": episode_name,
                "video_id": video_id,
                "split_ts": split_ts,
                "target": target,
                "train_pos": int(train_rows[target].sum()),
                "test_pos": int(test_rows[target].sum()),
                "train_total": int(len(train_rows)),
                "test_total": int(len(test_rows)),
            }
            for target in targets
        )
    return pd.DataFrame(records)

# Positive-frame counts per (episode, target) on both sides of the split.
report_df = split_presence_report(
    gt_df=all_ep_gt_df,
    EPISODES=EPISODES,
    EPISODE_NAME_TO_VIDEO_ID=EPISODE_NAME_TO_VIDEO_ID,
    targets=SIM2_CHARACTER_LABEL_COLS,
)
display(report_df)

# Hard warning: episode/target pairs without a single positive frame in TEST.
bad_test = report_df.loc[report_df["test_pos"].eq(0)]
if bad_test.empty:
    print("[OK] All episode-target pairs have positives in TEST.")
else:
    print("\n[!!! WARNING] Some episode has 0 positives in TEST for:")
    display(bad_test)

# Same check for the TRAIN side.
bad_train = report_df.loc[report_df["train_pos"].eq(0)]
if bad_train.empty:
    print("[OK] All episode-target pairs have positives in TRAIN (at least 1).")
else:
    print("\n[!!! WARNING] Some episode has 0 positives in TRAIN for:")
    display(bad_train)
    print("=> Model for that target cannot learn from that episode split.")

# Totals across episodes (what matters most): sums per target plus the fraction
# of all frames that ends up in the test part.
totals = report_df.groupby("target")[["train_pos","test_pos","train_total","test_total"]].sum()
totals["test_share"] = totals["test_total"].div(totals["train_total"].add(totals["test_total"]))
display(totals)


Unnamed: 0,episode,video_id,split_ts,target,train_pos,test_pos,train_total,test_total
0,Muppets-02-01-01,211,19:30,Piggy,1661,0,29251,9430
1,Muppets-02-01-01,211,19:30,OtherPigs,179,0,29251,9430
2,Muppets-02-01-01,211,19:30,Chef,550,1315,29251,9430
3,Muppets-02-04-04,244,19:52,Piggy,3256,0,29801,8905
4,Muppets-02-04-04,244,19:52,OtherPigs,3500,2008,29801,8905
5,Muppets-02-04-04,244,19:52,Chef,225,0,29801,8905
6,Muppets-03-04-03,343,19:54,Piggy,2914,1677,29851,8647
7,Muppets-03-04-03,343,19:54,OtherPigs,4231,2010,29851,8647
8,Muppets-03-04-03,343,19:54,Chef,1356,0,29851,8647





Unnamed: 0,episode,video_id,split_ts,target,train_pos,test_pos,train_total,test_total
0,Muppets-02-01-01,211,19:30,Piggy,1661,0,29251,9430
1,Muppets-02-01-01,211,19:30,OtherPigs,179,0,29251,9430
3,Muppets-02-04-04,244,19:52,Piggy,3256,0,29801,8905
5,Muppets-02-04-04,244,19:52,Chef,225,0,29801,8905
8,Muppets-03-04-03,343,19:54,Chef,1356,0,29851,8647


[OK] All episode-target pairs have positives in TRAIN (at least 1).


Unnamed: 0_level_0,train_pos,test_pos,train_total,test_total,test_share
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chef,2131,1315,88903,26982,0.232834
OtherPigs,7910,4018,88903,26982,0.232834
Piggy,7831,1677,88903,26982,0.232834


In [34]:
# 5 — Build SIM2 visual feature space (sequential) + save visual_sim2.csv
#
# Extraction over the three episodes took roughly 2.6 hours in the recorded run,
# so the saved CSV is reused when it already exists. Set REBUILD_SIM2_FEATURES to
# True after changing the extractor, the feature list, or the label columns;
# otherwise a stale file would be loaded silently.

SIM2_VISUAL_FEATURES = ["lbp32", "hog", "flow"]

out_fs = "../data/processed/feature_spaces/visual_sim2.csv"

REBUILD_SIM2_FEATURES = False

if REBUILD_SIM2_FEATURES or not os.path.exists(out_fs):
    feature_space_visual_sim2 = prepare_df.build_feature_space_df_sequential(
        feature_extractor_fn=visualTools.extract_visual_features_for_frame_sim2,
        feature_list=SIM2_VISUAL_FEATURES,
        gt_df=all_ep_gt_df,
        characters=SIM2_CHARACTER_LABEL_COLS,
        video_name_to_gt=EPISODE_NAME_TO_VIDEO_ID,
        out_path=out_fs
    )
else:
    # Cheap path: the next cell reads this same file, so loading it here keeps
    # both cells consistent.
    feature_space_visual_sim2 = pd.read_csv(out_fs)
    print(f"[Feature space SIM2] loaded cached {feature_space_visual_sim2.shape} <- {out_fs}")

feature_space_visual_sim2.head()


Muppets-02-01-01: 100%|███████████████████████████████████████| 38681/38681 [48:46<00:00, 13.22it/s]
Muppets-02-04-04: 100%|███████████████████████████████████████| 38706/38706 [56:09<00:00, 11.49it/s]
Muppets-03-04-03: 100%|███████████████████████████████████████| 38498/38498 [53:16<00:00, 12.04it/s]



[Feature space SIM2] saved (115885, 44) -> ../data/processed/feature_spaces/visual_sim2.csv


Unnamed: 0,Video,Frame_number,Timestamp,frame,lbp_0,lbp_1,lbp_2,lbp_3,lbp_4,lbp_5,...,lbp_30,lbp_31,hog_mean,hog_std,flow_mag_mean,flow_mag_std,flow_horiz_ratio,Piggy,OtherPigs,Chef
0,211,0,00:00.00,frame0.jpg,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
1,211,1,00:00.04,frame1.jpg,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
2,211,2,00:00.08,frame2.jpg,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
3,211,3,00:00.12,frame3.jpg,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
4,211,4,00:00.16,frame4.jpg,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0


In [35]:
# 6 — Split feature space train/test (as SIM1)

# Read the saved feature space back from disk (path set in the feature-building cell).
feature_df = pd.read_csv(out_fs)

# NOTE(review): split_feature_space_df is a project helper; presumably it splits each
# episode at its "train_split_timestamp" — the sizes it logs match the split report above.
train_df, test_df = prepare_df.split_feature_space_df(
    feature_df=feature_df,
    EPISODES=EPISODES,
    EPISODE_NAME_TO_VIDEO_ID=EPISODE_NAME_TO_VIDEO_ID
)

# Everything that is not a model input: frame identifiers and the label columns.
META_COLS = ["Video", "frame", "Frame_number", "Timestamp"]
DROP_COLS = SIM2_CHARACTER_LABEL_COLS + META_COLS

X_train_df = train_df.drop(columns=DROP_COLS)
X_test_df  = test_df.drop(columns=DROP_COLS)

# Train and test must expose the same feature columns in the same order.
assert list(X_train_df.columns) == list(X_test_df.columns)

# scaling: useful for SGD; harmless for ExtraTrees
# The scaler is fit on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_df)
X_test  = scaler.transform(X_test_df)

# Test-side frame with identifiers and ground-truth labels; later cells add the
# per-character prediction columns to it.
y_test_df = test_df[META_COLS + SIM2_CHARACTER_LABEL_COLS].copy()


[split] Muppets-02-01-01 | Video=211 | train=29251, test=9430
[split] Muppets-02-04-04 | Video=244 | train=29801, test=8905
[split] Muppets-03-04-03 | Video=343 | train=29851, test=8647
[FINAL SPLIT] train=(88903, 44), test=(26982, 44)


In [36]:
# StandardScaler is already imported in the first cell; the import is kept so the
# cell also runs on its own.
from sklearn.preprocessing import StandardScaler

# Always scale from the unscaled feature frames. Fitting on the already-scaled
# X_train (as this cell did before) made it non-idempotent — every re-run rescaled
# its own output — and left `scaler` fitted on standardized data, i.e. a
# near-identity transform that cannot be applied to raw features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_df)
X_test  = scaler.transform(X_test_df)


In [37]:
# 7 — Train + predict per-predicate

MODEL_KIND = "extratrees"  # "extratrees" or "sgd_calibrated"

def fit_predict_binary(model_kind, X_train, y_train, X_test, threshold=0.5):
    """Fit one binary classifier and score the test rows.

    Parameters
    ----------
    model_kind : str
        "extratrees"     -> ExtraTreesClassifier with 200 trees, no depth limit.
        "sgd_calibrated" -> hinge-loss SGDClassifier wrapped in
                            CalibratedClassifierCV (cv=3, sigmoid) so that
                            predict_proba is available.
    X_train, y_train
        Training features and labels, passed unchanged to ``model.fit``.
    X_test
        Features to score.
    threshold : float, default 0.5
        Scores greater than or equal to this value are labelled 1.

    Returns
    -------
    (pred, proba)
        ``proba`` is column 1 of ``predict_proba(X_test)``; ``pred`` is the
        thresholded 0/1 decision derived from it.

    Raises
    ------
    ValueError
        If ``model_kind`` is not one of the two supported kinds.

    Notes
    -----
    NOTE(review): column 1 of predict_proba is taken as the positive class, which
    presumes both classes occur in ``y_train`` — confirm for new targets/splits.
    NOTE(review): a later comparison cell redefines this function with
    n_estimators=300 / max_iter=3000; whichever cell ran last wins.
    """
    # Only the model construction differs per kind; fit/predict is shared below.
    if model_kind == "extratrees":
        model = ExtraTreesClassifier(
            n_estimators=200,
            random_state=42,
            n_jobs=-1,
            max_depth=None
        )
    elif model_kind == "sgd_calibrated":
        base = SGDClassifier(
            loss="hinge",
            alpha=1e-4,
            max_iter=2000,
            random_state=42
        )
        model = CalibratedClassifierCV(base, cv=3, method="sigmoid")
    else:
        raise ValueError(
            f"Unknown model_kind {model_kind!r}; expected 'extratrees' or 'sgd_calibrated'"
        )

    model.fit(X_train, y_train)
    proba = model.predict_proba(X_test)[:, 1]
    pred = (proba >= threshold).astype(int)
    return pred, proba

# One independent binary model per character. Each model is trained on the same
# X_train with that character's 0/1 labels; its hard decisions and scores are
# stored in y_test_df as "<character>_present" and "<character>_score".
for ch in SIM2_CHARACTER_LABEL_COLS:
    y_train = train_df[ch].astype(int).values

    y_pred, y_score = fit_predict_binary(MODEL_KIND, X_train, y_train, X_test)

    y_test_df[f"{ch}_present"] = y_pred
    y_test_df[f"{ch}_score"] = y_score

print("[OK] Predictions done.")
y_test_df.head()


[OK] Predictions done.


Unnamed: 0,Video,frame,Frame_number,Timestamp,Piggy,OtherPigs,Chef,Piggy_present,Piggy_score,OtherPigs_present,OtherPigs_score,Chef_present,Chef_score
0,211,frame29251.jpg,29251,19:30.04,0,0,1,0,0.0,0,0.0,1,0.83
1,211,frame29252.jpg,29252,19:30.08,0,0,1,0,0.005,0,0.03,1,0.885
2,211,frame29253.jpg,29253,19:30.12,0,0,1,0,0.005,0,0.03,1,0.84
3,211,frame29254.jpg,29254,19:30.16,0,0,1,0,0.0,0,0.03,1,0.8
4,211,frame29255.jpg,29255,19:30.20,0,0,1,0,0.0,0,0.01,1,0.755


In [38]:
# 8 — Evaluation (MAP/PR/ROC/CM overlay)
# y_test_df serves as both inputs: its label columns are the ground truth, and the
# full frame (which also holds the "<character>_present" / "<character>_score"
# columns written by the training loop) is passed as the predictions.
# NOTE(review): evaluate_multiclass is a project helper; which prediction columns it
# reads is not visible here — confirm.

metrics_dict, overall_map = evalTools.evaluate_multiclass(
    y_true_df=y_test_df[SIM2_CHARACTER_LABEL_COLS],
    y_pred_df=y_test_df,
    characters=SIM2_CHARACTER_LABEL_COLS
)


Mean Average Precision (MAP) per character:
Piggy: MAP=0.078
OtherPigs: MAP=0.220
Chef: MAP=0.814

Overall MAP (all characters): 0.371


## Result discussion (SIM2 Visual)

We trained per-predicate binary classifiers on a compact SIM2-level visual feature space:
- **LBP(32 bins)** for texture,
- **HOG (mean/std)** for shape/contours,
- **Farneback optical flow (mean/std/ratio)** for motion.

### Key observation
The visual detector performs very differently across targets:

- **Chef achieves high MAP** (e.g., ~0.81 in our run).  
  This is expected because the Swedish Chef is characterized by **distinctive motion patterns** (high activity, fast arm/body movements).  
  Our optical-flow features capture this well, so even a simple classifier can separate many positive frames from the background.

- **OtherPigs is moderate** (e.g., ~0.22).  
  Pigs share some recurring visual structures (snout/face texture), which can be partially captured by LBP + HOG, but the category is heterogeneous: different pig characters, poses, partial visibility, and frequent occlusions reduce separability.

- **Piggy is challenging** (e.g., ~0.08).  
  Miss Piggy often appears under **strong variations in pose, illumination, and camera distance**, and is visually similar to other characters in terms of general texture and edges.  
  Because we intentionally avoided heavy local descriptors (e.g., DAISY) and do not rely on color-based heuristics, the current compact features may be insufficient to discriminate Piggy consistently.

### Interpretation in terms of feature design
- **Motion features** are a strong cue for Chef → high MAP.
- **Texture/shape cues** (LBP/HOG aggregates) are weaker for Piggy → low MAP.
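- The results are consistent with SIM1’s time-aware evaluation philosophy: generalization is tested on future timestamps, which reduces optimistic leakage from neighboring frames.
- **Caveat:** the split report above shows that several episode–target pairs have no positive frames in the test part (Piggy test positives come only from episode 343, Chef only from episode 211, OtherPigs from episodes 244 and 343), so each per-character MAP rests on one or two episodes and should be interpreted with caution.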

### Next steps (within constraints)
To improve Piggy without expensive descriptors, we can:
- tune HOG/LBP parameters or add small additional aggregates (still lightweight),
- compare a non-linear ensemble (ExtraTrees) with a calibrated linear model (SGD+Calibrated) to see which generalizes better.


In [39]:
# 9 — Save VISUAL SIM2 predictions for fusion / reporting

KEY_COLS = ["Video", "Frame_number", "Timestamp"]

# The prediction columns are copied positionally (.values), so y_test_df must hold
# exactly the same frames, in the same order, as test_df. Fail loudly if a stale
# y_test_df from an earlier split is still in the kernel.
assert (
    test_df[KEY_COLS].reset_index(drop=True)
    .equals(y_test_df[KEY_COLS].reset_index(drop=True))
), "y_test_df is not row-aligned with test_df; re-run the split and training cells"

visual_sim2_pred = test_df[KEY_COLS].copy()

for ch in SIM2_CHARACTER_LABEL_COLS:
    visual_sim2_pred[f"{ch}_score"] = y_test_df[f"{ch}_score"].values
    visual_sim2_pred[f"{ch}_present"] = y_test_df[f"{ch}_present"].values

out_path = "../data/processed/preds/visual_sim2_pred.csv"
# Create the output folder on a fresh checkout instead of failing inside to_csv.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
visual_sim2_pred.to_csv(out_path, index=False)

print(f"[OK] Visual SIM2 predictions saved to {out_path}")
visual_sim2_pred.head()


[OK] Visual SIM2 predictions saved to ../data/processed/preds/visual_sim2_pred.csv


Unnamed: 0,Video,Frame_number,Timestamp,Piggy_score,Piggy_present,OtherPigs_score,OtherPigs_present,Chef_score,Chef_present
0,211,29251,19:30.04,0.0,0,0.0,0,0.83,1
1,211,29252,19:30.08,0.005,0,0.03,0,0.885,1
2,211,29253,19:30.12,0.005,0,0.03,0,0.84,1
3,211,29254,19:30.16,0.0,0,0.03,0,0.8,1
4,211,29255,19:30.20,0.0,0,0.01,0,0.755,1


In [40]:
# 8.1 — Compare models: ExtraTrees vs SGD+Calibrated (MAP only)


def fit_predict_binary(model_kind, X_train, y_train, X_test, threshold=0.5):
    """Fit one binary classifier and score the test rows (comparison settings).

    NOTE(review): this redefines ``fit_predict_binary`` from the training cell with
    larger settings (n_estimators=300 instead of 200, max_iter=3000 instead of
    2000). After this cell has run, re-running the training cell's loop without
    re-running its definition silently uses these settings. Consider keeping a
    single definition with the settings as parameters.

    Parameters
    ----------
    model_kind : str
        "extratrees"     -> ExtraTreesClassifier with 300 trees, no depth limit.
        "sgd_calibrated" -> hinge-loss SGDClassifier wrapped in
                            CalibratedClassifierCV (cv=3, sigmoid) so that
                            predict_proba is available.
    X_train, y_train
        Training features and labels, passed unchanged to ``model.fit``.
    X_test
        Features to score.
    threshold : float, default 0.5
        Scores greater than or equal to this value are labelled 1.

    Returns
    -------
    (pred, proba)
        ``proba`` is column 1 of ``predict_proba(X_test)``; ``pred`` is the
        thresholded 0/1 decision derived from it.

    Raises
    ------
    ValueError
        If ``model_kind`` is not one of the two supported kinds.
    """
    # Only the model construction differs per kind; fit/predict is shared below.
    if model_kind == "extratrees":
        model = ExtraTreesClassifier(
            n_estimators=300,
            random_state=42,
            n_jobs=-1,
            max_depth=None
        )
    elif model_kind == "sgd_calibrated":
        base = SGDClassifier(
            loss="hinge",  # "log_loss" could be tried later
            alpha=1e-4,
            max_iter=3000,
            random_state=42
        )
        model = CalibratedClassifierCV(base, cv=3, method="sigmoid")
    else:
        raise ValueError(
            f"Unknown model_kind {model_kind!r}; expected 'extratrees' or 'sgd_calibrated'"
        )

    model.fit(X_train, y_train)
    proba = model.predict_proba(X_test)[:, 1]
    pred = (proba >= threshold).astype(int)
    return pred, proba

def run_model_kind(model_kind, X_train, X_test, train_df, test_df, label_cols, evalTools):
    """Train one model of ``model_kind`` per label column and evaluate them together.

    For every column in ``label_cols`` a binary model is fitted via
    ``fit_predict_binary`` and its outputs are stored next to the test labels as
    "<label>_present" and "<label>_score". The resulting frame is handed to
    ``evalTools.evaluate_multiclass``.

    Returns ``(per_char_map, overall_map)``: a dict mapping each label to the "MAP"
    entry of its metrics, and the overall value returned by the evaluator.
    """
    scored = test_df[label_cols].copy()

    for label in label_cols:
        train_labels = train_df[label].astype(int).values
        hard_pred, score = fit_predict_binary(model_kind, X_train, train_labels, X_test)
        scored[f"{label}_present"] = hard_pred
        scored[f"{label}_score"] = score

    # The same frame carries both the ground truth and the prediction columns.
    metrics_by_label, overall = evalTools.evaluate_multiclass(
        y_true_df=scored[label_cols],
        y_pred_df=scored,
        characters=label_cols,
    )

    map_by_label = dict((label, metrics_by_label[label]["MAP"]) for label in label_cols)
    return map_by_label, overall

# Train and evaluate both model kinds on the same split; one result row per kind.
# NOTE(review): the loop rebinds the notebook-level name `overall_map`, so after this
# cell it holds the value of the last kind evaluated ("sgd_calibrated"), not the
# value produced by the evaluation cell.
results = []
for kind in ["extratrees", "sgd_calibrated"]:
    per_char_map, overall_map = run_model_kind(
        kind, X_train, X_test, train_df, test_df,
        SIM2_CHARACTER_LABEL_COLS, evalTools
    )
    # Flatten the per-character MAPs into "MAP_<character>" columns.
    row = {"model": kind, "overall_map": overall_map, **{f"MAP_{k}": v for k, v in per_char_map.items()}}
    results.append(row)

# Best overall MAP first.
compare_df = pd.DataFrame(results).sort_values("overall_map", ascending=False)
compare_df


Mean Average Precision (MAP) per character:
Piggy: MAP=0.078
OtherPigs: MAP=0.224
Chef: MAP=0.825

Overall MAP (all characters): 0.376



divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by

Mean Average Precision (MAP) per character:
Piggy: MAP=0.212
OtherPigs: MAP=0.169
Chef: MAP=0.317

Overall MAP (all characters): 0.233


Unnamed: 0,model,overall_map,MAP_Piggy,MAP_OtherPigs,MAP_Chef
0,extratrees,0.375567,0.078044,0.223883,0.824774
1,sgd_calibrated,0.232636,0.211618,0.169318,0.316971


In [41]:
# 8.2 — the final model for reporting
# compare_df is sorted by overall_map in descending order, so row 0 is the winner.
# NOTE(review): BEST_MODEL_KIND is only printed; the predictions saved in step 9 were
# produced earlier with MODEL_KIND and the training cell's settings. The choice is
# also made on test-set MAP, so the reported score of the winner is optimistic —
# confirm whether a separate validation split is wanted.
BEST_MODEL_KIND = compare_df.iloc[0]["model"]
print("Best model by overall MAP:", BEST_MODEL_KIND)


Best model by overall MAP: extratrees
