# Similarity Modeling 1 - Visual Domain Features

In [None]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Make the project-local `utils` package (one directory up) importable.
sys.path.append("..")

from utils import visual_tools as visualTools
from utils import gt_and_modeling_dfs as prepare_df
from utils import evaluation_tools as eval

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Preprocessing Episodes

> Train-test split

> **Choice of split:**\
> There are 4 main characters that we need to identify, so the split timestamp for each episode is chosen such that the number of appearances of every character is roughly balanced between the two splits.

In [5]:
# Frame rate (frames per second) handed to the frame extractor; chosen
# according to the processing requirements.
FPS_TO_SAVE = 25

# Per-episode configuration, keyed by episode name:
#   path                  - raw video file of the episode
#   train_split_timestamp - "MM:SS" position at which the episode is cut into
#                           a train part and a test part
#   ground_truth_path     - annotation workbook belonging to the episode
EPISODES = {
    episode: {
        "path": f"../data/raw/{episode}.avi",
        "train_split_timestamp": split_at,
        "ground_truth_path": f"../data/muppets-gt-2025wt/Ground_Truth_New_{gt_id}.xlsx",
    }
    for episode, split_at, gt_id in (
        ("Muppets-02-01-01", "19:30", "01"),
        ("Muppets-02-04-04", "19:52", "04"),
        ("Muppets-03-04-03", "19:54", "03"),
    )
}

## Extract frames from all episodes

In [4]:
# Run the frame extractor once per configured episode and report how many
# frames it saved.
for episode_name, ep in EPISODES.items():
    print(f"\n=== Processing {episode_name} ===")

    # extract_frames returns a 3-tuple; only the saved-frame count is used here.
    frames_dir, total_saved, info = visualTools.extract_frames(
        video_file=ep["path"],
        episode_name=episode_name,
        fps_to_save=FPS_TO_SAVE,
    )

    print(f"[Frames] {episode_name}: saved={total_saved}")


=== Processing Muppets-02-01-01 ===
[frames ok] saved=38682 frames in ../data/processed/video/Muppets-02-01-01-frames
[Frames] Muppets-02-01-01: saved=38682

=== Processing Muppets-02-04-04 ===
[frames ok] saved=38707 frames in ../data/processed/video/Muppets-02-04-04-frames
[Frames] Muppets-02-04-04: saved=38707

=== Processing Muppets-03-04-03 ===
[frames ok] saved=38499 frames in ../data/processed/video/Muppets-03-04-03-frames
[Frames] Muppets-03-04-03: saved=38499


## Combine GT for all episodes

In [6]:
# Build one ground-truth frame covering every episode listed in EPISODES
# (presumably read from each entry's ground_truth_path - confirm in prepare_df).
all_ep_gt_df = prepare_df.all_ep_gt(EPISODES)

# Sanity check: first/last rows and the number of annotated rows per video id.
display(all_ep_gt_df.head(), all_ep_gt_df.tail())
print(all_ep_gt_df["Video"].value_counts())

Consolidated GT: 115885 rows, 10 columns


Unnamed: 0,Video,Frame_number,Timestamp,Kermit,Pigs,Miss Piggy,Cook,StatlerWaldorf,Rowlf the Dog,Fozzie Bear
0,211,0,00:00.00,0,0,0,0,0,0,0
1,211,1,00:00.04,0,0,0,0,0,0,0
2,211,2,00:00.08,0,0,0,0,0,0,0
3,211,3,00:00.12,0,0,0,0,0,0,0
4,211,4,00:00.16,0,0,0,0,0,0,0


Unnamed: 0,Video,Frame_number,Timestamp,Kermit,Pigs,Miss Piggy,Cook,StatlerWaldorf,Rowlf the Dog,Fozzie Bear
115880,343,38493,25:39.72,0,0,0,0,0,0,0
115881,343,38494,25:39.76,0,0,0,0,0,0,0
115882,343,38495,25:39.80,0,0,0,0,0,0,0
115883,343,38496,25:39.84,0,0,0,0,0,0,0
115884,343,38497,25:39.88,0,0,0,0,0,0,0


Video
244    38706
211    38681
343    38498
Name: count, dtype: int64


# Build Feature Space 
Dataframe with Visual Features and SIM1 characters 

In [7]:
# Ground-truth columns used as prediction targets in this notebook.
SIM1_CHARACTER_LABEL_COLS = ["Kermit", "StatlerWaldorf", "Fozzie Bear"]

# Names of the visual feature groups requested from the feature extractor.
SIM1_VISUAL_FEATURES = [
    "dominant_color",
    "green_mask",
    "edge_magnitude",
    "frog_eye",
    "brown_rhythm",
]

# Bookkeeping columns that identify a row but are never fed to the model.
META_FEATURES = ["Video", "frame", "Frame_number", "Timestamp"]

# Episode name -> value of the ground truth's "Video" column.
EPISODE_NAME_TO_VIDEO_ID = {
    "Muppets-02-01-01": 211,
    "Muppets-02-04-04": 244,
    "Muppets-03-04-03": 343,
}

# Consolidated ground truth, loaded from disk (presumably the file written by
# prepare_df.all_ep_gt in the previous section - TODO confirm).
GROUND_TRUTH = pd.read_csv("../data/processed/all_ep_gt.csv")

In [9]:
# Extract the selected visual features for the frames of every episode and
# attach the SIM1 character labels from the ground truth.
# NOTE: slow - the recorded run took several minutes per episode.
feature_space_visual_sim1 = prepare_df.build_feature_space_df(
    gt_df=GROUND_TRUTH,
    video_name_to_gt=EPISODE_NAME_TO_VIDEO_ID,
    characters=SIM1_CHARACTER_LABEL_COLS,
    feature_list=SIM1_VISUAL_FEATURES,
    feature_extractor_fn=visualTools.extract_visual_features_for_frame,
)

Muppets-02-01-01: 100%|███████████████████████████████████████| 38681/38681 [08:20<00:00, 77.33it/s]
Muppets-02-04-04: 100%|███████████████████████████████████████| 38706/38706 [08:27<00:00, 76.31it/s]
Muppets-03-04-03: 100%|███████████████████████████████████████| 38498/38498 [06:29<00:00, 98.89it/s]



[Feature space] saved (115885, 16) -> ../data/processed/feature_spaces/visual_sim1.csv


In [10]:
# Reload the persisted feature space and preview its first rows.
visual_sim1 = pd.read_csv("../data/processed/feature_spaces/visual_sim1.csv")
visual_sim1.head(n=5)

Unnamed: 0,Video,Frame_number,Timestamp,frame,dom_H,dom_S,dom_V,green_frac,edge_mean,eye_blob_count,eye_horizontal_align,eye_pupil_contrast,brown_rhythm,Kermit,StatlerWaldorf,Fozzie Bear
0,211,0,00:00.00,frame0.jpg,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
1,211,1,00:00.04,frame1.jpg,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
2,211,2,00:00.08,frame2.jpg,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
3,211,3,00:00.12,frame3.jpg,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0
4,211,4,00:00.16,frame4.jpg,0,0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0


# Split Feature Space for Modeling
## train-test splits

In [11]:
# Load the persisted feature space and split it into train and test rows
# using the per-episode configuration.
feature_df = pd.read_csv("../data/processed/feature_spaces/visual_sim1.csv")
train_df, test_df = prepare_df.split_feature_space_df(
    feature_df=feature_df,
    EPISODES=EPISODES,
    EPISODE_NAME_TO_VIDEO_ID=EPISODE_NAME_TO_VIDEO_ID,
)

# Everything that is not a model input: target labels plus bookkeeping columns.
DROP_COLS = SIM1_CHARACTER_LABEL_COLS + META_FEATURES

X_train, X_test = (split.drop(columns=DROP_COLS) for split in (train_df, test_df))

# Both matrices must expose the same features in the same order.
assert X_train.columns.tolist() == X_test.columns.tolist(), \
    "Train/Test feature columns do not match!"

col_names = X_train.columns.tolist()

# Standardise with statistics estimated on the training rows only.
# X_test is left unscaled here; the training cell transforms it with this
# same fitted scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

[split] Muppets-02-01-01 | Video=211 | train=29251, test=9430
[split] Muppets-02-04-04 | Video=244 | train=29801, test=8905
[split] Muppets-03-04-03 | Video=343 | train=29851, test=8647
[FINAL SPLIT] train=(88903, 16), test=(26982, 16)


# Model training and Evaluation

In [12]:
# Number of neighbours consulted by each per-character k-NN classifier.
KNN_N_NEIGHBORS = 5

# Re-derive the scaled test matrix from test_df on every execution so this
# cell is idempotent. The previous `X_test = scaler.transform(X_test)` scaled
# the already-scaled array again whenever the cell was re-run.
X_test = scaler.transform(test_df[col_names])
print(f"Training features: {col_names}")

# Ground-truth labels of the test rows; prediction columns are appended below.
y_test_df = test_df[SIM1_CHARACTER_LABEL_COLS].copy()

# Each character column is modelled as its own binary target.
for character in SIM1_CHARACTER_LABEL_COLS:

    y_train = train_df[character].astype(int).values

    knn = KNeighborsClassifier(
        n_neighbors=KNN_N_NEIGHBORS,
        weights="distance",  # closer neighbours get a larger vote (inverse distance)
    )
    knn.fit(X_train, y_train)

    # Hard decision, kept exactly as before.
    y_pred = knn.predict(X_test)

    # Continuous confidence for the positive class (label 1 - assumes the
    # labels are 0/1). Ranking metrics and the downstream fusion step need a
    # score rather than a hard decision. This costs a second neighbour search.
    class_labels = list(knn.classes_)
    if 1 in class_labels:
        y_score = knn.predict_proba(X_test)[:, class_labels.index(1)]
    else:
        # The positive class never occurs in the training labels.
        y_score = np.zeros(len(y_pred), dtype=float)

    print(f"\n > {character} Training Done")

    # store predictions
    y_test_df[f"{character}_present"] = y_pred
    y_test_df[f"{character}_score"] = y_score

print("\n > Evaluation Rresults: ")
metrics = eval.evaluate_multiclass(
    y_true_df=y_test_df[SIM1_CHARACTER_LABEL_COLS],
    y_pred_df=y_test_df,
    characters=SIM1_CHARACTER_LABEL_COLS
)

Training features: ['dom_H', 'dom_S', 'dom_V', 'green_frac', 'edge_mean', 'eye_blob_count', 'eye_horizontal_align', 'eye_pupil_contrast', 'brown_rhythm']

 > Kermit Training Done

 > StatlerWaldorf Training Done

 > Fozzie Bear Training Done

 > Evaluation Rresults: 


Mean Average Precision (MAP) per character:
Kermit: MAP=0.632
StatlerWaldorf: MAP=0.754
Fozzie Bear: MAP=0.288

Overall MAP (all characters): 0.558


In [13]:
# --- Save VISUAL predictions for fusion ---

# Columns that identify a frame; kept so the predictions can be joined later.
KEY_COLS = ["Video", "Frame_number", "Timestamp"]

visual_pred = test_df[KEY_COLS].copy()

for ch in SIM1_CHARACTER_LABEL_COLS:
    present_col = f"{ch}_present"
    score_col = f"{ch}_score"

    # Prefer a continuous score when the training cell produced one;
    # otherwise fall back to the hard 0/1 decision.
    source_col = score_col if score_col in y_test_df.columns else present_col

    # .values copies by position, so the two frames' indexes need not match.
    visual_pred[score_col] = y_test_df[source_col].values
    visual_pred[present_col] = y_test_df[present_col].values

out_path = "../data/processed/preds/visual_sim1_pred.csv"

# to_csv does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout.
Path(out_path).parent.mkdir(parents=True, exist_ok=True)
visual_pred.to_csv(out_path, index=False)

print(f"[OK] Visual predictions saved to {out_path}")
visual_pred.head()


[OK] Visual predictions saved to ../data/processed/preds/visual_sim1_pred.csv


Unnamed: 0,Video,Frame_number,Timestamp,Kermit_score,Kermit_present,StatlerWaldorf_score,StatlerWaldorf_present,Fozzie Bear_score,Fozzie Bear_present
0,211,29251,19:30.04,0,0,0,0,0,0
1,211,29252,19:30.08,0,0,0,0,0,0
2,211,29253,19:30.12,0,0,0,0,0,0
3,211,29254,19:30.16,0,0,0,0,0,0
4,211,29255,19:30.20,0,0,0,0,0,0
