In [4]:
import sys
sys.path.append("..")
from utils import visual_tools as vt

# Preprocessing Episodes

> Train-test split

> **Choice of split:**\
> There are 4 main characters that we need to identify, so the split time for a given episode is chosen such that every character appears roughly the same number of times in both splits.

In [5]:
# Configuration for the preprocessing cells below.
# FPS_TO_SAVE is forwarded as `fps_to_save` to the vt frame helpers; the value
# is chosen according to processing requirements.
FPS_TO_SAVE = 4

# Episode 1
# *_PATH is the raw video file, *_SKIP_INTRO_TILL is passed as `intro_timestamp`
# and *_SPLIT_AT as `split_timestamp` (train/test boundary).
# NOTE(review): timestamps look like "MM:SS" -- confirm against the vt helpers.
EPISODE_1_PATH = "../data/raw/video/Muppets-02-01-01.avi"
EPISODE_1_SKIP_INTRO_TILL = "00:17"
EPISODE_1_SPLIT_AT = "19:30"

# Episode 2
# NOTE(review): skip and split timestamps are both "00:00" -- presumably
# placeholders; set real values before preprocessing this episode.
EPISODE_2_PATH = "../data/raw/video/Muppets-02-04-04.avi"
EPISODE_2_SKIP_INTRO_TILL = "00:00"
EPISODE_2_SPLIT_AT = "00:00"

# Episode 3
# NOTE(review): split timestamp is still "00:00" -- presumably a placeholder;
# set a real value before preprocessing this episode.
EPISODE_3_PATH = "../data/raw/video/Muppets-03-04-03.avi"
EPISODE_3_SKIP_INTRO_TILL = "00:08"
EPISODE_3_SPLIT_AT = "00:00"

In [6]:
# Print the basic properties reported for the episode 1 video file.
ep1_info = vt.video_info(EPISODE_1_PATH)
print(ep1_info)

# Save sampled frames of the episode at the configured rate.
frames_dir, total_saved = vt.extract_frames(
    EPISODE_1_PATH,
    fps_to_save=FPS_TO_SAVE,
)

# Gather frame metadata, leaving out the intro up to the configured timestamp.
frames, _, info = vt.extract_frames_meta(
    EPISODE_1_PATH,
    intro_timestamp=EPISODE_1_SKIP_INTRO_TILL,
    fps_to_save=FPS_TO_SAVE,
)

# Divide the frames into train and test parts at the split timestamp.
train, test = vt.split_frames(
    frames,
    EPISODE_1_PATH,
    split_timestamp=EPISODE_1_SPLIT_AT,
    fps_to_save=FPS_TO_SAVE,
)

{'fps': 25.0, 'width': 720, 'height': 544, 'num_frames': 38682, 'fourcc': 877677894, 'duration_sec': 1547.28}
[frames ok] saved=6447 frames in ../data/raw/video/Muppets-02-01-01-frames
[YoHoo! split done :>] saved frames: train=4680, test=1699


# Feature Extraction

In [None]:
import numpy as np
from colorthief import ColorThief
import colorsys
import csv

import pandas as pd

## Feature Extraction:

### Getting train and test features

In [None]:
# Episode whose processed frames are turned into features.
episode_name = "Muppets-02-01-01"

# Directories holding this episode's feature files and train/test frames.
features_dir = f"../data/processed/video/{episode_name}/features/"
train_frames_dir = f"../data/processed/video/{episode_name}/train/"
test_frames_dir  = f"../data/processed/video/{episode_name}/test/"

# Small fixed subsets of frame files used for each split.
train_frames_small = [
    "frame175.jpg",
    "frame176.jpg",
    "frame177.jpg",
    "frame311.jpg",
    "frame312.jpg",
    "frame313.jpg",
    "frame1648.jpg",
    "frame1692.jpg",
    "frame4678.jpg",
]
test_frames_small = [
    "frame5281.jpg",
    "frame5382.jpg",
    "frame5380.jpg",
    "frame5434.jpg",
    "frame5427.jpg",
]

# Dominant colour feature: train split first, then test split.
train_colors, train_colors_csv = vt.dominant_color_feature(
    train_frames_dir,
    episode_name,
    frame_files=train_frames_small,
)
test_colors, test_colors_csv = vt.dominant_color_feature(
    test_frames_dir,
    episode_name,
    frame_files=test_frames_small,
)

# Green mask feature for both splits.
train_green, train_green_csv = vt.green_mask_feature(
    train_frames_dir,
    episode_name,
    frame_files=train_frames_small,
)
test_green, test_green_csv = vt.green_mask_feature(
    test_frames_dir,
    episode_name,
    frame_files=test_frames_small,
)

# Edge magnitude feature for both splits.
train_edges, train_edges_csv = vt.edge_magnitude_feature(
    train_frames_dir,
    episode_name,
    frame_files=train_frames_small,
)
test_edges, test_edges_csv = vt.edge_magnitude_feature(
    test_frames_dir,
    episode_name,
    frame_files=test_frames_small,
)


[Dominant color] saved 9 frames -> ../data/processed/video/Muppets-02-01-01/features/dominant_color_train.csv
[Dominant color] saved 5 frames -> ../data/processed/video/Muppets-02-01-01/features/dominant_color_test.csv
[Green mask] saved 9 frames -> ../data/processed/video/Muppets-02-01-01/features/green_mask_train.csv
[Green mask] saved 5 frames -> ../data/processed/video/Muppets-02-01-01/features/green_mask_test.csv
[Edge magnitude] saved 9 frames -> ../data/processed/video/Muppets-02-01-01/features/edge_magnitude_train.csv
[Edge magnitude] saved 5 frames -> ../data/processed/video/Muppets-02-01-01/features/edge_magnitude_test.csv


### Making dataframes for model processing

In [None]:
def _combine_feature_tables(*feature_tables):
    """Join per-frame feature tables column-wise into one model-input frame.

    Each table is converted with ``pd.DataFrame`` and must contain a ``frame``
    column. ``pd.concat(..., axis=1)`` aligns rows on the index, so once the
    ``frame`` column is dropped nothing ties a row to its frame any more. The
    tables are therefore only combined when every ``frame`` column (values and
    index) equals the first table's; otherwise the rows would silently
    describe different frames or be padded with NaN.

    Args:
        *feature_tables: One or more objects accepted by ``pd.DataFrame``,
            each providing a ``frame`` column.

    Returns:
        A DataFrame with all feature columns side by side and no ``frame``
        column, in the order the tables were passed.

    Raises:
        ValueError: If no table is given, or if a table's ``frame`` column
            differs from the first table's.
    """
    tables = [pd.DataFrame(table) for table in feature_tables]
    if not tables:
        raise ValueError("at least one feature table is required")

    reference_frames = tables[0]["frame"]
    for position, table in enumerate(tables[1:], start=1):
        # Series.equals compares both the values and the index, which is
        # exactly what the index-aligned concat below relies on.
        if not table["frame"].equals(reference_frames):
            raise ValueError(
                f"feature table {position} does not cover the same frames "
                "in the same order as feature table 0"
            )

    return pd.concat([table.drop(columns="frame") for table in tables], axis=1)


# combine train features
X_train = _combine_feature_tables(train_colors, train_edges, train_green)

# combine test features
X_test = _combine_feature_tables(test_colors, test_edges, test_green)



## Model training