In [11]:
# Executes the shared constants notebook; presumably this is what defines the
# `C` namespace used by the cells below — TODO confirm.
# NOTE(review): the path is absolute and user-specific, so this cell only runs
# on the original author's machine; consider a path relative to the repo root.
%run /home/dbaciur/NTU/NTU/notes/Constants.ipynb

Constants loaded


In [12]:
import pandas as pd
import numpy as np
import os
import pickle

import matplotlib.pyplot as plt
from tqdm import tqdm

In this notebook we check whether all point events are covered by state events, i.e. whether every point event falls within the time range of some state event for the same behavior.

### Import annotations

In [13]:
# Read the processed annotations and keep only rows whose behavior id is in
# the list of relevant behaviors.
annotations_df = (
    pd.read_csv(C.PROCESSED_ANNOTATIONS_PATH, converters=C.A_CONVERTERS)
    .loc[lambda frame: frame[C.A_BEH_ID].isin(C.REL_BEHS)]
)

### Analyze coverage

In [15]:
# Distinct video identifiers that still have at least one annotation after
# the relevant-behavior filter; the count is displayed as the cell output.
videos_set = annotations_df.loc[:, C.A_VIDEO].unique()
number_of_videos = len(videos_set)
number_of_videos

114

In [17]:
# Each pair is ordered (state behavior id, point behavior id): the coverage
# loop reads index 0 as the state behavior and index 1 as the point behavior.
CHARTS = (C.B_CHARTS_S, C.B_CHARTS_P)
IMAGES = (C.B_IMAGES_S, C.B_IMAGES_P)
WEBSITES = (C.B_WEBSITE_S, C.B_WEBSITE_P)
FILMS = (C.B_FILMS_S, C.B_FILMS_P)

In [18]:
# All (state, point) behavior pairs whose coverage is analysed below.
state_point_beh_pairs = [CHARTS, IMAGES, WEBSITES, FILMS]

In [21]:
def _count_covered_points(point_intervals, state_intervals):
    """Count point events whose interval overlaps at least one state interval.

    Args:
        point_intervals: iterable of (start, stop) pairs for point events.
        state_intervals: re-iterable collection (e.g. list) of (start, stop)
            pairs for state events.

    Returns:
        Number of point intervals that overlap some state interval. Bounds are
        inclusive: a point that only touches the edge of a state counts as
        covered.
    """
    return sum(
        # A point is covered unless it ends before the state starts or starts
        # after the state ends; `any` stops at the first overlapping state.
        any(
            not (p_end_ts < s_start_ts or p_start_ts > s_end_ts)
            for s_start_ts, s_end_ts in state_intervals
        )
        for p_start_ts, p_end_ts in point_intervals
    )


# For every behavior pair: one coverage percentage per video that has at least
# one point event of that behavior.
coverage_data_by_beh_pair = {beh_pair: [] for beh_pair in state_point_beh_pairs}

for video in tqdm(videos_set):

    # The per-video slice does not depend on the behavior pair, so compute it
    # once per video instead of once per (video, pair).
    video_annotations = annotations_df[annotations_df[C.A_VIDEO] == video]
    video_beh_ids = video_annotations[C.A_BEH_ID]

    for state_point_pair in state_point_beh_pairs:

        state_beh_id, point_beh_id = state_point_pair

        point_events = video_annotations[video_beh_ids == point_beh_id]
        point_events_count = len(point_events)

        # Videos without point events of this behavior contribute no sample
        # (and would otherwise divide by zero).
        if point_events_count == 0:
            continue

        state_events = video_annotations[video_beh_ids == state_beh_id]

        # Materialise the state intervals once so they are not rebuilt for
        # every point event.
        state_intervals = list(zip(state_events[C.A_START], state_events[C.A_STOP]))
        point_intervals = zip(point_events[C.A_START], point_events[C.A_STOP])

        covered_points_count = _count_covered_points(point_intervals, state_intervals)

        coverage_percentage = 100 * (covered_points_count / point_events_count)
        coverage_data_by_beh_pair[state_point_pair].append(coverage_percentage)

100%|██████████| 114/114 [00:09<00:00, 11.78it/s]


In [22]:
def get_mean(beh_pair, coverage_data=None):
    """Return the mean of the per-video coverage percentages for a behavior pair.

    Note that this is an unweighted mean over videos: every video with at
    least one point event counts equally, regardless of how many point events
    it contains.

    Args:
        beh_pair: key into the coverage mapping (a (state, point) pair).
        coverage_data: optional mapping from behavior pair to a list of
            coverage percentages. Defaults to the notebook-level
            ``coverage_data_by_beh_pair``.

    Returns:
        The mean coverage percentage, or NaN when no video contributed a
        value for this pair.
    """
    if coverage_data is None:
        coverage_data = coverage_data_by_beh_pair

    per_video_coverage = coverage_data[beh_pair]

    # Make the "no data" case explicit instead of relying on numpy's
    # "Mean of empty slice" RuntimeWarning.
    if len(per_video_coverage) == 0:
        return np.nan

    return np.array(per_video_coverage).mean()

In [23]:
# Mean per-video coverage (%) for the charts state/point pair.
get_mean(CHARTS)

99.24230513922369

In [24]:
# Mean per-video coverage (%) for the images state/point pair.
get_mean(IMAGES)

99.8133975812547

In [25]:
# Mean per-video coverage (%) for the websites state/point pair.
get_mean(WEBSITES)

100.0

In [26]:
# Mean per-video coverage (%) for the films state/point pair.
get_mean(FILMS)

100.0