In [1]:
"""
!DEPRECATED SCRIPT!: Merges and video lengths in this script are based on the raw, uncut videos.
For an accurate estimate, we should instead use (clip_end - clip_start) for each unique clip_uid in the annotations.


This notebook analyses the Ego4D Long-term Action Anticipation (LTA)
splits separately. These are the annotated subsets that are available.
The annotations are linked to the meta-data JSON through video_uid.
"""

import json
import os.path as osp
import json
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

# `np.warnings` was only an accidental alias of the stdlib module and is gone in
# current NumPy releases, so go through the stdlib `warnings` module directly.
import warnings

# Newer NumPy exposes the warning class under `numpy.exceptions`; fall back to the
# top-level name on older releases that do not ship that module.
try:
    from numpy.exceptions import VisibleDeprecationWarning as _VisibleDeprecationWarning
except ImportError:
    _VisibleDeprecationWarning = np.VisibleDeprecationWarning

warnings.filterwarnings('ignore', category=_VisibleDeprecationWarning)

# Guards the optional display/plot statements in the cells below.
EXECUTE = False # __name__ == '__main__' and '__file__' in globals()

# Parsing dataframes from jsons

In [2]:
# Entire dataset Meta-data DF
meta_data_file_path = "/fb-agios-acai-efs/Ego4D/ego4d_data/ego4d.json"
with open(meta_data_file_path, 'r') as meta_data_file:
    meta_data_obj = json.load(meta_data_file)

# Convert to DF: one row per entry of the top-level 'videos' list.
# The former `pd.json_normalize(meta_data_obj)` "overview" call was dropped: its
# result was never assigned or displayed, so it only cost time and memory.
video_df = pd.json_normalize(meta_data_obj['videos'])

if EXECUTE:
    # A bare expression inside an `if` block is not rendered as cell output,
    # so print explicitly (same approach as the annotation preview cell).
    print(video_df.head(n=20))

In [3]:
# Get annotation DF for Long Term Action Anticipation (LTA)
MODE='train' # train/test/val

# Known LTA annotation files in the annotation directory.
annotation_file_names = [
    'fho_lta_debug_val.json',
    'fho_lta_taxonomy.json',
    'fho_lta_test_unannotated.json',
    'fho_lta_train.json',
    'fho_lta_train_10000.json',
    'fho_lta_train_5000.json',
    'fho_lta_val.json',
]
print(annotation_file_names)

annotation_file_dir = "/fb-agios-acai-efs/Ego4D/ego4d_data/v1/annotations"

# Pick the annotation file that belongs to the selected split.
annotation_file_name = {
    'train': "fho_lta_train.json",
    'val': 'fho_lta_val.json',
    'test': 'fho_lta_test_unannotated.json',
}[MODE]
assert annotation_file_name in annotation_file_names
annotation_file_path = osp.join(annotation_file_dir, annotation_file_name)

with open(annotation_file_path, 'r') as annotation_file:
    annotation_obj = json.load(annotation_file)

# Check out the annotations (top-level view: one row, 'clips' still nested)
# print(annotation_obj.keys())
print(pd.json_normalize(annotation_obj).head(n=20))

# The annotation entries indicate a unique action in a timeframe of a specific video.
# There are multiple clips 'clip_uid' (and hence actions) per 'video_uid', and multiple actions per clip_uid.
# e.g. search on video_uid '9c59e912-2340-4400-b2df-7db3d4066723', resulting in 74 unique clip_uids
# The 'action_idx'=k indicates the k-th action in that clip.

# Convert clips to DF: one row per entry of annotation_obj['clips']
labeled_clips_df = pd.json_normalize(annotation_obj['clips'])

# SELECT WHERE... examples for a single clip_id or video_uid
# labeled_clips_df.loc[labeled_clips_df['clip_id'] == 1805] # SELECT WHERE...
# labeled_clips_df.loc[labeled_clips_df['video_uid'] == '8b72b54f-f87c-4fdb-84dc-97e42ab111ac'] # SELECT WHERE...
if EXECUTE:
    print(labeled_clips_df.head(n=20))

['fho_lta_debug_val.json', 'fho_lta_taxonomy.json', 'fho_lta_test_unannotated.json', 'fho_lta_train.json', 'fho_lta_train_10000.json', 'fho_lta_train_5000.json', 'fho_lta_val.json']
  version    date                                        description  split  \
0     1.0  220217  Ego4d Long Term Anticipation Forecasting Annot...  train   

                                               clips  
0  [{'video_uid': '9c59e912-2340-4400-b2df-7db3d4...  


# Looking into the meta-data

In [4]:
# Meta-data summary per user
# One row per user (fb_participant_id): list of scenarios, number of videos, summed duration.
# Null/unassigned users are omitted automatically

scenarios_per_user_df = (
    video_df
    .groupby(video_df['fb_participant_id'], as_index=False)
    .agg({'scenarios': list, 'video_uid': 'count', 'duration_sec': 'sum'})
)
# Flatten each user's list of per-video scenario lists, then count occurrences per scenario.
scenarios_per_user_df['scenarios'] = scenarios_per_user_df['scenarios'].apply(
    lambda nested_scenarios: Counter(np.concatenate(nested_scenarios))
)
scenarios_per_user_df = scenarios_per_user_df.rename(columns={"video_uid": "video_count"})

if EXECUTE:
    scenarios_per_user_df

## About the annotations
There is one entry per supervision label. The annotations may contain multiple clips and multiple video fragments.
A clip can be a sub-video of the parent video (probably because some videos are too long for continuous labeling). One clip_id has multiple entries, one for each action happening in that clip.

The action label is identified by ('verb','noun') or ('verb_label','noun_label'). Don't use 'action_idx' as a label: it only indicates that this is the 'action_idx'-th action in a clip (an action counter)!

Multiple actions can happen at the same time! (Overlap in clip time ranges)

Through the video_uid we can link the annotations to the user. (Linking meta-data with the annotation-data).


# Linking the meta-data and annotation dataframes

In [5]:
# First link labels to videos - INNER JOIN of meta-object and labels-object.
# Only video_uid values present on both sides are kept; `validate` asserts that
# video_uid is unique on the meta-data side (one video -> many label rows).
joined_df = video_df.merge(
    labeled_clips_df,
    how="inner",
    on="video_uid",
    validate="one_to_many",
)


if __name__ == "__main__":
    print(video_df.shape)
    print(f"labels_df={labeled_clips_df.shape}")
    print(f"joined_df={joined_df.shape} -> Good, should have same #rows as labels-df, only keys are extended with info from meta-data object, such as user id etc")
    joined_df

def duration_row_fn(x):
    """Sum the video durations of a row, counting every distinct video_uid once.

    Args:
        x: Two-element row; position 0 holds a list of video uids and position 1
            the parallel list of video durations. May be a pandas Series (as
            passed by ``DataFrame.apply(..., axis=1)``) or a plain sequence.

    Returns:
        Sum of the durations taken at the first occurrence of each unique uid.
    """
    # Integer `x[0]` on a label-indexed Series is deprecated positional access in
    # recent pandas; `.iloc` is explicitly positional. Plain sequences lack `.iloc`.
    positional = x.iloc if hasattr(x, "iloc") else x
    video_uid_list = positional[0]
    video_len_list = positional[1]

    # return_index gives the index of the first occurrence of every unique uid.
    _, first_idxs = np.unique(video_uid_list, return_index=True)
    sum_unique_video_len = sum(np.asarray(video_len_list, dtype=np.float64)[first_idxs])

    return sum_unique_video_len
    
# Then group by user and aggregate columns
def group_innerjoin_by_user(joined_df):
    """Aggregate the joined (meta-data x annotation) rows into one row per user.

    Groups on 'fb_participant_id' and collects the annotation columns into lists,
    then derives 'rawvideo_duration_sec_sum', 'video_count' and 'action_count'.
    The per-row 'duration_sec' list is removed from the result.

    Args:
        joined_df: DataFrame holding the columns named in the aggregation below.

    Returns:
        New DataFrame with one row per user; `joined_df` itself is not modified.
    """
    list_columns = (
        'scenarios', 'verb', 'noun', 'verb_label', 'noun_label',
        'video_uid', 'duration_sec', 'clip_id', 'action_idx',
        'clip_parent_start_sec', 'clip_parent_end_sec',
    )
    aggregation = {'fb_participant_id': 'first'}
    aggregation.update({column: list for column in list_columns})

    # NOTE: the collected video_uid list repeats a uid once per joined row,
    # so its length is NOT the number of videos.
    grouped = joined_df.copy(deep=True).groupby(joined_df['fb_participant_id'], as_index=False)
    user_df = grouped.agg(aggregation)

    user_df['rawvideo_duration_sec_sum'] = user_df[['video_uid', 'duration_sec']].apply(duration_row_fn, axis=1)
    # Only UNIQUE video_uids count as videos after the inner join.
    user_df['video_count'] = user_df['video_uid'].apply(lambda uids: len(np.unique(uids)))
    user_df['scenarios'] = user_df['scenarios'].apply(np.concatenate)  # flatten list of lists
    user_df['action_count'] = user_df['action_idx'].apply(len)
    # Drop the raw per-row durations so they cannot be used further on.
    user_df = user_df.drop(columns='duration_sec')
    return user_df

# Collapse the joined annotation rows into one row per participant.
user_df = group_innerjoin_by_user(joined_df)

if EXECUTE:
    # NOTE(review): a bare expression inside an `if` block is not rendered as cell output.
    user_df

(9645, 54)
labels_df=(23610, 20)
joined_df=(23610, 73) -> Good, should have same #rows as labels-df, only keys are extended with info from meta-data object, such as user id etc


# Convert (verb,noun) into actions and create Counter-columns

In [11]:
# MERGE (verb,noun) columns in_place
def create_action_labels_from_verbnoun(user_df):
    """Add 'action_label' and 'action' columns to `user_df`, in place.

    'action_label' merges ('verb_label', 'noun_label') and 'action' merges
    ('verb', 'noun'). Each cell may hold scalars (result: "<left>-<right>") or
    two parallel lists (result: list of "<left>-<right>" strings).

    Args:
        user_df: DataFrame with 'verb', 'noun', 'verb_label' and 'noun_label' columns.

    Returns:
        None; the two columns are written onto `user_df`.
    """
    def label_fn(x):
        assert len(x) == 2, "Need two columns to merge"
        # `.iloc` is explicitly positional; integer `x[0]` on a label-indexed row
        # is deprecated positional access in recent pandas.
        left, right = x.iloc[0], x.iloc[1]
        if not isinstance(left, list):
            assert not isinstance(right, list)
            return f"{left}-{right}"

        return [f"{l}-{r}" for l, r in zip(left, right)]

    user_df['action_label'] = user_df[['verb_label', 'noun_label']].apply(label_fn, axis=1)
    print("Created action_label column")

    user_df['action'] = user_df[['verb', 'noun']].apply(label_fn, axis=1)
    print("Created action column")
    
# APPLY COUNTER OBJECTS ON ACTIONS
def create_counter_columns(df, cols=('action_label','action')):
    """Add a '<col>_count' column holding a Counter of each cell of '<col>', in place.

    Args:
        df: DataFrame whose `cols` cells are iterables of hashable items.
        cols: Tuple of source column names (must be a tuple).

    Returns:
        None; the new columns are written onto `df`. An existing column with the
        same '<col>_count' name is overwritten.
    """
    assert isinstance(cols,tuple)
    for source_col in cols:
        print(f"Created column Counter: {source_col}")
        counts_per_row = df[source_col].apply(Counter)
        df[f"{source_col}_count"] = counts_per_row
        
def get_normalized_distr_actions(user_action_freq_df, target_col='action_count',new_col='action_distr'):
    """Add a zero-padded, descending relative-frequency vector per row, in place.

    Each cell of `target_col` must offer `.values()` returning counts (e.g. a
    Counter). The counts are divided by their sum, sorted from high to low, and
    right-padded with zeros so every row has the length of the longest row.
    The padded length is printed.

    Args:
        user_action_freq_df: DataFrame to extend.
        target_col: Column holding the count mappings.
        new_col: Name of the column to create.

    Returns:
        None; `new_col` is written onto `user_action_freq_df`.
    """
    def _normalized_descending(counter):
        counts = list(counter.values())
        total = sum(counts)  # computed once per row, not once per element
        return sorted((count / total for count in counts), reverse=True)

    user_action_freq_df[new_col] = user_action_freq_df[target_col].apply(_normalized_descending)

    # Longest distribution over all rows; shorter ones are zero-padded to this length
    max_action_list_len = user_action_freq_df[new_col].apply(len).max()
    print(max_action_list_len)

    # Append zero frequencies so that all rows have the same length
    user_action_freq_df[new_col] = user_action_freq_df[new_col].apply(
        lambda distr: np.pad(distr, (0, max_action_list_len - len(distr)))
    )

# In-place operations: each call below adds columns to user_df directly.
create_action_labels_from_verbnoun(user_df)
# With the default cols this writes 'action_label_count' and 'action_count'; the latter
# replaces the integer 'action_count' set in group_innerjoin_by_user() with a Counter.
create_counter_columns(user_df)
# Reads the Counter-valued 'action_count' and adds 'action_distr'.
get_normalized_distr_actions(user_df)

# Alias, not a copy: both names refer to the same DataFrame object.
user_action_freq_df = user_df

Created action_label column
Created action column
Created column Counter: action_label
Created column Counter: action
344


# Summarize actions per video

In [7]:
# Stats and checks on actions

# Average number of actions per video, per user:
# (#actions of the user) / (#unique videos of the user), then averaged over users.
# Columns are read by name; integer `x[0]`/`x[1]` on a label-indexed row is
# deprecated positional access in recent pandas.
user_action_freq_df['actions_per_video'] = user_action_freq_df[['video_count', 'action_count']].apply(
    lambda row: float(sum(row['action_count'].values())) / float(row['video_count']),
    axis=1)
print(f"We have {user_action_freq_df['actions_per_video'].mean()} actions/video avgd over users")

# Group labeled clips by unique_videos
print(f"video_df={video_df.shape}, labels_df={labeled_clips_df.shape}")
group_labeled_clips_df = labeled_clips_df.groupby(labeled_clips_df['video_uid'], as_index=False).agg(
    {'verb':list,'noun':list, 'verb_label':list, 'noun_label':list,'clip_id':list,'action_idx':list})
group_labeled_clips_df['action_count'] = group_labeled_clips_df['action_idx'].apply(len)
print(group_labeled_clips_df.head(n=20))

# CHECK IF ALL ANNOTATION VIDEOS EXIST IN META-DATA DF
print("All video_uid's in annotations exist in the meta data object = {}".format(
    group_labeled_clips_df['video_uid'].isin(video_df['video_uid']).all()
))

We have 47.56886574074074 actions/video avgd over users
video_df=(9645, 54), labels_df=(23610, 20)
                               video_uid  \
0   002d2729-df71-438d-8396-5895b349e8fd   
1   01db7c39-a512-4bac-b284-dff8c7360e80   
2   02995fb6-f8ac-4168-a60d-8a0b1a7210bd   
3   05e8b510-0973-4cbd-9a23-bf2c156b7958   
4   05f672ea-1651-4767-9c8a-0f504805e9c7   
5   062ded0e-1df8-42d1-adaa-fc948e1cd7de   
6   078f6bad-aa22-48bf-9df2-4d2f6ba7b556   
7   080657b3-7f23-4285-96ec-39136e58cdf1   
8   0836e1a4-11e6-4b31-bd39-f8e083fdadb3   
9   099f6f96-5aa7-4da8-a5e0-2e8bc03beee6   
10  0b24eb9e-a5ae-4389-b2cd-11fbf88e9c4a   
11  0b6fc89d-bf4b-44f3-82e7-67ee02517459   
12  0be30efe-9d71-4698-8304-f1d441aeea58   
13  0c192ca8-1ede-4ef0-a05e-2f4151b6bdfc   
14  0c8c2f4b-a006-47ca-8826-133af1dfb632   
15  0cb2dd94-afb1-4e30-a62f-724f34d81777   
16  0d8a3e5c-4263-4f80-b32d-39d4f33008ba   
17  0e6fb738-05fc-4dd5-9746-a8e10efe8c20   
18  0fbf42b1-23ed-4a2b-ad71-ab438b45e0d2   
19  0fe191ef-c28a-422

# Plot stacked barchart: Action distr per user

In [13]:
# Other visualisation: users on x-axis and y-axis is multi-barplot where scenarios ordered on freq
def check_sum_one(list_of_distr):
    """Assert that every distribution in `list_of_distr` sums to 1 within 1e-5.

    Args:
        list_of_distr: Iterable of iterables of numbers.

    Raises:
        AssertionError: On the first distribution whose sum lies outside
            the open interval (1 - 1e-5, 1 + 1e-5).
    """
    tolerance = 1e-5
    lower_bound, upper_bound = 1 - tolerance, 1 + tolerance
    for prob_sum in map(sum, list_of_distr):
        assert lower_bound < prob_sum < upper_bound, f"prob_sum={prob_sum} NOT SUMMING TO ONE"

def plot_useractions_stackedbarchart(user_action_freq_df,sort_by_col="duration_sec_sum",y_col="action_distr",
                                    title="Stacked action freq per user - SORTED on total user video len (min)",
                                    xlabel="User idx - SORTED on total user video len (min) ",
                                    ylabel="Stacked action freq",
                                    ):
    """Draw one stacked bar per user from the per-user distributions in `y_col`.

    Rows are ordered by `sort_by_col` (descending). Every distribution must sum
    to one (checked with check_sum_one) and all must have the same length so
    they stack into a 2-D array.

    Args:
        user_action_freq_df: DataFrame with columns `sort_by_col` and `y_col`.
        sort_by_col: Column that determines the left-to-right order of the users.
        y_col: Column holding equal-length frequency vectors.
        title, xlabel, ylabel: Texts for the figure.

    Returns:
        The values of `sort_by_col` in plotted (descending) order.
    """
    ranked_df = user_action_freq_df.sort_values(by=[sort_by_col], ascending=False)
    sorted_col_list = ranked_df[sort_by_col].tolist()

    # One distribution per user, in plotting order
    per_user_distr = ranked_df[y_col].tolist()
    check_sum_one(per_user_distr)

    # Transpose so that row k holds the k-th largest frequency of every user
    distr_matrix = np.asarray(per_user_distr)
    check_sum_one(distr_matrix)
    layers = distr_matrix.transpose()
    print(f"Stacked barchart plot entries (x=Users, y=user with max different actions)=({layers.shape})")

    plt.figure(figsize=(20, 7), dpi=600) # High dpi so all bars are visible
    user_positions = list(range(len(layers[0])))

    palette = plt.get_cmap("prism")
    color_steps = np.linspace(0, 1, len(layers))

    # Config
    bar_width = 1

    # Bottom layer first, then stack every next layer on the running totals
    bottoms = layers[0]
    plt.bar(user_positions, height=bottoms, align='center', width=bar_width, color=palette(color_steps[0]))
    for layer_idx in range(1, len(layers)):
        layer = layers[layer_idx]
        plt.bar(user_positions, height=layer, color=palette(color_steps[layer_idx]),
                bottom=bottoms, align='center', width=bar_width)

        layer_values = layer.tolist()
        assert len(bottoms) == len(layer_values)
        bottoms = [base + added for base, added in zip(bottoms, layer_values)]

    plt.ylim(None,None)
    plt.xlim(None,None)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)

    plt.show()
    plt.clf()

    return sorted_col_list

if EXECUTE:
    # group_innerjoin_by_user() stores the per-user total under 'rawvideo_duration_sec_sum';
    # no visible cell creates the function's default 'duration_sec_sum' column,
    # so sorting on the default would raise a KeyError.
    video_lens_s = plot_useractions_stackedbarchart(
        user_action_freq_df, sort_by_col="rawvideo_duration_sec_sum")

# Plot barchart for total video length/user

In [19]:
# Video lengths for this plot
# Barchart API: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.bar.html#matplotlib.pyplot.bar
def plot_barchart(x_axis, y_vals, title,ylabel,xlabel='User-ID', 
                  grid=False,yerror=None,xerror=None, y_labels=None, x_labels=None,bar_align='edge',barh=False,
                 figsize=(12, 6), log=False, interactive=False,x_minor_ticks=None):
    """Draw a (horizontal or vertical) bar chart and print the head/tail of `y_vals`.

    Args:
        x_axis: Bar positions (vertical) or bar widths (when `barh` is True).
        y_vals: Bar heights (vertical) or bar positions (when `barh` is True).
            Must be non-empty; the upper y-limit is set to 1.01 * max(y_vals).
        title, ylabel, xlabel: Texts for the figure.
        grid: Passed to ``plt.grid`` for both major and minor ticks.
        yerror, xerror: Error bars for the vertical / horizontal variant.
        y_labels, x_labels: Optional tick labels placed at `y_vals` / `x_axis`.
        bar_align: Bar alignment passed to matplotlib.
        barh: Draw horizontal bars instead of vertical ones.
        figsize: Figure size in inches.
        log: Use a logarithmic value axis.
        interactive: Show an "(x,y)" annotation for the bar under the mouse cursor.
        x_minor_ticks: Optional minor tick positions for the x-axis.
    """
    max_val = max(y_vals)
    my_cmap = plt.get_cmap("plasma")
    fig = plt.figure(figsize=figsize, dpi=600) # So all bars are visible!
    ax = plt.subplot()

    if not barh:
        bars = plt.bar(x_axis, height=y_vals,color=my_cmap.colors, align=bar_align,yerr=yerror,width=0.9,log=log)
    else:
        bars = plt.barh(y_vals, width=x_axis,color=my_cmap.colors, align=bar_align,xerr=xerror,height=0.9,log=log)

    if x_minor_ticks is not None:
        ax.set_xticks(x_minor_ticks, minor=True)

    # Explicit None/length checks: plain truthiness is ambiguous for numpy arrays.
    if x_labels is not None and len(x_labels) > 0:
        plt.xticks(x_axis, x_labels, rotation='vertical')
    if y_labels is not None and len(y_labels) > 0:
        plt.yticks(y_vals, y_labels)

    plt.ylim(None,max_val*1.01)
    plt.xlim(None,None)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(grid, which='both')

    if interactive:
        annot = ax.annotate("", xy=(0,0), xytext=(-20,20),textcoords="offset points",
                    bbox=dict(boxstyle="round", fc="black", ec="b", lw=2),
                    arrowprops=dict(arrowstyle="->"))
        annot.set_visible(False)

        # The handlers are closures over this call's fig/ax/bars/annot. The
        # module-level hover() looks those names up as globals, which are never
        # set by this function, so registering it raised NameError on mouse move.
        def _show_annotation(bar):
            x = bar.get_x()+bar.get_width()/2.
            y = bar.get_y()+bar.get_height()
            annot.xy = (x,y)
            annot.set_text("({:.2g},{:.2g})".format( x,y ))
            annot.get_bbox_patch().set_alpha(0.4)

        def _on_hover(event):
            was_visible = annot.get_visible()
            if event.inaxes == ax:
                for bar in bars:
                    hit, _ = bar.contains(event)
                    if hit:
                        _show_annotation(bar)
                        annot.set_visible(True)
                        fig.canvas.draw_idle()
                        return
            if was_visible:
                annot.set_visible(False)
                fig.canvas.draw_idle()

        fig.canvas.mpl_connect("motion_notify_event", _on_hover)

    plt.show()
    plt.clf()

    print(f"HEAD = {y_vals[:10]}...")
    print(f"TAIL = ...{y_vals[-10:]}")

def update_annot(bar):
    """ Interactive hovering: move the annotation onto `bar` and set its "(x,y)" text.

    The anchor is (bar x + half its width, bar y + its height).
    Reads the module-level name `annot`.
    NOTE(review): no module-level `annot` is assigned in the visible cells - confirm where it is set.
    """
    anchor_x = bar.get_x() + bar.get_width() / 2.
    anchor_y = bar.get_y() + bar.get_height()
    annot.xy = (anchor_x, anchor_y)
    label = "({:.2g},{:.2g})".format(anchor_x, anchor_y)
    annot.set_text(label)
    annot.get_bbox_patch().set_alpha(0.4)

def hover(event):
    """ Interactive hovering: show the annotation for the bar under the cursor, hide it otherwise.

    Reads the module-level names `annot`, `ax`, `bars` and `fig`.
    NOTE(review): none of these is assigned at module level in the visible cells - confirm where they are set.
    """
    was_visible = annot.get_visible()

    if event.inaxes == ax:
        # First bar that reports containing the event, if any
        hit_bar = next((bar for bar in bars if bar.contains(event)[0]), None)
        if hit_bar is not None:
            update_annot(hit_bar)
            annot.set_visible(True)
            fig.canvas.draw_idle()
            return

    # Cursor is not on a bar: hide a previously shown annotation
    if was_visible:
        annot.set_visible(False)
        fig.canvas.draw_idle()

if EXECUTE:
    # One bar per user, in the order returned by the stacked-barchart cell;
    # lengths are converted from seconds to whole minutes (truncated).
    x_axis = list(range(len(video_lens_s)))
    y_axis = [int(seconds / 60) for seconds in video_lens_s]
    plot_barchart(
        x_axis,
        y_axis,
        title='Video length (min) per user',
        ylabel='Video length (min)',
        xlabel="user",
    )
