In [1]:
from collections import Counter, defaultdict
import json
import os

import pandas as pd
# Display settings for the summary tables rendered below.
# Option names are fully qualified: the bare 'precision' shortcut is resolved by
# pattern matching and becomes ambiguous (OptionError) on pandas versions that
# also define 'styler.format.precision'.
pd.set_option('display.max_rows', 999)
pd.set_option('display.precision', 2)
import parse

# Define some global variables and functions

In [2]:
# Frame-extraction rate (frames per second, going by the name). It is multiplied
# by a clip's duration in seconds to estimate how many frames the clip contains.
FPS_USED_TO_EXTRACT_FRAMES = 5.07

# Maps each raw annotation "behavior" string to one canonical ("representative")
# action label. Lookups are exact-match, so case, typos and whitespace in the
# keys are significant and must not be normalised here.
# Note that some canonical labels themselves carry a trailing space
# ('Drinking ', 'walking ', 'cutting '); they end up verbatim in the exported
# mapping files.
act2rep = {
    # --- none (no action) ---
    'none': 'none',
    'None': 'none',
    'nonoe': 'none',
    'noen': 'none',
    # --- Drinking ---
    'Drinking ': 'Drinking ',
    'drinking': 'Drinking ',
    # --- phone call ---
    'phone call': 'phone call',
    'Phone call': 'phone call',
    'call': 'phone call',
    # --- nodding ---
    'nodding': 'nodding',
    'Nodding': 'nodding',
    # --- walking ---
    'walking ': 'walking ',
    'walking': 'walking ',
    # --- Holding something (all "holding <object>" variants are merged) ---
    'Holding something': 'Holding something',
    'holding a jacket': 'Holding something',
    'holding a cup': 'Holding something',
    'holding paper': 'Holding something',
    'holding  something ': 'Holding something',
    'holding something': 'Holding something',
    'holding a chair': 'Holding something',
    'holding newspaper': 'Holding something',
    'holding something ': 'Holding something',
    'hold something': 'Holding something',
    'hodling a bottle': 'Holding something',
    'holding a paper': 'Holding something',
    'holding a telephone': 'Holding something',
    'holding a bottle': 'Holding something',
    # --- eating ---
    'eating': 'eating',
    # --- standing ---
    'standing': 'standing',
    'Standing up': 'standing',
    'stading up': 'standing',
    'standing up': 'standing',
    'standing up ': 'standing',
    # --- find something ---
    'find something': 'find something',
    'finding something': 'find something',
    'Finding something': 'find something',
    # --- waving hands ---
    'waving hands': 'waving hands',
    'Waving hands': 'waving hands',
    # --- pushing away ---
    'pushing away': 'pushing away',
    'pusing away': 'pushing away',
    # --- cooking ---
    'cooking': 'cooking',
    # --- sitting down ---
    'sitting down': 'sitting down',
    'Sitting down': 'sitting down',
    'sitting on': 'sitting down',
    # --- watching ---
    'watching': 'watching',
    'watching TV': 'watching',
    'watching tv': 'watching',
    # --- high-five ---
    'high-five': 'high-five',
    'High five': 'high-five',
    'high-five ': 'high-five',
    'high five': 'high-five',
    # --- single-variant labels ---
    'opening door': 'opening door',
    'singing': 'singing',
    'shaking hands': 'shaking hands',
    # --- dance ---
    'dance': 'dance',
    'danicng': 'dance',
    'dancing': 'dance',
    # --- cutting ---
    'cutting ': 'cutting ',
    # --- look back on ---
    'look back on': 'look back on',
    'look back at': 'look back on',
    'look back at ': 'look back on',
    'Look back at': 'look back on',
    # --- Pointing out ---
    'Pointing out': 'Pointing out',
    'pointing out': 'Pointing out',
    # --- kissing ---
    'kissing': 'kissing',
    # NOTE(review): 'cup' and the single-space label below (and 'desk' further
    # down) do not read like actions -- presumably annotation noise that is kept
    # so that lookups do not fail; confirm against the annotation files.
    'cup': 'cup',
    ' ': ' ',
    # --- Putting arms around each other's shoulder ---
    # Two of the keys use a typographic apostrophe (U+2019) instead of "'".
    "Putting arms around each other's shoulder": "Putting arms around each other's shoulder",
    'Putting arms around each other’s shoulder': "Putting arms around each other's shoulder",
    'putting arms around each other’s shoulder': "Putting arms around each other's shoulder",
    "putting arms around each other's shoulder": "Putting arms around each other's shoulder",
    # --- remaining labels ---
    'destroy something': 'destroy something',
    'hugging': 'hugging',
    'Hugging': 'hugging',
    'wearing lipstick': 'wearing lipstick',
    'desk': 'desk',
    'cleaning up': 'cleaning up',
    'playing guitar': 'playing guitar',
    'smoking': 'smoking'
}
# Every raw label known to the mapping, in declaration order.
actions = list(act2rep.keys())
# Canonical labels, de-duplicated in first-seen order. dict.fromkeys() is used
# instead of set() because iterating a set of strings yields a different order
# on every interpreter start (hash randomisation), which would make anything
# derived from this list non-reproducible across kernel restarts.
representative_actions = list(dict.fromkeys(act2rep.values()))

# Compiled once here instead of on every call: the function below is invoked
# twice per annotated segment, and the pattern never changes.
_TIMESTR_PARSER = parse.compile("{:d}:{:d}:{:d};{:d}")


def timestr_to_seconds(timestr):
    """Convert a timestamp of the form ``H:M:S;X`` into seconds.

    Args:
        timestr: String with four integer fields laid out as
            ``"{:d}:{:d}:{:d};{:d}"`` (hours, minutes, seconds, and a fourth
            sub-second field).

    Returns:
        float: ``3600*H + 60*M + S + X/60``.

    Raises:
        ValueError: If ``timestr`` does not match the expected layout.
    """
    fields = _TIMESTR_PARSER.parse(timestr)
    if fields is None:
        # parse() signals "no match" by returning None; fail with a message
        # that names the offending value instead of an unpacking TypeError.
        raise ValueError(
            "Unrecognised time string {!r}; expected 'H:M:S;X'".format(timestr)
        )
    h, m, s, sub = fields
    # NOTE(review): the fourth field is scaled by 1/60, so it is presumably a
    # frame/tick counter at 60 units per second rather than milliseconds --
    # confirm against the annotation format.
    # The expression is kept in this exact form so results stay bit-identical.
    seconds = 3600*h + 60*m + s + 1/60*sub
    return seconds

# Load data

In [3]:
# Read every annotation file under the annotation root into memory.
annotations_list = []
annotation_root_dpath = "./data/friends_trimmed/annotations"
# sorted(): os.listdir() returns entries in arbitrary order; sorting makes the
# load order (and therefore annotations_list) reproducible.
for annotation_fname in sorted(os.listdir(annotation_root_dpath)):
    fpath = os.path.join(annotation_root_dpath, annotation_fname)
    if not os.path.isfile(fpath):
        # Skip sub-directories (e.g. notebook checkpoint folders), which
        # would otherwise make open() fail.
        continue
    # Explicit UTF-8: behavior labels contain non-ASCII characters (e.g. the
    # typographic apostrophe), and the platform default encoding is not
    # guaranteed to be UTF-8.
    with open(fpath, 'r', encoding='utf-8') as fin:
        annotations = json.load(fin)
    annotations_list.append(annotations)

# Basic Statistics

In [4]:
# Tally, per canonical action, the estimated number of frames and the number of
# (segment, person) occurrences -- referred to as "clips" below.
frame_counter = dict.fromkeys(representative_actions, 0)
clip_counter = dict.fromkeys(representative_actions, 0)
for annotations in annotations_list:
    for annotation in annotations["visual_results"]:
        # Segment duration in seconds, then truncated to whole frames.
        end_sec = timestr_to_seconds(annotation["end_time"])
        start_sec = timestr_to_seconds(annotation["start_time"])
        n_frames = int((end_sec - start_sec) * FPS_USED_TO_EXTRACT_FRAMES)

        # Only the first element of "person" is read; it maps each person to a
        # list whose first entry carries that person's "behavior" label.
        for info in annotation["person"][0].values():
            representative_action = act2rep[info[0]["behavior"]]
            frame_counter[representative_action] += n_frames
            clip_counter[representative_action] += 1

In [5]:
# Build one [action, #frames, #clips, avg frames per clip] row per canonical
# action, ordered by descending clip count.
data = []
for action in representative_actions:
    n_frames = frame_counter[action]
    n_clips = clip_counter[action]
    # A canonical label may never occur in the loaded annotations, leaving its
    # clip count at 0; report an average of 0.0 rather than dividing by zero.
    n_frames_per_clip = n_frames / n_clips if n_clips else 0.0
    data.append([ action, n_frames, n_clips, n_frames_per_clip ])
data = sorted(data, key=lambda x: -x[2])
pd.DataFrame(data, columns=["action", "#frames", "#clips", "avg. #frames per clip"])

Unnamed: 0,action,#frames,#clips,avg. #frames per clip
0,none,2142347,101432,21.12
1,Holding something,96949,4482,21.63
2,standing,57873,2277,25.42
3,sitting down,31957,1438,22.22
4,look back on,11967,696,17.19
5,walking,17520,678,25.84
6,eating,7435,369,20.15
7,phone call,7698,307,25.07
8,watching,3088,214,14.43
9,Pointing out,4506,207,21.77


## Trim data

* Exclude "none"
* Exclude actions that have fewer than 2 clips

In [6]:
# Keep only rows for real actions (everything except "none") that occur in at
# least two clips. Row layout: [action, #frames, #clips, avg frames per clip].
trimmed_data = [
    row
    for row in data
    if row[0] != "none" and row[2] >= 2
]
pd.DataFrame(trimmed_data, columns=["action", "#frames", "#clips", "avg. #frames per clip"])

Unnamed: 0,action,#frames,#clips,avg. #frames per clip
0,Holding something,96949,4482,21.63
1,standing,57873,2277,25.42
2,sitting down,31957,1438,22.22
3,look back on,11967,696,17.19
4,walking,17520,678,25.84
5,eating,7435,369,20.15
6,phone call,7698,307,25.07
7,watching,3088,214,14.43
8,Pointing out,4506,207,21.77
9,opening door,3646,179,20.37


# Save trimmed mapping files

In [7]:
# Per-action statistics for the actions that survived trimming.
trimmed_rep2sta = {}
trimmed_representative_actions = set()
for row in trimmed_data:
    rep, n_frames, n_clips, _ = row
    trimmed_representative_actions.add(rep)
    trimmed_rep2sta[rep] = {
        "n_frames": n_frames,
        "n_clips": n_clips,
    }

# Assign class indices over the *sorted* labels. Enumerating the set directly
# would hand out indices in hash order, which changes on every kernel restart
# and would silently invalidate previously exported index files.
trimmed_rep2idx = { rep: idx for idx, rep in enumerate(sorted(trimmed_representative_actions)) }
trimmed_idx2rep = { idx: rep for rep, idx in trimmed_rep2idx.items() }
# Raw label -> class index, restricted to raw labels whose canonical label was kept.
trimmed_act2idx = { act: trimmed_rep2idx[act2rep[act]] for act in actions if act2rep[act] in trimmed_rep2idx }

# Write all mappings with the same JSON settings.
for out_fpath, mapping in [
    ("data/rep2sta.json", trimmed_rep2sta),
    ("data/act2idx.json", trimmed_act2idx),
    ("data/rep2idx.json", trimmed_rep2idx),
    ("data/idx2rep.json", trimmed_idx2rep),
]:
    with open(out_fpath, 'w') as fout:
        json.dump(mapping, fout, indent=2, sort_keys=True)