# 20 Billion Something-Something

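Notebook for processing the 20BN Something-Something dataset: it builds the labels, evaluates pre/post-condition tests, generates the HDF5 datasets, and analyzes the resulting dataset distribution.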



In [1]:
%load_ext autoreload
%autoreload 2

## Display video grid

In [2]:
# !pip install pysymbolic #-e ../../symbolic

In [3]:
# !pip install mediapipe==0.10.0

In [4]:
# !pip install numpy h5py hdf5plugin Pillow tqdm pandas av seaborn ipywidgets opencv-python 'mediapipe==0.10.0'
# !pip install torch torchvision tensorboard

In [5]:
import pathlib
import typing

import sys
sys.path.append("..")

from gpred import video_utils
from env import twentybn

In [6]:
# random.seed(0)

In [7]:
import os
import pickle
import time
import symbolic
import config

# Resolve the project paths for the "twentybn" environment.
paths = config.EnvironmentPaths(environment="twentybn")
# Load the PDDL domain/problem pair stored in the environment directory.
pddl = symbolic.Pddl(str(paths.env / "domain.pddl"), str(paths.env / "problem.pddl"))
# pddl = symbolic.Pddl(str(paths.domain_pddl), str(paths.problem_pddl))

# Generate labels

## Load datasets

### 20BN Something Something

In [8]:
!ls /Something2

SomethingElse  data  labels  videos


In [9]:
# !ln -s /Something2/labels ../data/twentybn/labels
# !ln -s /Something2/videos ../data/twentybn/videos
# !ln -s /Something2/SomethingElse ../data/twentybn/SomethingElse

## Reformat labels

In [10]:
from s20bn_utils.predicate_tests import generate_tests, precompute_tests, process_action
from s20bn_utils.build_dataset import append_pre_post_to_dataset
from s20bn_utils.build_dataset import build_dataset

In [11]:
# Build the dataset and keep the four returned pieces under the names used by
# the rest of the notebook. The recorded run below took about 2.5 minutes.
train_set, val_set, video_labels, action_instances = build_dataset()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [02:37<00:00, 39.26s/it]


../data/twentybn
../data/twentybn/labels.hdf5 exists


In [12]:
# Wrap the labels.hdf5 file from the data directory in the project's Labels class.
labels = twentybn.dataset.Labels(paths.data / "labels.hdf5")

# Extract pre and post frames

## Detect hands

In [13]:
import os
import concurrent.futures
import pickle
import time
import symbolic
from s20bn_utils.precompute_hands import load_hands

In [None]:
hands = load_hands()
# Unwrap every entry by keeping only its first element. This is only needed
# when the hand detector wraps each result in a one-element list.
for k in hands:
    hands[k] = [x[0] for x in hands[k]]  # only if return [r] in hand_detector.py
len(hands)

## Evaluate pre/post-condition tests

In [78]:
from s20bn_utils.build_dataset import append_pre_post_to_dataset
from s20bn_utils.predicate_tests import evaluate_pre_post

In [79]:
# Generate the predicate tests from the loaded PDDL definition.
tests = generate_tests(pddl)

In [80]:
from apps.hand_detector import Hand

In [81]:
# for i in hands:
#     h=hands[i]
#     for hi in h:
#         print(len(hi), [[len(x) for x in x] for x in hi], hi)
#     input()

In [83]:
# Evaluate the generated tests using the detected hands and the labels.
# NOTE(review): the recorded run of this cell ended with
# `NameError: name 'pickle' is not defined` even though this notebook imports
# pickle in earlier cells, so the failure presumably originates inside the
# imported module rather than here — confirm.
test_results = evaluate_pre_post(pddl, paths, hands, tests, labels)

  v_line /= np.linalg.norm(v_line)
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 174/174 [15:50<00:00,  5.46s/it]


NameError: name 'pickle' is not defined

In [None]:
# Store the evaluated pre/post test results in the dataset.
append_pre_post_to_dataset(test_results, paths)

## Compute condition test statistics

In [None]:
import typing

import h5py
import numpy as np
import pandas as pd
import tqdm.notebook

import config

def compute_condition_statistics(paths: config.EnvironmentPaths, train_set: typing.List[int], val_set: typing.List[int]):
    """Tabulates, per video, the size of its stored pre/post-condition data.

    Args:
        paths: Environment paths; `paths.data / "labels.hdf5"` is read.
        train_set: Video ids in the original 20BN train set. Not consulted:
            every video that is not in `val_set` is labelled "train".
        val_set: Video ids in the original 20BN val set.
    Returns:
        Dataframe with one row per entry of the file's "video_ids":
        | Video | Action | Dataset | Pre | Post |
        where Pre/Post are the `.size` of the video's "pre"/"post" datasets.
    """
    val_ids = set(val_set)

    videos, actions, datasets, pres, posts = [], [], [], [], []
    with h5py.File(paths.data / "labels.hdf5", "r") as h5:
        all_videos = h5["videos"]
        for id_video in tqdm.notebook.tqdm(np.array(h5["video_ids"])):
            entry = all_videos[str(id_video)]

            action = int(entry.attrs["id_action"])
            # Anything that is not a val video is treated as a train video.
            split = "train" if id_video not in val_ids else "val"
            num_pre = entry["pre"].size
            num_post = entry["post"].size

            videos.append(id_video)
            actions.append(action)
            datasets.append(split)
            pres.append(num_pre)
            posts.append(num_post)

    return pd.DataFrame(
        {
            "Video": videos,
            "Action": actions,
            "Dataset": datasets,
            "Pre": pres,
            "Post": posts,
        }
    )

In [None]:
# One row per video: action id, train/val membership, and pre/post sizes.
stats = compute_condition_statistics(paths, train_set, val_set)

## Generate train, val, test splits

In [None]:
# NOTE(review): `generate_dataset_splits` is not imported or defined in any
# visible cell of this notebook — confirm where it comes from before running.
# The call rebinds `train_set` and `val_set` and introduces `test_set`.
train_set, val_set, test_set = generate_dataset_splits(pddl, stats, train_set, val_set)

print(f"Train: {len(train_set)}")
print(f"Val: {len(val_set)}")
print(f"Test: {len(test_set)}")

# Persist the three splits together as a single pickled tuple.
with open(paths.data / "dataset_splits.pkl", "wb") as f:
    pickle.dump((train_set, val_set, test_set), f)

## Analyze condition statistics

In [None]:
import pathlib

import matplotlib.pyplot as plt
import seaborn as sns

# Tall, narrow canvas: one pair of horizontal bars per action.
plt.subplots(figsize=(5, 40))

# Per action, the fraction of videos with a non-empty "pre" / "post" entry,
# reshaped to long form so both conditions can be drawn as hue groups.
stats[["Action"]] \
    .assign(Pre = stats.Pre > 0, Post = stats.Post > 0) \
    .groupby("Action", as_index=False) \
    .mean() \
    .melt(id_vars="Action", value_vars=["Pre","Post"], var_name="Condition") \
    .pipe((sns.barplot, "data"), y="Action", x="value", hue="Condition", orient="h")

# savefig does not create missing directories, so make sure the target exists.
pathlib.Path("figures").mkdir(parents=True, exist_ok=True)
# `transparent` is a boolean flag; pass a real bool instead of the string "True".
plt.savefig("figures/pre_post.png", bbox_inches="tight", transparent=True, pad_inches=0)
plt.show()

In [None]:
# "Partial" counts videos with a non-empty pre OR post entry, "Complete" those
# with both; the last two lines give the complete count per original split.
print(f"Partial: {len(stats[(stats.Pre > 0) | (stats.Post > 0)])} / {len(stats)}")
print(f"Complete: {len(stats[(stats.Pre > 0) & (stats.Post > 0)])} / {len(stats)}")
print(f"Train: {len(stats[(stats.Dataset == 'train') & (stats.Pre > 0) & (stats.Post > 0)])} / {len(stats[stats.Dataset == 'train'])}")
print(f"Val: {len(stats[(stats.Dataset == 'val') & (stats.Pre > 0) & (stats.Post > 0)])} / {len(stats[stats.Dataset == 'val'])}")

## Visualize condition tests

In [None]:
# Sort actions by proportion of videos with pre/post frames (from lowest to highest).
# Each action is scored by the smaller of its two fractions (videos with a
# non-empty pre entry, videos with a non-empty post entry).
# Selecting the "Action" column as a Series and calling .tolist() always
# yields a list, even when only a single action is present (squeezing a
# one-row array would collapse it to a bare scalar).

id_actions = (
    stats[["Action"]]
    .assign(Pre=stats.Pre > 0, Post=stats.Post > 0)
    .groupby("Action", as_index=False)
    .mean()
    .melt(id_vars="Action", value_vars=["Pre", "Post"], var_name="Condition")
    [["Action", "value"]]
    .groupby("Action", as_index=False)
    .min()
    .sort_values("value")
    ["Action"]
    .tolist()
)

In [None]:
# Generate videos for 30 worst-performing actions.

# Show which action ids are about to be processed. A bare expression in the
# middle of a cell is silently discarded, so print it explicitly.
print(" ".join(str(id_action) for id_action in id_actions[:30]))

for id_action in tqdm.notebook.tqdm(id_actions[:30]):
    process_action(id_action, generate_video=True, num_videos=15)

In [None]:
from s20bn_utils.draw import display_video_grid

# Collect the ids (integer file stems) of every .mp4/.webm clip on disk.
path_videos = paths.data / "labeled_videos"
id_videos = [
    int(video_file.stem)
    for video_file in path_videos.iterdir()
    if video_file.suffix in {".mp4", ".webm"}
]

# Bucket the clip ids by action id: one (possibly empty) list per PDDL action.
mini_action_instances = [[] for _ in pddl.actions]
for id_video in id_videos:
    id_action = labels.videos[id_video].id_action
    mini_action_instances[id_action].append(id_video)

display_video_grid(labels, mini_action_instances, path_videos, num_rows=5)

In [None]:
# Re-process a single action and fold its results into the accumulated ones.
id_action = 60
test_results.update(process_action(id_action))
# Persist the merged results.
with open(paths.data / "condition_test_results.pkl", "wb") as f:
    pickle.dump(test_results, f)

# Write the results back to the dataset (restricted via `id_action`), then run
# the action again with video generation enabled; its return value is unused.
append_pre_post_to_dataset(test_results, paths, id_action=id_action)
_ = process_action(id_action, generate_video=True, num_videos=15)

In [None]:
# Inspect the pre/post frames found for one specific video.
# NOTE(review): `find_pre_post_frames` is not imported or defined in any
# visible cell of this notebook — confirm where it comes from.
id_video = 43454
find_pre_post_frames(test_results[id_video])

# Load labels

In [None]:
import pickle

import config

# Reload the pickled label artifacts from the data directory. The bare string
# literal before each load sketches the expected layout of the object.
# NOTE: pickle.load can execute arbitrary code, so only load files produced by
# this project.
paths = config.EnvironmentPaths(environment="twentybn")

"""
action_labels = [
    {
        "label": "Approaching something with your camera",
        "template": "Approaching [something] with your camera",
    }
]
"""
with open(paths.data / "action_labels.pkl", "rb") as f:
    action_labels = pickle.load(f)

"""
action_instances = [
    [{id_video}, ...]
]
"""
with open(paths.data / "action_instances.pkl", "rb") as f:
    action_instances = pickle.load(f)

"""
video_labels = {
    {id_video}: {
        "id_action": id_action,
        "placeholders": ["a potato", "a vicks vaporub bottle"],
        "objects": ["potato", "bottle"],
        "frames": {
            idx_frame: {
                "{id_object/hand}": [[x1, y1], [x2, y2]],
            },
        },
    },
}
"""
with open(paths.data / "video_labels.pkl", "rb") as f:
    video_labels = pickle.load(f)

"""
train_set = [{id_video}, ...]
"""
with open(paths.data / "train_set.pkl", "rb") as f:
    train_set = pickle.load(f)

"""
val_set = [{id_video}, ...]
"""
with open(paths.data / "val_set.pkl", "rb") as f:
    val_set = pickle.load(f)

"""
video_ranges = {
    {id_video}: (
        [idx_pre_frames, ...],
        [idx_post_frames, ...]
    )
}
"""
with open(paths.data / "video_ranges.pkl", "rb") as f:
    video_ranges = pickle.load(f)

# Generate hdf5 datasets

In [None]:
from s20bn_utils.build_dataset import extract_pre_post

# Smaller "mini" variants restricted to the first 10000 ids of each set
# (kept disabled):
#extract_pre_post(train_set[:10000], "pre_post_train_mini", paths.data)
#extract_pre_post(val_set[:10000], "pre_post_val_mini", paths.data)
# Full extraction for the train and val sets.
extract_pre_post(train_set, "pre_post_train", paths.data)
extract_pre_post(val_set, "pre_post_val", paths.data)

In [None]:
from s20bn_utils.build_dataset import create_predicate_dataset
# One predicate dataset per split. `test_set` is only bound by the
# "Generate train, val, test splits" cell, so that cell must have run first.
create_predicate_dataset(pddl, labels, train_set, "predicate_train", paths.data)
create_predicate_dataset(pddl, labels, val_set, "predicate_val", paths.data)
create_predicate_dataset(pddl, labels, test_set, "predicate_test", paths.data)

# Analyze dataset distribution

In [None]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
import tqdm.notebook

from gpred import dnf_utils


def plot_predicate_counts(stats: pd.DataFrame):
    """Plots predicates (x) vs. count (y).

    Draws a count plot on a new 20x10 figure with the rows sorted by
    predicate, one bar colour per "Label" value, and the x tick labels
    rotated by 90 degrees.

    Args:
        stats: Longform dataframe output by `compute_pddl_statistics()`;
            its "Predicate" and "Label" columns are used.
    """
    # The seaborn style only applies to axes created afterwards, so it has to
    # be set before the figure is created.
    sns.set_style("whitegrid")
    _, ax = plt.subplots(figsize=(20, 10))

    # Draw on the axes created above instead of relying on the implicit
    # "current axes" state.
    sns.countplot(data=stats.sort_values("Predicate"), x="Predicate", hue="Label", ax=ax)
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)

def plot_dnfs(stats: pd.DataFrame):
    """Plots a heatmap of actions vs. propositions specified by their DNFs.

    The longform table is pivoted to one row per (Action, Condition) pair and
    one column per Proposition, with the "Label" column cast to float as the
    cell value. The rows are drawn in sections of `SIZE_SECTION` rows, one
    heatmap per section, stacked vertically in a single figure.

    Args:
        stats: Longform table output by compute_pddl_statistics().
    """
    SIZE_SECTION = 10
    CMAP = sns.diverging_palette(10, 130, n=100)

    df_action_v_prop = stats.astype({"Label": float}).pivot(index=["Action", "Condition"], columns="Proposition", values="Label")
    num_sections = math.ceil(len(df_action_v_prop) / SIZE_SECTION)

    # squeeze=False keeps `axs` two-dimensional even when there is only one
    # section, so it can always be indexed as axs[i, 0].
    _, axs = plt.subplots(num_sections, 1, figsize=(25, num_sections * 5), squeeze=False)

    for i in tqdm.notebook.tqdm(range(num_sections)):
        # Positional slicing clamps at the end of the frame, so the last
        # section may simply hold fewer rows.
        section = df_action_v_prop.iloc[i * SIZE_SECTION:(i + 1) * SIZE_SECTION]
        sns.heatmap(data=section, ax=axs[i, 0], square=True, cmap=CMAP, linewidths=0.5, linecolor="#eee", cbar_kws={"shrink": 0.5})
        
def plot_predicate_weights(w: np.ndarray):
    """Plots predicates (x) vs. weight (y).

    Relies on the notebook-level `pddl` object for the predicate names.

    Args:
        w: Weight array whose transpose has one row per entry of
            `pddl.predicates` and two columns, plotted as the "Pos" and
            "Neg" labels respectively.
    """
    # The seaborn style only applies to axes created afterwards, so it has to
    # be set before the figure is created.
    sns.set_style("whitegrid")
    _, ax = plt.subplots(figsize=(20, 10))

    # Long form: one row per (Predicate, Label) pair with its weight.
    df = (
        pd.DataFrame(w.T, columns=["Pos", "Neg"], index=[str(pred) for pred in pddl.predicates])
        .rename_axis("Predicate")
        .reset_index()
        .melt(id_vars="Predicate", value_vars=["Pos", "Neg"], var_name="Label", value_name="Weight")
    )

    sns.barplot(data=df.sort_values("Predicate"), x="Predicate", y="Weight", hue="Label", ax=ax)
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)


In [None]:
import h5py

# Map every entry of the file's "actions" dataset to the string form of the
# PDDL action at that index.
# NOTE: this rebinds `action_instances`, which earlier cells bind to the value
# returned by build_dataset() / loaded from action_instances.pkl.
with h5py.File(paths.data / "predicate_val.hdf5","r") as f:
    actions = [str(action) for action in pddl.actions]
    action_instances = [actions[idx_action] for idx_action in f["actions"]]

In [None]:
# NOTE: this rebinds `stats`, which earlier cells use for the per-video
# condition statistics dataframe.
stats = dnf_utils.compute_pddl_statistics(pddl)

In [None]:
# NOTE(review): the problem file is taken from `paths.problem_pddl` here,
# whereas the first cell that builds `pddl` uses `paths.env / "problem.pddl"`
# and keeps the `paths.problem_pddl` variant commented out — confirm which
# one is intended.
pddl = symbolic.Pddl(str(paths.env / "domain.pddl"), str(paths.problem_pddl))
stats = dnf_utils.compute_pddl_statistics(pddl)

plot_predicate_counts(stats)

In [None]:
# Class weights computed with the action instances supplied; values above 1
# are clipped to 1 before plotting.
w = dnf_utils.compute_predicate_class_weights(pddl, action_instances=action_instances)
plot_predicate_weights(np.minimum(1, w))

In [None]:
# Class weights computed from the PDDL alone (no action instances passed),
# plotted without clipping.
w_inv = dnf_utils.compute_predicate_class_weights(pddl)
plot_predicate_weights(w_inv)

In [None]:
# Heatmap of actions vs. propositions for the PDDL statistics table.
plot_dnfs(stats)

### Find video resolution ranges

In [None]:
# Track the smallest and largest values seen for the two `images` dimensions
# after the first two (presumably height and width), scanning the train file
# first and then the val file.
# NOTE(review): the loop iterates f["videos"] but indexes the file root with
# f[str(id_video)] — confirm that this matches the layout of these files.
H_max, H_min = 0, 10000
W_max, W_min = 0, 10000
for filename in ("pre_post_train.hdf5", "pre_post_val.hdf5"):
    with h5py.File(paths.data / filename, "r") as f:
        for id_video in tqdm.tqdm(f["videos"]):
            dim = f[str(id_video)]["images"].shape[2:]
            H_min = min(H_min, dim[0])
            H_max = max(H_max, dim[0])
            W_min = min(W_min, dim[1])
            W_max = max(W_max, dim[1])

print(H_max, W_max, H_min, W_min)

### List videos with mismatching placeholders

In [None]:
# Report every video whose number of objects differs from its number of
# placeholders, skipping actions 102 and 144.
for id_video, video_label in video_labels.items():
    if len(video_label["objects"]) == len(video_label["placeholders"]):
        continue
    if video_label["id_action"] in (102, 144):
        continue
    print(id_video, video_label["id_action"], video_label["objects"], video_label["placeholders"])