# Dataset stats workbook
- Created by Gabe (2022-05-07)
- For Theo to use to compute dataset statistics

In [28]:
import numpy as np
from run_experiment import init_experiment_state_and_iterator
from dreamcoder.program import Program
from src.config_builder import build_config
from src.experiment_iterator import EXPORT_DIRECTORY
from src.task_loaders import GroundTruthOrderedTaskBatcher

In [89]:
# Domain whose ground-truth programs are summarised in this workbook.
# Other domain names listed in this cell (swap one in to switch datasets):
#   "drawings_nuts_bolts", "drawings_furniture", "drawings_dials",
#   "drawings_wheels", "re2"
# A new value only takes effect once every cell that consumes DOMAIN is re-run.
DOMAIN = "clevr"

In [90]:
# Build the experiment configuration for the currently selected DOMAIN.
config = build_config(
    # Experiment identity and seed.
    experiment_name="test_experiment",
    experiment_type="stitch",
    random_seed=111,
    # Which tasks to load and how they are batched.
    domain=DOMAIN,
    task_batcher="ground_truth_ordered_task_batcher",
    global_batch_size="all",
    # Empty parameter dicts are passed for both components.
    # NOTE(review): presumably build_config falls back to defaults here - confirm.
    stitch_params={},
    codex_params={},
    # Description lengths are requested; likelihood computation is switched off.
    compute_description_lengths=True,
    compute_likelihoods=False,
)

In [91]:
# Create the experiment state / iterator pair from the config, then initialise
# the ground-truth task frontiers for both task splits.
experiment_state, experiment_iterator = init_experiment_state_and_iterator({}, config)
for task_split in ("train", "test"):
    experiment_state.initialize_ground_truth_task_frontiers(task_split=task_split)

Loaded 8 CLEVR question classes: dict_keys(['2_localization', '2_transform', '1_zero_hop', '1_single_or', '1_same_relate_restricted', '1_compare_integer', '1_one_hop', '2_remove'])
Loading dataset 1_compare_integer: train: found 6 tasks.
Loading dataset 1_compare_integer: val: found 4 tasks.
Loading dataset 1_one_hop: train: found 30 tasks.
Loading dataset 1_one_hop: val: found 10 tasks.
Loading dataset 1_single_or: train: found 25 tasks.
Loading dataset 1_single_or: val: found 10 tasks.
Loading dataset 1_zero_hop: train: found 30 tasks.
Loading dataset 1_zero_hop: val: found 30 tasks.
Loading dataset 2_localization: train: found 58 tasks.
Loading dataset 2_localization: val: found 30 tasks.
Loading dataset 2_remove: train: found 23 tasks.
Loading dataset 2_remove: val: found 9 tasks.
Loading dataset 2_transform: train: found 22 tasks.
Loading dataset 2_transform: val: found 10 tasks.
Loaded a total of 194 training tasks and 103 testing tasks for curriculum datasets: [] and main datase

In [92]:
# Fetch the frontiers for every task id in the training split and report how many came back.
# NOTE(review): the saved output shows 191 frontiers, while the loader log above
# reports 194 training tasks - confirm why three tasks have no frontier here.
train_frontiers = experiment_state.get_frontiers_for_ids(
    task_ids="all",
    task_split="train",
)
print(len(train_frontiers))

191


In [93]:
# A frontier contains one or more programs that solve a task.
# Display the first training frontier; its repr lists the entries (each with a
# program, logPrior and logLikelihood) followed by the task it belongs to.
train_frontiers[0]

Frontier(entries=[FrontierEntry(program=(lambda (clevr_lt? (clevr_count (clevr_fold (clevr_fold (clevr_fold $0 clevr_empty (lambda (lambda (clevr_if (clevr_eq_size clevr_large (clevr_query_size $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_material clevr_rubber (clevr_query_material $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_shape clevr_cube (clevr_query_shape $1)) (clevr_add $1 $0) $0))))) (clevr_count (clevr_fold (clevr_fold (clevr_fold $0 clevr_empty (lambda (lambda (clevr_if (clevr_eq_size clevr_large (clevr_query_size $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_color clevr_green (clevr_query_color $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_material clevr_rubber (clevr_query_material $1)) (clevr_add $1 $0) $0))))))), logPrior=0.0, logLikelihood=0.0], task=0-1_compare_integer-Is the number of large rubber cubes less than the number of large green ru

In [94]:
# The first program in the frontier. You can assume all domains have one program per frontier.
# `p` is kept as a notebook-level name: the length examples in the following cells read it.
p = train_frontiers[0].entries[0].program
print(p)

(lambda (clevr_lt? (clevr_count (clevr_fold (clevr_fold (clevr_fold $0 clevr_empty (lambda (lambda (clevr_if (clevr_eq_size clevr_large (clevr_query_size $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_material clevr_rubber (clevr_query_material $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_shape clevr_cube (clevr_query_shape $1)) (clevr_add $1 $0) $0))))) (clevr_count (clevr_fold (clevr_fold (clevr_fold $0 clevr_empty (lambda (lambda (clevr_if (clevr_eq_size clevr_large (clevr_query_size $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_color clevr_green (clevr_query_color $1)) (clevr_add $1 $0) $0)))) clevr_empty (lambda (lambda (clevr_if (clevr_eq_material clevr_rubber (clevr_query_material $1)) (clevr_add $1 $0) $0)))))))


In [95]:
# Description length: the number of tokens returned by
# Program.left_order_tokens for the program (called with show_vars=True).
len(Program.left_order_tokens(p, show_vars=True))

71

In [96]:
# Character length: the length of the program's string form (the text print(p) shows).
len(str(p))

825

In [86]:
# TODO(theoxo): Compute and report the following for the paper
# - number of programs in each domain, broken down by train/test
# - mean and std of description and character lengths for all domains, broken down by train/test
# - any other relevant program stats you can think of

In [87]:
# Collect, for each task split, the number of frontiers together with the
# description length and character length of each frontier's first program.
data = {}
for t in ["train", "test"]:
    frontiers = experiment_state.get_frontiers_for_ids(task_split=t, task_ids="all")
    # Only the first entry of each frontier is used.
    first_programs = [frontier.entries[0].program for frontier in frontiers]
    data[t] = {
        "count": len(frontiers),
        # Token count from Program.left_order_tokens, called with show_vars=True.
        "dls": np.array(
            [len(Program.left_order_tokens(program, show_vars=True)) for program in first_programs]
        ),
        # Length of each program's string form.
        "chars": np.array([len(str(program)) for program in first_programs]),
    }

In [88]:
# Report the per-split program counts and the (mean, std-dev) of both length metrics.
#
# np.mean / np.std return NumPy scalars. They are converted to built-in floats
# before being placed in the printed tuple, so the report reads
# "(41.03..., 27.01...)" on every NumPy version; NumPy >= 2 would otherwise
# render each tuple element as "np.float64(...)".
# np.std is left at its default, i.e. the population standard deviation (ddof=0).
#
# The numbers describe whatever `data` currently holds: re-run the cells that
# build `data` after changing DOMAIN, otherwise the header line and the
# statistics can refer to different domains.
print(f"Domain={DOMAIN}")
print(f"Number of programs: train={data['train']['count']} test={data['test']['count']}")
for label, key in (("description length", "dls"), ("char length", "chars")):
    per_split = {
        split: (float(np.mean(data[split][key])), float(np.std(data[split][key])))
        for split in ("train", "test")
    }
    print(f"Mean and std-dev of {label}: train={per_split['train']} test={per_split['test']}")

Domain=re2
Number of programs: train=491 test=500
Mean and std-dev of description length: train=(41.032586558044805, 27.01585408468125) test=(38.95, 26.11121406599088)
Mean and std-dev of char length: train=(276.4725050916497, 179.9200681249624) test=(262.742, 172.69280076482633)
