In [25]:
import json
import numpy as np

from collections import defaultdict
import sys

sys.path.insert(0, '../')
from src.common import *
from src.questions_construction.domains import DOMAIN_NAMES, ALL_DOMAIN_CLASSES_BY_NAME

In [42]:
# Keys of the per-type counts returned by `num_fluents`; the aggregation cell
# iterates over them in this order when building the per-domain statistics.
FLUENT_TYPES_KEYS = [
    'base',
    'derived',
    'persistent',
    'static',
    'total',
]

def is_fluent_part_of_type(fluent, fluent_prefixes):
    """Tell whether ``fluent`` begins with any of the given prefixes.

    Args:
        fluent: The fluent string to classify.
        fluent_prefixes: Iterable of prefix strings that identify one fluent type.

    Returns:
        True if ``fluent`` starts with at least one prefix, False otherwise
        (including when ``fluent_prefixes`` is empty).
    """
    # `any` stops at the first matching prefix, like an early return would.
    return any(fluent.startswith(candidate) for candidate in fluent_prefixes)

def num_fluents(data, domain_name):
    """Count the fluents of the first record of ``data``, overall and per fluent type.

    The fluents considered are the concatenation of the two lists stored under
    ``FLUENTS_KEY`` and ``NEG_FLUENTS_KEY`` in ``data[0][INIT_ACTION_KEY]``.
    A fluent is counted under every type whose prefix list matches it (see
    ``is_fluent_part_of_type``), so the per-type counts are independent of each
    other and of ``'total'``.

    Args:
        data: Indexable sequence of records; only ``data[0]`` is read.
        domain_name: Key into ``ALL_DOMAIN_CLASSES_BY_NAME`` selecting the domain
            class that provides ``BASE_FLUENTS``, ``DERIVED_FLUENTS``,
            ``PERSISTENT_FLUENTS`` and ``STATIC_FLUENTS``.

    Returns:
        dict with the integer counts under the keys ``'total'``, ``'base'``,
        ``'derived'``, ``'persistent'`` and ``'static'`` (in that order).
    """
    init_entry = data[0][INIT_ACTION_KEY]
    fluents_all = init_entry[FLUENTS_KEY] + init_entry[NEG_FLUENTS_KEY]

    # Resolve the domain class once, instead of once per fluent per type.
    # An unknown `domain_name` therefore fails here even if there are no fluents.
    domain_class = ALL_DOMAIN_CLASSES_BY_NAME[domain_name]
    prefixes_by_type = {
        'base': domain_class.BASE_FLUENTS,
        'derived': domain_class.DERIVED_FLUENTS,
        'persistent': domain_class.PERSISTENT_FLUENTS,
        'static': domain_class.STATIC_FLUENTS,
    }

    results = {'total': len(fluents_all)}
    for fluent_type, prefixes in prefixes_by_type.items():
        # Count matches directly rather than building a list only to measure it.
        results[fluent_type] = sum(
            1 for fluent in fluents_all if is_fluent_part_of_type(fluent, prefixes)
        )
    return results


In [43]:
# For every domain, count the fluents of each instance (via `num_fluents`),
# summarise the counts per fluent type, and save the summary as JSON.
stats_by_domain = {}
for domain_name in sorted(DOMAIN_NAMES):
    # Fluent counts of this domain's instances, keyed by instance name.
    by_instance = {}
    for i in range(1, 11):  # reads the files Instance_1.jsonl ... Instance_10.jsonl
        instance_name = f'Instance_{i}'
        data = open_jsonl(f'{DATA_PATH}/states_actions/{domain_name}/{instance_name}.jsonl')
        by_instance[instance_name] = num_fluents(data, domain_name)

    stats_by_fluent_type = {}
    for fluent_type in FLUENT_TYPES_KEYS:
        fluents_count_by_instance = {name: counts[fluent_type] for name, counts in by_instance.items()}
        fluents_count = list(fluents_count_by_instance.values())
        # Cast NumPy scalars to built-in types so that `json.dump` can serialise them.
        # `np.std` is called with its defaults, i.e. the population standard deviation.
        stats_by_fluent_type[fluent_type] = {'mean': float(np.mean(fluents_count)),
                                             'std': float(np.std(fluents_count)),
                                             'min': int(np.min(fluents_count)),
                                             'max': int(np.max(fluents_count)),
                                             'count_by_instance': fluents_count_by_instance}
    stats_by_domain[domain_name] = stats_by_fluent_type

# Written relative to the current working directory.
with open('fluents_by_domain.json', 'w') as f:
    json.dump(stats_by_domain, f)

In [44]:
# Show the per-domain statistics: {domain: {fluent_type: {'mean', 'std', 'min', 'max', 'count_by_instance'}}}.
# NOTE(review): 'mean' lives one level below the fluent type, so a sort over domains
# has to pick a fluent type first, e.g.:
# sorted(stats_by_domain.items(), key=lambda x: x[1]['total']['mean'])
stats_by_domain

{'blocksworld': {'base': {'mean': 8.1,
   'std': 0.8306623862918076,
   'min': 7,
   'max': 9,
   'count_by_instance': {'Instance_1': 7,
    'Instance_2': 9,
    'Instance_3': 7,
    'Instance_4': 8,
    'Instance_5': 9,
    'Instance_6': 8,
    'Instance_7': 9,
    'Instance_8': 7,
    'Instance_9': 9,
    'Instance_10': 8}},
  'derived': {'mean': 9.1,
   'std': 0.8306623862918076,
   'min': 8,
   'max': 10,
   'count_by_instance': {'Instance_1': 8,
    'Instance_2': 10,
    'Instance_3': 8,
    'Instance_4': 9,
    'Instance_5': 10,
    'Instance_6': 9,
    'Instance_7': 10,
    'Instance_8': 8,
    'Instance_9': 10,
    'Instance_10': 9}},
  'persistent': {'mean': 66.3,
   'std': 13.334541611919024,
   'min': 49,
   'max': 81,
   'count_by_instance': {'Instance_1': 49,
    'Instance_2': 81,
    'Instance_3': 49,
    'Instance_4': 64,
    'Instance_5': 81,
    'Instance_6': 64,
    'Instance_7': 81,
    'Instance_8': 49,
    'Instance_9': 81,
    'Instance_10': 64}},
  'static': {'me