In [1]:
%autoreload 2

In [2]:
# Standard-library imports.
from collections import defaultdict
import json
import os
import sys


import logging
# Raise matplotlib's logger threshold to WARNING so its lower-level messages stay out of the notebook output.
logging.getLogger('matplotlib').setLevel(logging.WARNING)


# Third-party parser generator, used below to compile the DSL grammar.
import tatsu
import tatsu.ast

# Add the parent directory and its `src` subdirectory to the import path.
# These appends must run before the `src` imports that follow.
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.fitness_energy_utils import  load_data
# NOTE(review): the wildcard imports below hide where names come from (e.g.
# `cached_load_and_parse_games_from_file`, used later, presumably arrives via
# one of them) — consider replacing them with explicit imports.
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.evolutionary_sampler import *
from src import fitness_features_by_category, latest_model_paths

2024-08-16 17:38:02 - ast_utils - DEBUG    - Using cache folder: /Users/guydavidson/tmp/game_generation_cache


In [3]:
# Read the DSL grammar and compile it into a tatsu parser. The context manager
# closes the grammar file as soon as it has been read instead of leaving the
# handle open until garbage collection.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)

# Parse every game in the dataset file and materialise the results as a list.
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
# The game name is read from element 1 of each parsed AST.
all_game_ids = [ast[1].game_name for ast in game_asts]
total_games = len(all_game_ids)

# Trace-filter results for the same dataset; later cells index
# `human_games_trace_filter_data['full']` by a game's position in `all_game_ids`.
# (Plain string literal: the original f-string had no placeholders.)
human_games_trace_filter_data = load_data('', 'samples', '/trace_filter_results_interactive-beta.pddl_2024_03_19', relative_path='..')

2024-08-16 17:38:05 - ast_utils - INFO     - Loading from cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz
2024-08-16 17:38:05 - ast_utils - INFO     - Finished loading cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz


## Load a mapping of game IDs to trace IDs

In [4]:
# Location of the JSON file holding the game-ID -> trace-IDs mapping.
GAME_TO_TRACES_MAPPING_PATH = '../data/game_to_trace_id.json'

# Read the whole file and decode it in one step.
with open(GAME_TO_TRACES_MAPPING_PATH) as mapping_file:
    game_to_traces_mapping = json.loads(mapping_file.read())

## Check the 'reward machine' evaluation results from all participants' interactions

In [5]:
# Tallies over every (game, component) pair in the trace-filter results.
total_game_components = 0
unsatisfied_game_components = []
games_with_satisfied_components, games_with_unsatisfied_components = set(), set()

for index, game_id in enumerate(all_game_ids):
    # Results are looked up by the game's position in `all_game_ids`.
    game_trace_filter_results = human_games_trace_filter_data['full'][index]
    for game_component_name, trace_id_to_count in game_trace_filter_results.items():
        total_game_components += 1

        if trace_id_to_count:
            # Non-empty entry: this component has at least one recorded trace.
            games_with_satisfied_components.add(game_id)
            continue

        # Empty entry: no trace was recorded for this component.
        games_with_unsatisfied_components.add(game_id)
        unsatisfied_game_components.append((index, game_id, game_component_name))

print(f'Of the {total_games} games in our dataset, {len(games_with_satisfied_components)} have at least one component satisfied, while {len(games_with_unsatisfied_components)} have at least one component unsatisfied')
print(f'Of the {total_game_components} total components across the {len(all_game_ids)} games in our dataset, {len(unsatisfied_game_components)} unique components are never satisfied')

Of the 98 games in our dataset, 95 have at least one component satisfied, while 15 have at least one component unsatisfied
Of the 249 total components across the 98 games in our dataset, 23 unique components are never satisfied


## Check the 'reward machine' evaluation results from the creating participant's interactions

In [6]:
# For every game, record per component how many of the IDs listed for the game
# in `game_to_traces_mapping` also appear among the traces recorded for that
# component, then summarise each game with 'all' / 'any' flags.
same_participant_trace_filter_results = defaultdict(dict)

for index, game_id in enumerate(all_game_ids):
    game_trace_filter_results = human_games_trace_filter_data['full'][index]
    for game_component_name, trace_id_to_count in game_trace_filter_results.items():
        # Only the part of each trace ID before the first '-' is compared
        # against the mapping's IDs.
        trace_ids = [trace_id.split('-')[0] for trace_id in trace_id_to_count]
        section_firestore_ids = set(trace_ids)
        participant_db_ids = set(game_to_traces_mapping[game_id])
        shared_ids = participant_db_ids & section_firestore_ids
        same_participant_trace_filter_results[game_id][game_component_name] = len(shared_ids)

    # Snapshot the per-component counts BEFORE writing the summary keys.
    # Previously 'any' was computed from the dict after 'all' had been
    # inserted, so the 'all' flag itself was folded into any(...). For a game
    # with no components, all([]) is True and 'any' was wrongly reported True.
    component_counts = list(same_participant_trace_filter_results[game_id].values())
    same_participant_trace_filter_results[game_id]['all'] = all(component_counts)
    same_participant_trace_filter_results[game_id]['any'] = any(component_counts)


In [7]:
# Count the games whose 'any' / 'all' summary flags are truthy.
per_game_summaries = same_participant_trace_filter_results.values()
any_count = sum(summary['any'] for summary in per_game_summaries)
all_count = sum(summary['all'] for summary in per_game_summaries)

# Report both counts as a share of the full dataset.
print(f'Of the {total_games} games in our dataset, {any_count} ({any_count / total_games * 100:.2f}%) have at least one component satisfied by the participant who created the game')
print(f'Of the {len(all_game_ids)} games in our dataset, {all_count} ({all_count / total_games * 100:.2f}%) have all of their components satisfied by the participant who created the game')

Of the 98 games in our dataset, 86 (87.76%) have at least one component satisfied by the participant who created the game
Of the 98 games in our dataset, 56 (57.14%) have all of their components satisfied by the participant who created the game
