In [19]:
from collections import defaultdict
import csv
from argparse import Namespace
from ast import literal_eval
import copy
import gzip
import itertools
import json
import math
import os
import pickle
import sys
import textwrap
import typing

import logging
logging.getLogger('matplotlib').setLevel(logging.WARNING)

import duckdb
from IPython.display import display, Markdown, HTML  # type: ignore
import matplotlib
import matplotlib.axes
import matplotlib.pyplot as plt
from Levenshtein import distance as _edit_distance
import numpy as np
import pandas as pd
import tabulate
import tatsu
import tatsu.ast
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tabulate
from tqdm.notebook import tqdm
from scipy import stats
from scipy.special import comb
import seaborn as sns
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline
from tqdm import tqdm

sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.ast_utils import _extract_game_id, deepcopy_ast, replace_child
from src.ast_printer import ast_to_lines
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS, load_data
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.evolutionary_sampler import *
from src import fitness_features_by_category, latest_model_paths
from src import ast_parser

In [2]:
# Compile the DSL grammar and load the parsed human-authored games.
# The grammar file is read inside a context manager so the handle is closed
# deterministically rather than whenever the garbage collector gets to it.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()

grammar_parser = tatsu.compile(grammar)
# NOTE(review): `cached_load_and_parse_games_from_file` is not imported by name in the
# imports cell; presumably it arrives through one of the `from src... import *` lines.
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))

2024-05-09 10:09:41 - ast_utils - INFO     - Loading from cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz
2024-05-09 10:09:41 - ast_utils - INFO     - Finished loading cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz


In [3]:
# Load the MAP-Elites sampler registered under `model_key`.
model_key = 'max_exemplar_preferences_by_bcs_with_expected_values'
model_spec = latest_model_paths.MAP_ELITES_MODELS[model_key]
model = typing.cast(MAPElitesSampler, model_spec.load())

# Score every human game with the model and compute the key the model assigns to it.
# Scores and keys are collected in game order, so position == index into `game_asts`.
real_game_fitness_scores = []
ALL_REAL_GAME_KEYS = []
for ast in game_asts:
    fitness_score, features = model._score_proposal(ast, return_features=True)  # type: ignore
    real_game_fitness_scores.append(fitness_score)
    ALL_REAL_GAME_KEYS.append(model._features_to_key(ast, features))

# Forward (index -> key) and reverse (key -> [indices]) lookups derived from the key list.
real_game_index_to_key = dict(enumerate(ALL_REAL_GAME_KEYS))
key_to_real_game_index = defaultdict(list)
for i, key in real_game_index_to_key.items():
    key_to_real_game_index[key].append(i)

2024-05-09 10:09:41 - compile_predicate_statistics_full_database - INFO     - Loading data from files with query timeout 15
2024-05-09 10:10:02 - compile_predicate_statistics_full_database - INFO     - Creating DuckDB table...
2024-05-09 10:10:04 - compile_predicate_statistics_full_database - INFO     - Creating data table indices...
2024-05-09 10:10:24 - compile_predicate_statistics_full_database - INFO     - Loaded data, found 843825 rows


In [4]:
# Trace-filter data for the model's games, loaded through the model spec.
trace_filter_results = model_spec.load_trace_filter_data()

# Trace-filter data for the human games, loaded from a dated results file.
# NOTE(review): the meaning of the first two positional arguments to `load_data`
# is not visible from this notebook — confirm against `fitness_energy_utils.load_data`.
human_games_trace_filter_data = load_data(
    '',
    'samples',
    '/trace_filter_results_interactive-beta.pddl_2024_03_19',
    relative_path='..',
)

In [5]:
# Indices (into `game_asts`) of the human games selected for this analysis.
REAL_GAME_INDICES_TO_INCLUDE = [
    0, 4, 6, 7, 11, 14, 17, 23, 26, 28,
    31, 32, 35, 37, 40, 41, 42, 45, 49, 51,
    52, 55, 58, 59, 64, 74, 88, 90, 94, 96,
]

# Alternative: include every human game.
# REAL_GAME_INDICES_TO_INCLUDE = list(range(98))

# Keys of the selected games, in the same order as the index list above.
REAL_GAME_KEY_LIST = list(map(real_game_index_to_key.__getitem__, REAL_GAME_INDICES_TO_INCLUDE))
# key -> position of that key within REAL_GAME_KEY_LIST (NOT the index into `game_asts`).
REAL_GAME_KEY_DICT = dict(zip(REAL_GAME_KEY_LIST, itertools.count()))
# Set view of the selected keys for membership tests.
REAL_GAME_KEYS = set(REAL_GAME_KEY_DICT)

# Thirty 12-element keys used to restrict the novel-archive-cell translations
# when `UNMATCHED_ONLY_TOP_30` is enabled (see the translation-loading cell).
# NOTE(review): presumably these are the 30 highest-fitness archive cells that no
# human game falls into — the selection criterion is not shown in this notebook.
UNMATCHED_TOP_30_KEYS = [
    (1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0),
    (1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0),
    (1, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1),
    (1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0),
    (1, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0),
    (1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0),
    (1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 0, 0),
    (1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 1, 0, 0, 1, 0, 0, 0, 1, 0),
    (1, 1, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0),
    (1, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0),
    (1, 0, 4, 0, 1, 1, 0, 1, 0, 1, 0, 0),
    (1, 0, 4, 0, 0, 0, 0, 0, 3, 0, 0, 0),
    (1, 1, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 4, 0, 0, 1, 1, 1, 0, 1, 0, 0),
    (1, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 1, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0)
]

In [6]:
TRANSLATIONS_DIR = '../llm_tests/translations'
TRANSLATION_DATE = '2024_01_12'
UNMATCHED_ONLY_TOP_30 = True


def _load_translations(file_prefix):
    """Load one translations JSON file and return it keyed by parsed keys.

    The file read is
    ``{TRANSLATIONS_DIR}/{file_prefix}_translations_split_{TRANSLATION_DATE}.json``.
    JSON object keys are strings, so each one is passed through ``literal_eval``
    to recover the Python literal it encodes (the keys are compared against
    tuples elsewhere in this notebook).

    Args:
        file_prefix: Leading part of the file name, e.g. ``'human_games'``.

    Returns:
        dict mapping each parsed key to the value stored under it in the file.
    """
    path = f'{TRANSLATIONS_DIR}/{file_prefix}_translations_split_{TRANSLATION_DATE}.json'
    with open(path) as f:
        raw_translations = json.load(f)

    return {literal_eval(k): v for k, v in raw_translations.items()}


human_game_texts = _load_translations('human_games')
human_cell_archive_game_texts = _load_translations('human_cell_archive_games')
novel_archive_cell_game_texts = _load_translations('novel_archive_cell_games')

if UNMATCHED_ONLY_TOP_30:
    # Set membership instead of scanning the 30-element list once per key.
    _unmatched_top_30_keys = set(UNMATCHED_TOP_30_KEYS)
    novel_archive_cell_game_texts = {
        k: v for k, v in novel_archive_cell_game_texts.items() if k in _unmatched_top_30_keys
    }

In [7]:
# Key -> AST for the selected human games.
# The lookup goes through REAL_GAME_INDICES_TO_INCLUDE directly: the values of
# REAL_GAME_KEY_DICT are positions within REAL_GAME_KEY_LIST (0..len-1), not indices
# into `game_asts`, so indexing `game_asts` with them pairs keys with the wrong games.
real_game_key_to_ast = {real_game_index_to_key[i]: game_asts[i] for i in REAL_GAME_INDICES_TO_INCLUDE}

# Key -> AST for generated games, taken from the model's population, for the keys
# present in the human-cell and novel-cell translation files respectively.
matched_game_key_to_ast = {key: model.population[key] for key in human_cell_archive_game_texts.keys()}
unmatched_game_key_to_ast = {key: model.population[key] for key in novel_archive_cell_game_texts.keys()}

In [8]:
def get_activating_traces(filter_info, key, exclude_setup=False):
    """Collect, per sub-AST of one game, the set of traces that activate it.

    Args:
        filter_info: Mapping whose ``'full'`` entry maps a game key to
            ``{sub_ast: {trace: activation}}``.
        key: Game key to look up under ``filter_info['full']``.
        exclude_setup: When true, drop every entry whose ``sub_ast`` identifier
            contains ``'setup'``.

    Returns:
        dict mapping each retained ``sub_ast`` to the set of traces whose
        activation is ``> 0``, plus two aggregate entries:
        ``'all'`` (traces activating every retained sub-AST) and
        ``'any'`` (traces activating at least one). Both aggregates are empty
        sets when no sub-AST is retained.

    Raises:
        KeyError: If ``'full'`` or ``key`` is missing from ``filter_info``.
    """
    sub_ast_to_activating_traces = {
        sub_ast: {trace for trace, activation in trace_activations.items() if activation > 0}
        for sub_ast, trace_activations in filter_info['full'][key].items()
        if not (exclude_setup and 'setup' in sub_ast)
    }

    # Snapshot the per-sub-AST sets before the aggregate keys are inserted, so
    # neither aggregate is computed over the other.
    per_sub_ast_traces = list(sub_ast_to_activating_traces.values())

    # `set.intersection()` with no arguments raises TypeError, so the
    # "nothing retained" case is handled explicitly.
    sub_ast_to_activating_traces['all'] = set.intersection(*per_sub_ast_traces) if per_sub_ast_traces else set()
    sub_ast_to_activating_traces['any'] = set().union(*per_sub_ast_traces)

    return sub_ast_to_activating_traces

## Actual Analysis

In [9]:
# Keys of the generated games, grouped by whether they come from the
# matched or the unmatched key -> AST mapping.
generated_keys_mapping = dict(
    matched=list(matched_game_key_to_ast),
    unmatched=list(unmatched_game_key_to_ast),
)
# Keys of every human game (a live view over `real_game_index_to_key`).
all_human_game_keys = real_game_index_to_key.values()

In [10]:
def jaccard(a, b, aggregation):
    """Jaccard similarity between ``a[aggregation]`` and ``b[aggregation]``.

    Args:
        a: Mapping whose ``aggregation`` entry is a set.
        b: Mapping whose ``aggregation`` entry is a set.
        aggregation: Key selecting which entry of ``a`` and ``b`` to compare.

    Returns:
        ``|A ∩ B| / |A ∪ B|``, or ``0`` when both collections are empty.
    """
    left, right = a[aggregation], b[aggregation]
    combined = left.union(right)

    # An empty union means both inputs are empty; avoid dividing by zero.
    if not combined:
        return 0

    shared = left.intersection(right)
    return len(shared) / len(combined)

In [11]:
# For each (exclude_setup, aggregation, key_type) combination, compute the mean over
# generated games of the highest Jaccard similarity to any human game, where the
# similarity is taken between sets of activating traces.
data = []

# The human-game trace-filter data is indexed by position in `game_asts`.
num_human_games = len(game_asts)

for exclude_setup in [False, True]:
    human_game_activating_traces = [
        get_activating_traces(human_games_trace_filter_data, idx, exclude_setup=exclude_setup)
        for idx in range(num_human_games)
    ]

    # The activating traces do not depend on the aggregation, so they are computed
    # once per `exclude_setup` value rather than once per aggregation.
    generated_game_activating_traces = {
        key_type: [get_activating_traces(trace_filter_results, key, exclude_setup) for key in keys]
        for key_type, keys in generated_keys_mapping.items()
    }

    for aggregation in ['all', 'any']:
        for key_type, traces_per_game in generated_game_activating_traces.items():
            closest_similarities = [
                max(jaccard(activating_traces, human_game, aggregation) for human_game in human_game_activating_traces)
                for activating_traces in traces_per_game
            ]
            avg_closest_similarity = np.mean(closest_similarities)

            data.append({
                "exclude_setup": exclude_setup,
                "aggregation": aggregation,
                "key_type": key_type,
                "avg_closest_similarity": avg_closest_similarity
            })
            print(f"\n{key_type} games, '{aggregation}' aggregation, exclude_setup={exclude_setup}:")
            print(f"Average Jaccard similarity between generated game and closest human game: {avg_closest_similarity}")


matched games, 'all' aggregation, exclude_setup=False:
Average Jaccard similarity between generated game and closest human game: 0.25328159603044526

unmatched games, 'all' aggregation, exclude_setup=False:
Average Jaccard similarity between generated game and closest human game: 0.2566130396475719

matched games, 'any' aggregation, exclude_setup=False:
Average Jaccard similarity between generated game and closest human game: 0.7600344981838728

unmatched games, 'any' aggregation, exclude_setup=False:
Average Jaccard similarity between generated game and closest human game: 0.7366754939634391

matched games, 'all' aggregation, exclude_setup=True:
Average Jaccard similarity between generated game and closest human game: 0.3826242941482416

unmatched games, 'all' aggregation, exclude_setup=True:
Average Jaccard similarity between generated game and closest human game: 0.3020031489692554

matched games, 'any' aggregation, exclude_setup=True:
Average Jaccard similarity between generated g

In [12]:
# Tabulate the per-condition averages collected in `data` and print the table.
similarity_summary = pd.DataFrame(data)
print(similarity_summary)

   exclude_setup aggregation   key_type  avg_closest_similarity
0          False         all    matched                0.253282
1          False         all  unmatched                0.256613
2          False         any    matched                0.760034
3          False         any  unmatched                0.736675
4           True         all    matched                0.382624
5           True         all  unmatched                0.302003
6           True         any    matched                0.800355
7           True         any  unmatched                0.725037


## Code for Supplemental Figure

In [13]:
# The keys of the real games for which we want to know the most similar generated game
TARGET_KEYS = [
    (1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0), # matched 14
    (1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0), # matched 31
    (1, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0), # matched 40

    (1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0), # unmatched (place the bin near the north wall...)
    (1, 1, 3, 1, 0, 0, 1, 0, 0, 0, 1, 0), # unmatched (credit cards and CDs)
    (1, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0), # unmatched (block stacking)
]


In [14]:
# Sanity check: (number of human game keys, number of selected human games with an AST).
# Returned as one tuple because a notebook cell only displays its last expression,
# so a bare first `len(...)` line would be evaluated and silently discarded.
len(all_human_game_keys), len(real_game_key_to_ast)

30

In [24]:
# NOTE(review): `re` and `ast_printer` are not imported by name in the imports cell;
# presumably they arrive through the `from src... import *` lines — confirm, or import
# them explicitly so this cell survives Restart & Run All.
postprocessor = ast_parser.ASTSamplePostprocessor()


def ast_to_standardized_string(ast):
    """Render a game AST as a single-line string without its leading header.

    The AST is run through the module-level ``postprocessor``, printed with
    ``ast_printer.ast_to_string``, and then everything up to and including the
    first ``)`` that follows ``(:domain`` is discarded, as is the final
    character of the string. Runs of whitespace are collapsed to one space.

    Args:
        ast: Game AST accepted by ``postprocessor`` and ``ast_printer.ast_to_string``.

    Returns:
        The standardized game string.

    Raises:
        ValueError: If the printed game has no ``(:domain`` clause or that
            clause is never closed. ``str.find`` returns -1 in those cases,
            which would otherwise silently yield a meaningless slice.
    """
    ast_str = ast_printer.ast_to_string(postprocessor(ast))

    domain_index = ast_str.find('(:domain')
    if domain_index == -1:
        raise ValueError("Printed game has no '(:domain' clause; cannot standardize it")

    close_paren_index = ast_str.find(')', domain_index)
    if close_paren_index == -1:
        raise ValueError("Printed game has an unclosed '(:domain' clause; cannot standardize it")

    return re.sub(r'\s+', ' ', ast_str[close_paren_index + 1:-1])


# Standardized string for every human game, in the same order as `game_asts`.
standardized_real_game_strings = list(map(ast_to_standardized_string, game_asts))

In [29]:
from thefuzz import fuzz

In [31]:
# For each target generated game, find the human game whose standardized string has the
# smallest edit distance to it, and export both games (as printed source) to a CSV file.
data = []
for key in TARGET_KEYS:
    # Both the matched and unmatched key -> AST mappings are built from `model.population`,
    # so the AST is taken from there directly instead of switching on the position in TARGET_KEYS.
    target_ast = model.population[key]
    target_game_standardized_string = ast_to_standardized_string(target_ast)
    edit_distances = [_edit_distance(target_game_standardized_string, real_game_standardized_str) for real_game_standardized_str in standardized_real_game_strings]
    print(edit_distances)

    closest_human_game_idx = int(np.argmin(edit_distances))
    closest_edit_distance = edit_distances[closest_human_game_idx]
    print(closest_human_game_idx, closest_edit_distance)

    # The games are stored in their non-standardized, line-by-line printed form.
    closest_human_game = "\n".join(ast_to_lines(game_asts[closest_human_game_idx]))
    target_game = "\n".join(ast_to_lines(target_ast))

    data.append((key, closest_human_game_idx, target_game, closest_human_game, closest_edit_distance))

filename = "./temp_outputs/supplemental_figure_closest_games_string_edit_distance.csv"
# Create the output directory on a fresh checkout instead of failing in `open`.
os.makedirs(os.path.dirname(filename), exist_ok=True)
# `newline=''` is what the csv module requires for files it writes; the game fields
# contain embedded newlines, which are otherwise not guaranteed to round-trip.
with open(filename, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["key", "closest_human_game_idx", "target_game", "closest_human_game", "edit_distance"])
    writer.writerows(data)

        

[446, 679, 179, 166, 605, 624, 671, 271, 450, 278, 372, 286, 845, 411, 107, 1141, 610, 1081, 922, 267, 811, 817, 693, 139, 516, 1259, 201, 766, 439, 338, 158, 146, 387, 605, 564, 590, 729, 490, 248, 1141, 635, 231, 145, 233, 347, 336, 267, 300, 721, 1041, 304, 860, 633, 704, 237, 160, 1426, 1067, 276, 1134, 764, 386, 304, 400, 358, 1258, 164, 1002, 143, 2153, 381, 143, 415, 648, 303, 1037, 375, 255, 145, 380, 163, 462, 313, 361, 1079, 653, 261, 735, 287, 1619, 518, 1001, 454, 232, 591, 405, 806, 457]
14 107
[394, 638, 187, 95, 580, 593, 639, 199, 494, 253, 366, 218, 806, 386, 159, 1107, 602, 1062, 879, 212, 768, 837, 657, 135, 493, 1235, 197, 740, 372, 281, 93, 87, 345, 552, 499, 560, 698, 458, 234, 1140, 595, 175, 62, 191, 366, 294, 228, 229, 747, 1051, 243, 833, 605, 691, 191, 161, 1409, 1050, 217, 1124, 739, 354, 272, 356, 314, 1248, 95, 982, 66, 2151, 335, 61, 393, 628, 278, 996, 337, 191, 62, 334, 66, 430, 260, 335, 1058, 614, 194, 715, 220, 1606, 488, 970, 414, 236, 568, 337, 776

In [None]:
# NOTE(review): scratch note from a never-executed cell, kept verbatim as a comment.
# As code it is not valid Python (a comma appears to be missing between `19` and `28`)
# and would raise SyntaxError under Restart & Run All. Its meaning is not stated here.
#   14, 42, 19 28, 19, 44