In [1]:
%autoreload 2

In [2]:
from collections import defaultdict
from argparse import Namespace
from ast import literal_eval
import copy
import gzip
import itertools
import json
import math
import os
import pickle
import sys
import textwrap
import typing

import logging
logging.getLogger('matplotlib').setLevel(logging.WARNING)

import duckdb
from IPython.display import display, Markdown, HTML  # type: ignore
import matplotlib
import matplotlib.axes
import matplotlib.pyplot as plt
from Levenshtein import distance as _edit_distance
import numpy as np
import pandas as pd
import tabulate
import tatsu
import tatsu.ast
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tabulate
from tqdm.notebook import tqdm
from scipy import stats
import seaborn as sns
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline

sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.ast_utils import _extract_game_id, deepcopy_ast, replace_child
from src.ast_printer import ast_to_lines
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.evolutionary_sampler import *
from src import fitness_features_by_category, latest_model_paths

2024-03-05 18:24:42 - ast_utils - DEBUG    - Using cache folder: /Users/guydavidson/tmp/game_generation_cache
2024-03-05 18:24:42 - src.ast_utils - DEBUG    - Using cache folder: /Users/guydavidson/tmp/game_generation_cache


In [3]:

# Read the DSL grammar inside a context manager so the file handle is closed right away
# instead of staying open until the garbage collector gets to it.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)

# Parse the games in interactive-beta.pddl and render each AST back to text.
# NOTE(review): the meaning of the positional `False` argument is not visible from this cell — confirm.
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]
# regrown_game_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples.pddl'))
# regrown_game_1024_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples-1024.pddl'))
# print(len(real_game_texts), len(regrown_game_texts), len(regrown_game_texts) / 98, len(regrown_game_1024_texts), len(regrown_game_1024_texts) / 98)

# Load the fitness feature table, list the source files it covers, and preview the first rows.
fitness_df = utils.load_fitness_data('../data/fitness_features_1024_regrowths.csv.gz')
print(fitness_df.src_file.unique())
fitness_df.head()

2024-03-05 18:24:45 - ast_utils - INFO     - Loading from cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz
2024-03-05 18:24:45 - ast_utils - INFO     - Finished loading cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz


['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl.gz']


Unnamed: 0,Index,src_file,game_name,domain_name,original_game_name,real,variables_defined_all,variables_defined_prop,variables_used_all,variables_used_prop,...,ast_ngram_constraints_n_4_score,ast_ngram_constraints_n_5_score,ast_ngram_terminal_n_2_score,ast_ngram_terminal_n_3_score,ast_ngram_terminal_n_4_score,ast_ngram_terminal_n_5_score,ast_ngram_scoring_n_2_score,ast_ngram_scoring_n_3_score,ast_ngram_scoring_n_4_score,ast_ngram_scoring_n_5_score
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,6172feb1665491d1efbce164-0,1,1,1.0,1,1.0,...,0.969177,0.979573,0.965285,0.976289,0.977376,0.980148,0.915272,0.960175,0.999188,1.0
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,5f77754ba932fb2c4ba181d8-2,1,1,1.0,1,1.0,...,0.974875,0.978991,0.961082,0.972691,0.97181,0.980943,0.895362,0.910697,0.963311,0.973001
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,614b603d4da88384282967a7-3,1,1,1.0,1,1.0,...,0.954233,0.972808,0.0,0.0,0.0,0.0,0.841097,0.898279,0.946597,0.955042
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,5bc79f652885710001a0e82a-5,1,1,1.0,1,1.0,...,0.986178,0.982877,0.0,0.0,0.0,0.0,0.915272,0.960175,0.999188,1.0
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,614dec67f6eb129c3a77defd-6,1,1,1.0,1,1.0,...,0.984002,0.979782,0.0,0.0,0.0,0.0,0.964905,0.971973,0.972563,0.962818


In [5]:
# trace_filter_results_path = '../samples/trace_filter_results_max_exemplar_preferences_by_bcs_with_expected_values_2023_11_29_2023_12_05_1.pkl.gz'
# Look up the model spec by name and load the sampler it points to.
model_key = 'max_exemplar_preferences_by_bcs_with_expected_values'
model_spec = latest_model_paths.MAP_ELITES_MODELS[model_key]
model = typing.cast(MAPElitesSampler, model_spec.load())

# Score every real game and record the archive key the model assigns to it:
#   key_to_real_game_index   : key -> list of indices into `game_asts` sharing that key
#   real_game_index_to_key   : index into `game_asts` -> key
#   real_game_fitness_scores : score per game, in `game_asts` order
#   ALL_REAL_GAME_KEYS       : key per game, in `game_asts` order
key_to_real_game_index = defaultdict(list)
real_game_index_to_key = {}
real_game_fitness_scores = []
ALL_REAL_GAME_KEYS = []
for i, ast in enumerate(game_asts):
    # The loop temporary is deliberately NOT called `fitness_score`: loop variables leak into
    # the notebook namespace, and that name belongs to the `fitness_score()` helper defined
    # further down. Re-running this cell would otherwise replace the helper with a number.
    real_game_score, features = model._score_proposal(ast, return_features=True)  # type: ignore
    real_game_fitness_scores.append(real_game_score)
    key = model._features_to_key(ast, features)
    key_to_real_game_index[key].append(i)
    real_game_index_to_key[i] = key
    ALL_REAL_GAME_KEYS.append(key)

# Load the trace filter data that belongs to the same model spec and show its top-level keys.
trace_filter_results = model_spec.load_trace_filter_data()
trace_filter_results.keys()

2024-03-05 18:25:13 - compile_predicate_statistics_full_database - INFO     - Loading data from files
2024-03-05 18:25:33 - compile_predicate_statistics_full_database - INFO     - Creating DuckDB table...
2024-03-05 18:25:34 - compile_predicate_statistics_full_database - INFO     - Creating data table indices...
2024-03-05 18:25:53 - compile_predicate_statistics_full_database - INFO     - Loaded data, found 843825 rows


dict_keys(['summary', 'full'])

In [11]:
# Indices into `game_asts` of the real games to include.
REAL_GAME_INDICES_TO_INCLUDE = [
    0, 4, 6, 7, 11, 14, 17, 23, 26, 28,
    31, 32, 35, 37, 40, 41, 42, 45, 49, 51,
    52, 55, 58, 59, 64, 74, 88, 90, 94, 96,
]

# Archive key of each included game, in the same order as the index list above.
REAL_GAME_KEY_LIST = [
    real_game_index_to_key[game_index]
    for game_index in REAL_GAME_INDICES_TO_INCLUDE
]
# key -> position of that key within REAL_GAME_KEY_LIST.
# Note: this is the list position, not an index into `game_asts`.
REAL_GAME_KEY_DICT = {
    game_key: position
    for position, game_key in enumerate(REAL_GAME_KEY_LIST)
}
REAL_GAME_KEYS = set(REAL_GAME_KEY_DICT)
# Prints the number of distinct keys among the included games.
print(len(REAL_GAME_KEYS))

30


In [14]:
# Thirty archive cell keys, each a 12-element tuple. When UNMATCHED_ONLY_TOP_30 is set, the
# novel archive cell translations are filtered down to the games whose key appears in this list.
# NOTE(review): the tuple positions presumably follow the order of the behavioral features
# shown by game_to_behavioral_feature_dict (expected_feature_values, section_doesnt_exist_setup,
# num_preferences_defined, then the exemplar_preference_* entries) — confirm against the model.
UNMATCHED_TOP_30_KEYS = [
    (1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0),
    (1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 0, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0),
    (1, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1),
    (1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0),
    (1, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0),
    (1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0),
    (1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 0, 0),
    (1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 1, 0, 0, 1, 0, 0, 0, 1, 0),
    (1, 1, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0),
    (1, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0),
    (1, 0, 4, 0, 1, 1, 0, 1, 0, 1, 0, 0),
    (1, 0, 4, 0, 0, 0, 0, 0, 3, 0, 0, 0),
    (1, 1, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 1, 4, 0, 0, 1, 1, 1, 0, 1, 0, 0),
    (1, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 1),
    (1, 1, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0),
    (1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0),
    (1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0)
]

In [15]:
TRANSLATIONS_DIR = '../llm_tests/translations'
TRANSLATION_DATE = '2024_01_12'
UNMATCHED_ONLY_TOP_30 = True


def _load_translations(file_prefix: str) -> dict:
    """Load one translations JSON file and parse its keys back into Python literals.

    Args:
        file_prefix: Leading part of the file name, e.g. ``'human_games'``. The file read is
            ``{TRANSLATIONS_DIR}/{file_prefix}_translations_split_{TRANSLATION_DATE}.json``.

    Returns:
        A dict with the same values as the JSON object, keyed by ``literal_eval`` of each
        original string key.

    Raises:
        FileNotFoundError: If the translations file does not exist.
        ValueError / SyntaxError: If a key is not a valid Python literal.
    """
    path = f'{TRANSLATIONS_DIR}/{file_prefix}_translations_split_{TRANSLATION_DATE}.json'
    # Explicit encoding so that decoding does not depend on the platform's default locale.
    with open(path, encoding='utf-8') as translations_file:
        raw_translations = json.load(translations_file)
    # Key parsing happens after the file is closed; it only needs the loaded dict.
    return {literal_eval(k): v for k, v in raw_translations.items()}


human_game_texts = _load_translations('human_games')
human_cell_archive_game_texts = _load_translations('human_cell_archive_games')
novel_archive_cell_game_texts = _load_translations('novel_archive_cell_games')

if UNMATCHED_ONLY_TOP_30:
    # Keep only the novel archive games whose key is one of the hand-picked top-30 keys.
    # A set gives constant-time membership checks instead of scanning the list for every key.
    unmatched_top_30_key_set = set(UNMATCHED_TOP_30_KEYS)
    novel_archive_cell_game_texts = {
        k: v for k, v in novel_archive_cell_game_texts.items() if k in unmatched_top_30_key_set
    }

### Map each archive cell key to the relevant AST

@gdrtodd -- we could also pull in more human games, etc., but these are the ones from the human eval dataset 

In [16]:
# Pair each included real game's archive key with that game's own AST.
# REAL_GAME_KEY_DICT cannot be used for this: its values are positions within
# REAL_GAME_KEY_LIST (0..29), not indices into `game_asts`, so indexing `game_asts` with them
# attaches the wrong game to every key except the first. Iterate the game indices directly.
real_game_key_to_ast = {real_game_index_to_key[i]: game_asts[i] for i in REAL_GAME_INDICES_TO_INCLUDE}
# For the archive-sampled games, the AST is whatever the model's population stores under the key.
matched_game_key_to_ast = {key: model.population[key] for key in human_cell_archive_game_texts.keys()}
unmatched_game_key_to_ast = {key: model.population[key] for key in novel_archive_cell_game_texts.keys()}

### Extract fitness features for a game

This returns a dict where each key is a fitness feature name and each value is the value of that feature

If for some reason you want all features, rather than the ones that the model used, set `only_used=False`

In [18]:
def extract_fitness_features(ast: tatsu.ast.AST, only_used: bool = True):
    """Compute the fitness features of a game with the loaded model.

    Args:
        ast: The game to featurize.
        only_used: When True (the default), keep only the features whose names appear in
            ``model.feature_names``. When False, return everything the featurizer produced.

    Returns:
        A dict mapping feature name to feature value.
    """
    all_features = model._proposal_to_features(ast)
    if not only_used:
        return all_features

    return {
        name: value
        for name, value in all_features.items()
        if name in model.feature_names
    }


# Example: the model-used features of the first included real game.
extract_fitness_features(real_game_key_to_ast[REAL_GAME_KEY_LIST[0]])

{'variables_used_all': 1,
 'variables_used_prop': 1.0,
 'preferences_used_all': 1,
 'preferences_used_prop': 1.0,
 'setup_quantified_objects_used': 1.0,
 'any_setup_objects_used': 1,
 'predicate_found_in_data_prop': 1.0,
 'predicate_found_in_data_small_logicals_prop': 1.0,
 'adjacent_once_found': 0,
 'adjacent_same_modal_found': 0,
 'once_in_middle_of_pref_found': 0,
 'pref_without_hold_found': 0,
 'repeated_variables_found': 0,
 'repeated_variable_type_in_either': 0,
 'nested_logicals_found': 0,
 'identical_logical_children_found': 0,
 'identical_scoring_children_found': 0,
 'redundant_expression_found': 0,
 'redundant_scoring_terminal_expression_found': 0,
 'unnecessary_expression_found': 0,
 'unnecessary_scoring_terminal_expression_found': 0,
 'total_score_non_positive': 0,
 'scoring_preferences_used_identically': 0,
 'identical_consecutive_seq_func_predicates_found': 0,
 'disjoint_preferences_found': 0,
 'disjoint_preferences_scoring_terminal_types': 0,
 'disjoint_preferences_scori

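I don't know why you'd ever want a game's features as a Tensor, but just in case, `extract_fitness_tensor` below returns them as one. `fitness_score` returns the model's fitness score for a game.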

In [19]:
def extract_fitness_tensor(ast: tatsu.ast.AST):
    """Return a game's fitness features converted by ``model._features_to_tensor``.

    All features are extracted (``only_used=False``) before the conversion.
    """
    return model._features_to_tensor(extract_fitness_features(ast, only_used=False))


def fitness_score(ast: tatsu.ast.AST):
    """Return the loaded model's score for a game, without the accompanying features."""
    score = model._score_proposal(ast, return_features=False)
    return score


# Example: score of the first included real game.
fitness_score(real_game_key_to_ast[REAL_GAME_KEY_LIST[0]])

36.583473205566406

The function below explicitly maps a game to its behavioral characteristics (BCs). It is mostly useful when you want to know which BC index corresponds to which feature.

In [21]:
def game_to_behavioral_feature_dict(ast: tatsu.ast.AST):
    """Return the named behavioral features that the model's custom featurizer assigns to a game.

    The game's full fitness feature dict (``only_used=False``) is computed first and passed to
    ``model.custom_featurizer.get_game_features`` together with the AST.
    """
    all_fitness_features = extract_fitness_features(ast, only_used=False)
    featurizer = model.custom_featurizer
    return featurizer.get_game_features(ast, all_fitness_features)


# Example: behavioral features of the first included real game.
game_to_behavioral_feature_dict(real_game_key_to_ast[REAL_GAME_KEY_LIST[0]])



{'expected_feature_values': 1,
 'section_doesnt_exist_setup': False,
 'num_preferences_defined': 2,
 'exemplar_preference_9': 0,
 'exemplar_preference_19': 0,
 'exemplar_preference_32': 0,
 'exemplar_preference_53': 0,
 'exemplar_preference_97': 0,
 'exemplar_preference_98': 0,
 'exemplar_preference_100': 0,
 'exemplar_preference_126': 1,
 'exemplar_preference_139': 0}