In [1]:
%autoreload 2

In [10]:
from ast import literal_eval
import json
import os
import sys
import typing

import logging
# Only show matplotlib log records at WARNING level or above
logging.getLogger('matplotlib').setLevel(logging.WARNING)

import pandas as pd

# Make the parent directory and its `src` folder importable from this notebook
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src import fitness_energy_utils as utils
# NOTE(review): later cells use `np`, `tatsu`, `ast_printer`, `defaultdict`,
# `cached_load_and_parse_games_from_file` and `MAPElitesSampler` without importing them
# explicitly -- presumably they are pulled in by the star imports below; confirm before
# narrowing these to explicit imports.
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.evolutionary_sampler import *
from src import latest_model_paths

# Load data and model

In [3]:
# Read and compile the DSL grammar; the `with` block closes the file instead of leaking the handle
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)

# Parse the games in interactive-beta.pddl and render each AST back to text
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]

# Load the fitness features table and show which source files it covers
fitness_df = utils.load_fitness_data('../data/fitness_features_1024_regrowths.csv.gz')
print(fitness_df.src_file.unique())
fitness_df.head()

2024-08-16 17:44:56 - ast_utils - INFO     - Loading from cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz
2024-08-16 17:44:57 - ast_utils - INFO     - Finished loading cache file: /Users/guydavidson/tmp/game_generation_cache/interactive-beta-cache.pkl.gz


['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl.gz']


Unnamed: 0,Index,src_file,game_name,domain_name,original_game_name,real,variables_defined_all,variables_defined_prop,variables_used_all,variables_used_prop,...,ast_ngram_constraints_n_4_score,ast_ngram_constraints_n_5_score,ast_ngram_terminal_n_2_score,ast_ngram_terminal_n_3_score,ast_ngram_terminal_n_4_score,ast_ngram_terminal_n_5_score,ast_ngram_scoring_n_2_score,ast_ngram_scoring_n_3_score,ast_ngram_scoring_n_4_score,ast_ngram_scoring_n_5_score
0,0,interactive-beta.pddl,game-0,medium-objects-room-v1,game-0,1,1,1.0,1,1.0,...,0.969177,0.979573,0.965285,0.976289,0.977376,0.980148,0.915272,0.960175,0.999188,1.0
1,1,interactive-beta.pddl,game-2,many-objects-room-v1,game-2,1,1,1.0,1,1.0,...,0.974875,0.978991,0.961082,0.972691,0.97181,0.980943,0.895362,0.910697,0.963311,0.973001
2,2,interactive-beta.pddl,game-3,many-objects-room-v1,game-3,1,1,1.0,1,1.0,...,0.954233,0.972808,0.0,0.0,0.0,0.0,0.841097,0.898279,0.946597,0.955042
3,3,interactive-beta.pddl,game-5,few-objects-room-v1,game-5,1,1,1.0,1,1.0,...,0.986178,0.982877,0.0,0.0,0.0,0.0,0.915272,0.960175,0.999188,1.0
4,4,interactive-beta.pddl,game-6,medium-objects-room-v1,game-6,1,1,1.0,1,1.0,...,0.984002,0.979782,0.0,0.0,0.0,0.0,0.964905,0.971973,0.972563,0.962818


In [20]:
# Positions in `game_asts` of the real games that get an entry in
# `key_to_included_real_game_index` (see the membership check in the loop below)
REAL_GAME_INDICES_TO_INCLUDE = [
    0, 4, 6, 7, 11, 14, 17, 23, 26, 28,
    31, 32, 35, 37, 40, 41, 42, 45, 49, 51,
    52, 55, 58, 59, 64, 74, 88, 90, 94, 96,
]

# Look up the model spec by name and load it
model_key = 'max_exemplar_preferences_by_bcs_with_expected_values'
model_spec = latest_model_paths.MAP_ELITES_MODELS[model_key]
model = typing.cast(MAPElitesSampler, model_spec.load())

# Per-game results, indexed by position in `game_asts`
real_game_fitness_scores = []
ALL_REAL_GAME_KEYS = []
real_game_index_to_key = {}

# Reverse lookups from key to game position(s)
key_to_real_game_index = defaultdict(list)
key_to_included_real_game_index = {}

for i, ast in enumerate(game_asts):
    # Score the game, then derive its key from the returned features
    fitness_score, features = model._score_proposal(ast, return_features=True)  # type: ignore
    key = model._features_to_key(ast, features)

    real_game_fitness_scores.append(fitness_score)
    ALL_REAL_GAME_KEYS.append(key)
    real_game_index_to_key[i] = key
    key_to_real_game_index[key].append(i)

    # Plain assignment: if several included games share a key, the last one wins
    if i in REAL_GAME_INDICES_TO_INCLUDE:
        key_to_included_real_game_index[key] = i




In [38]:
TRANSLATIONS_DIR = '../llm_tests/translations'
TRANSLATION_DATE = '2024_01_12'


def _load_translations(path: str):
    """Read one translations JSON file and return the decoded object.

    The file is opened as UTF-8 explicitly so that decoding does not depend on
    the platform's default text encoding.
    """
    with open(path, encoding='utf-8') as translations_file:
        return json.load(translations_file)


human_game_texts = _load_translations(f'{TRANSLATIONS_DIR}/human_games_translations_split_{TRANSLATION_DATE}.json')
human_cell_archive_game_texts = _load_translations(f'{TRANSLATIONS_DIR}/human_cell_archive_games_translations_split_{TRANSLATION_DATE}.json')
novel_archive_cell_game_texts = _load_translations(f'{TRANSLATIONS_DIR}/novel_archive_cell_games_translations_split_{TRANSLATION_DATE}.json')

# Highest fitness participant- and model-generated games

In [37]:
def print_game(fitness_score: float, game_text: str, game_translation: str = '', suffix: str = ''):
    """Print a game together with its fitness score and, optionally, a translation.

    Args:
        fitness_score: shown on the header line with three decimal places.
        game_text: the game text, printed between two separator rows.
        game_translation: when non-empty, printed under its own separator row
            before the game text.
        suffix: text placed after the fitness score on the header line.
    """
    separator = '=' * 100

    lines = [f'Fitness: {fitness_score:.3f} {suffix}']
    if game_translation:
        lines.extend((separator, game_translation))
    # The trailing empty entry yields the blank line that ends each printed game
    lines.extend((separator, game_text, separator, ''))

    for line in lines:
        print(line)


def print_n_highest_fitness_real_games(n: int):
    """Print the `n` real games with the highest fitness scores, best first.

    Reads the notebook-level `real_game_fitness_scores` and `real_game_texts`
    lists, which are indexed by the same game position.
    """
    # argsort is ascending, so reverse it to walk from the highest score down
    ascending_order = np.argsort(real_game_fitness_scores)
    top_indices = ascending_order[::-1][:n]

    for game_index in top_indices:
        print_game(real_game_fitness_scores[game_index], real_game_texts[game_index])


# Show the six real games with the highest fitness scores
print_n_highest_fitness_real_games(6)

Fitness: 38.013 
(define (game game-93) (:domain many-objects-room-v1)
(:constraints
  (and
    (preference throwBallToBin
      (exists (?d - dodgeball ?h - hexagonal_bin)
        (then
          (once (agent_holds ?d))
          (hold (and (not (agent_holds ?d)) (in_motion ?d)))
          (once (and (not (in_motion ?d)) (in ?h ?d)))
       )
     )
   )
 )
)
(:scoring
  (count throwBallToBin)
)
)

Fitness: 38.013 
(define (game game-51) (:domain few-objects-room-v1)
(:constraints
  (and
    (preference throwToBin
      (exists (?d - dodgeball ?h - hexagonal_bin)
        (then
          (once (agent_holds ?d))
          (hold (and (not (agent_holds ?d)) (in_motion ?d)))
          (once (and (not (in_motion ?d)) (in ?h ?d)))
       )
     )
   )
 )
)
(:scoring
  (count throwToBin)
)
)

Fitness: 38.013 
(define (game game-79) (:domain many-objects-room-v1)
(:constraints
  (and
    (preference throwGolfballToBin
      (exists (?g - golfball ?h - hexagonal_bin)
        (then
          (on

In [36]:
def print_n_highest_fitness_model_generated_games(n: int):
    """Print the `n` highest-fitness games stored in the loaded model, best first.

    Only entries of `model.fitness_values` whose key starts with 1 are considered;
    each selected game is rendered from `model.population` under the same key.
    """
    # the check key[0] == 1 is our auxiliary 'pseudo behavioral characteristic' check
    ranked_entries = sorted(
        ((key, fitness) for key, fitness in model.fitness_values.items() if key[0] == 1),
        key=lambda entry: entry[1],
        reverse=True,
    )

    for archive_key, archive_fitness in ranked_entries[:n]:
        game_string = ast_printer.ast_to_string(model.population[archive_key], '\n')  # type: ignore
        print_game(archive_fitness, game_string)


# Show the six model-generated games with the highest fitness scores
print_n_highest_fitness_model_generated_games(6)


Fitness: 37.804 
(define (game evo-8168-216-1) (:domain many-objects-room-v1)
(:setup
  (exists (?v0 - hexagonal_bin)
    (game-conserved
      (near rug ?v0)
   )
 )
)
(:constraints
  (and
    (preference preference0
      (exists (?v1 - hexagonal_bin ?v0 - dodgeball)
        (then
          (once (agent_holds ?v0))
          (hold (and (not (agent_holds ?v0)) (in_motion ?v0)))
          (once (and (not (in_motion ?v0)) (in ?v1 ?v0)))
       )
     )
   )
 )
)
(:scoring
  (count preference0)
)
)

Fitness: 37.751 
(define (game evo-8182-124-0) (:domain many-objects-room-v1)
(:constraints
  (and
    (preference preference0
      (exists (?v0 - hexagonal_bin ?v1 - dodgeball)
        (then
          (once (agent_holds ?v1))
          (hold (and (not (agent_holds ?v1)) (in_motion ?v1)))
          (once (and (not (in_motion ?v1)) (in ?v0 ?v1)))
       )
     )
   )
 )
)
(:scoring
  (count preference0)
)
)

Fitness: 37.553 
(define (game evo-8147-296-1) (:domain many-objects-room-v1)
(:setup

# Games with largest negative random effects for human likeness

In [16]:
# Load the table of games with the largest negative random effects (columns: effect, key, group)
human_likesness_negative_ranef_df = pd.read_csv('./human_evals_data/human_likeness_largest_negative_ranef.csv')
# Bracket access is used for the column assignment: attribute-style access can be
# shadowed by DataFrame attributes and is discouraged for setting values
human_likesness_negative_ranef_df['group'] = human_likesness_negative_ranef_df['group'].astype('category')
human_likesness_negative_ranef_df

Unnamed: 0,effect,key,group
0,-1.139464,"(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0)",matched
1,-0.971805,"(1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 2, 0)",matched
2,-0.931835,"(1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0)",matched
3,-0.782567,"(1, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0)",matched
4,-1.826926,"(1, 0, 3, 0, 0, 0, 0, 0, 2, 1, 0, 0)",real
5,-1.138994,"(1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0)",real
6,-0.791772,"(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0)",real
7,-0.57437,"(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0)",real
8,-0.881436,"(1, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 1)",unmatched_top_30
9,-0.760017,"(1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0)",unmatched_top_30


## Human-created games

In [39]:
# For every row in the 'real' group: look up the included real game stored under the
# row's key and print it with its translation and random effect
real_group_rows = human_likesness_negative_ranef_df.loc[human_likesness_negative_ranef_df['group'] == 'real']
for row_index, row in real_group_rows.iterrows():
    # The CSV stores each key as the string form of a tuple; the index map uses the tuple itself
    real_game_index = key_to_included_real_game_index[literal_eval(row['key'])]
    print_game(
        real_game_fitness_scores[real_game_index],
        real_game_texts[real_game_index],
        human_game_texts[row['key']],
        f'(Random effect: {row.effect:.3f})',
    )
    

Fitness: 35.773 (Random effect: -1.827)
Setup: Ensure no blocks are on any shelves and maintain a building with exactly six objects throughout the game. All blocks must either be in this building or match the type of another block within it, without being identical.

Gameplay: 
- Hold blocks without placing them inside any building from the initial setup.
- Interact with blocks by holding or moving them, ensuring that if one block is inside a building (not from the initial setup) and another is held, they must touch at some point. Then throw the block that was inside the building.
- By the end of the game, ensure that for any two blocks not in the initial setup building, they are not stacked on each other unless there are two other stacked blocks of the same type inside the non-setup building.

Terminal: The game concludes with the final state of the buildings and blocks.

Scoring: Earn 5 points for each unique object you hold without placing in a building. Gain 100 points if there is 

## Matched model games

In [42]:
# For every row in the 'matched' group: print the model game stored under the row's key
# with its translation and random effect
matched_group_rows = human_likesness_negative_ranef_df.loc[human_likesness_negative_ranef_df['group'] == 'matched']
for row_index, row in matched_group_rows.iterrows():
    # The model is keyed by tuples, the translations by the raw key string from the CSV
    key = literal_eval(row['key'])
    print_game(
        model.fitness_values[key],
        ast_printer.ast_to_string(model.population[key], '\n'),
        human_cell_archive_game_texts[row['key']],
        f'(Random effect: {row.effect:.3f})',
    )
    
    

Fitness: 35.899 (Random effect: -1.139)
Gameplay: move chairs so that they end up on desks and are in motion

Terminal: the game ends once your score is 30 or more

Scoring: you get 1 point for each chair that is on a desk and in motion at the end of the game
(define (game evo-8181-95-0) (:domain medium-objects-room-v1)
(:constraints
  (and
    (preference preference0
      (exists (?v0 - chair ?v1 - desk)
        (at-end
          (and
            (on ?v1 ?v0)
            (not
              (agent_holds ?v0)
           )
            (in_motion ?v0)
         )
       )
     )
   )
 )
)
(:terminal
  (>= (total-score) 30)
)
(:scoring
  (count preference0)
)
)

Fitness: 37.435 (Random effect: -0.972)
Setup: put a hexagonal bin near the rug and keep it there throughout the game.

Gameplay: throw dodgeballs so that they land either inside a doggie bed, inside the hexagonal bin, or simply stop moving after being thrown.

Scoring: you get 1 point for each dodgeball that lands in a doggie bed,

## Unmatched model games

In [43]:
# For every row in the 'unmatched_top_30' group: print the model game stored under the
# row's key with its translation and random effect
unmatched_group_rows = human_likesness_negative_ranef_df.loc[human_likesness_negative_ranef_df['group'] == 'unmatched_top_30']
for row_index, row in unmatched_group_rows.iterrows():
    # The model is keyed by tuples, the translations by the raw key string from the CSV
    key = literal_eval(row['key'])
    print_game(
        model.fitness_values[key],
        ast_printer.ast_to_string(model.population[key], '\n'),
        novel_archive_cell_game_texts[row['key']],
        f'(Random effect: {row.effect:.3f})',
    )
    
    

Fitness: 36.118 (Random effect: -0.881)
Setup: Place a hexagonal bin near the door and make sure it stays there throughout the game.

Gameplay: Move objects so they are not moving and are near the rug, and throw dodgeballs so they either stop moving anywhere or come to rest on the hexagonal bin.

Scoring: You get 1 point for each object that is not moving and is near the rug at the end of the game, and 1 point for each time you throw a dodgeball, regardless of where it stops, including if it lands on the hexagonal bin.
(define (game evo-8176-302-1) (:domain medium-objects-room-v1)
(:setup
  (exists (?v0 - hexagonal_bin)
    (game-conserved
      (near door ?v0)
   )
 )
)
(:constraints
  (and
    (preference preference0
      (exists (?v0 - game_object)
        (at-end
          (and
            (not
              (in_motion ?v0)
           )
            (near rug ?v0)
         )
       )
     )
   )
    (preference preference1
      (exists (?v0 - hexagonal_bin ?v1 - dodgeball)
       