In [1]:
%autoreload 2

In [2]:
from argparse import Namespace
from collections import defaultdict
import copy
from datetime import datetime
import difflib
import gzip
import itertools
import os
import pickle
import sys
import typing

from IPython.display import display, Markdown, HTML  # type: ignore
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline
import tatsu
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm.notebook as tqdmn


sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src import fitness_energy_utils as utils
from src.ast_mcmc_regrowth import _load_pickle_gzip
from src.fitness_features import *
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.latest_model_paths import LATEST_FITNESS_FEATURIZER_PATH, LATEST_FITNESS_FUNCTION_DATE_ID, LATEST_FITNESS_FEATURES
from src.ast_counter_sampler import *
from src.ast_utils import cached_load_and_parse_games_from_file, load_games_from_file, _extract_game_id
from src import ast_printer
from src.fitness_features_preprocessing import NGRAM_SCORE_PATTERN
from src.evolutionary_sampler import *

2023-05-14 16:44:34 - ast_utils - DEBUG    - Using cache folder: /misc/vlgscratch4/LakeGroup/guy/game_generation_cache
2023-05-14 16:44:35 - src.ast_utils - DEBUG    - Using cache folder: /misc/vlgscratch4/LakeGroup/guy/game_generation_cache
2023-05-14 16:44:35 - git.cmd - DEBUG    - Popen(['git', 'version'], cwd=/home/gd1279/projects/game-generation-modeling/notebooks, universal_newlines=False, shell=None, istream=None)
2023-05-14 16:44:35 - git.cmd - DEBUG    - Popen(['git', 'version'], cwd=/home/gd1279/projects/game-generation-modeling/notebooks, universal_newlines=False, shell=None, istream=None)


In [3]:
# Read and compile the DSL grammar; the context manager guarantees the grammar
# file handle is closed instead of being left for the garbage collector.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)
# Parse the games stored in interactive-beta.pddl into ASTs (materialized as a list
# so they can be iterated more than once).
real_game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))  # type: ignore

FEATURIZER_PATH = LATEST_FITNESS_FEATURIZER_PATH
# NOTE(review): despite its name, this is assigned the fitness function's date id,
# not a filesystem path -- confirm how downstream cells use it.
MODEL_PATH = LATEST_FITNESS_FUNCTION_DATE_ID
# Each spec is a (date_id, folder, filename) triple, unpacked in that order by
# load_map_elites_games.
MAP_ELITES_PATH_SPECS = [
    ('2023_05_13', 'samples', 'evo_sampler_map_elites_fitness_rank_node_count_objects_setup_final'),
    ('2023_05_13', 'samples', 'evo_sampler_map_elites_fitness_rank_node_count_predicates_setup_final'),
    ('2023_05_13', 'samples', 'evo_sampler_map_elites_ucb_node_count_objects_setup_final'),
    ('2023_05_13', 'samples', 'evo_sampler_map_elites_ucb_node_count_predicates_setup_final'),
]

# NOTE(review): the helper name suggests this unpickles a gzip file; only load
# featurizer files from a trusted source.
fitness_featurizer = _load_pickle_gzip(FEATURIZER_PATH)

In [4]:
DEFAULT_MIN_FITNESS = 0.0


def load_map_elites_games(map_elites_path_specs: typing.List[typing.Tuple[str, str, str]], min_fitness: float = DEFAULT_MIN_FITNESS):
    """Collect the games from saved MAP-Elites samplers that exceed a fitness threshold.

    Args:
        map_elites_path_specs: (date_id, folder, filename) triples; each one is
            passed to `utils.load_data` to load one saved sampler.
        min_fitness: a population entry is kept only when its fitness value is
            strictly greater than this threshold.

    Returns:
        A list of (game, filename) tuples, where `filename` is the third element
        of the spec the game was loaded from. Order follows the spec order, then
        the iteration order of each sampler's population.
    """
    games_with_source = []

    for date_id, folder, filename in map_elites_path_specs:
        # The cast only informs type checkers; it performs no runtime conversion.
        sampler = typing.cast(MAPElitesSampler, utils.load_data(date_id, folder, filename))
        games_with_source.extend(
            (sampler.population[cell_key], filename)
            for cell_key in sampler.population
            if sampler.fitness_values[cell_key] > min_fitness
        )

    return games_with_source


# Load, once, every MAP-Elites game whose fitness exceeds the default threshold,
# each paired with the filename of the sampler it came from.
MAP_ELITES_GAMES = load_map_elites_games(MAP_ELITES_PATH_SPECS)

In [5]:
# Load the latest precomputed fitness features and report their source files and size.
fitness_df = utils.load_fitness_data(LATEST_FITNESS_FEATURES)
print(fitness_df['src_file'].unique())
print(fitness_df.shape)

# Force the featurizer to emit every `arg_types` column already present in the loaded data.
# NOTE(review): index 1 is assumed to be the preprocessor that handles the
# arg_types features -- confirm against the featurizer's preprocessor order.
fitness_featurizer.preprocessors[1].forced_output_keys = {
    column for column in fitness_df.columns if 'arg_types' in column
}

['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl']
(100450, 391)


In [6]:
def create_fitness_df_with_map_elites_games(
    current_fitness_df: pd.DataFrame,
    fitness_featurizer: 'ASTFitnessFeaturizer',
    real_games: typing.List,
    map_elites_games: typing.List[tuple],
    real_games_src_file: str = 'interactive-beta.pddl',
) -> pd.DataFrame:
    """Featurize real and MAP-Elites games and append them to the existing real-game rows.

    Args:
        current_fitness_df: existing fitness features; only the rows where
            `real == 1` are kept in the result.
        fitness_featurizer: featurizer whose `parse(game, src_file)` is called for
            every game and whose `to_df()` supplies the new rows. It is mutated:
            every call to this function parses more games into it.
        real_games: games passed to the featurizer under `real_games_src_file`.
        map_elites_games: (game, filename) tuples; each game is passed to the
            featurizer under its own filename.
        real_games_src_file: source-file label recorded for `real_games`. Defaults
            to the hyphenated 'interactive-beta.pddl', matching the file the real
            games are loaded from and the `src_file` values in the existing
            fitness data (the previous hard-coded value used an underscore).

    Returns:
        A new frame with a fresh 0..n-1 index: the `real == 1` rows of
        `current_fitness_df` followed by the rows of `fitness_featurizer.to_df()`.
    """
    for game in real_games:
        fitness_featurizer.parse(game, real_games_src_file)

    for game, filename in tqdmn.tqdm(map_elites_games):
        fitness_featurizer.parse(game, filename)

    featurized_df = fitness_featurizer.to_df()
    current_fitness_real_df = current_fitness_df[current_fitness_df.real == 1]

    # NOTE(review): assuming to_df() returns one row per parsed game, the real games
    # appear twice in the result (once from current_fitness_df, once re-featurized
    # above) -- confirm this duplication is intended before training on it.
    return pd.concat([current_fitness_real_df, featurized_df], ignore_index=True)

In [7]:
# Combine the existing real-game rows with freshly featurized real and MAP-Elites games.
map_elites_fitness_df = create_fitness_df_with_map_elites_games(
    current_fitness_df=fitness_df,
    fitness_featurizer=fitness_featurizer,
    real_games=real_game_asts,
    map_elites_games=MAP_ELITES_GAMES,
)
print(map_elites_fitness_df.shape, fitness_df.shape)
# Sanity check: an empty set means both frames expose exactly the same columns.
print(set(map_elites_fitness_df.columns) ^ set(fitness_df.columns))

  0%|          | 0/743 [00:00<?, ?it/s]

2023-05-14 16:45:04 - root - INFO     - No features found for prefix agent_holds_arg_types and suffix setup
2023-05-14 16:45:05 - root - INFO     - No features found for prefix in_motion_arg_types and suffix setup
2023-05-14 16:45:05 - root - INFO     - No features to merge for prefix object_orientation_arg_types and suffix setup
2023-05-14 16:45:05 - root - INFO     - No features found for prefix touch_arg_types and suffix setup
2023-05-14 16:45:05 - root - INFO     - No features found for prefix adjacent_side_3_arg_types and suffix constraints
2023-05-14 16:45:05 - root - INFO     - No features to merge for prefix agent_holds_arg_types and suffix constraints
2023-05-14 16:45:05 - root - INFO     - No features found for prefix between_arg_types and suffix constraints
2023-05-14 16:45:05 - root - INFO     - No features to merge for prefix distance_arg_types and suffix constraints
2023-05-14 16:45:05 - root - INFO     - No features to merge for prefix in_arg_types and suffix constraints

Merging 8 features for prefix adjacent_arg_types and suffix setup
Merging 4 features for prefix adjacent_side_3_arg_types and suffix setup
Merging 6 features for prefix between_arg_types and suffix setup
Merging 5 features for prefix distance_arg_types and suffix setup
Merging 3 features for prefix in_arg_types and suffix setup
Merging 9 features for prefix on_arg_types and suffix setup
Merging 1 features for prefix adjacent_arg_types and suffix constraints
Merging 14 features for prefix on_arg_types and suffix constraints
Merging 9 features for prefix touch_arg_types and suffix constraints
(939, 391) (100450, 391)
set()


## TODO:
* Split this dataset into train and test
* Fine-tune the existing energy function on this (probably adjusting the value of `k`)
* Evaluate the fine-tuned model
* Print the changes in feature weights?