In [1]:
%autoreload 2

In [2]:
from argparse import Namespace
from collections import defaultdict
import copy
from datetime import datetime
import difflib
import gzip
import itertools
import os
import pickle
import sys
import typing

from IPython.display import display, Markdown, HTML  # type: ignore
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline
import tatsu
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm.notebook as tqdmn

import logging
# Raise the matplotlib and numba loggers to WARNING so their lower-level
# messages are not emitted into the notebook output.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('numba').setLevel(logging.WARNING)

# Append the parent directory and its `src` folder to the module search path
# for the `src.*` imports that follow.
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src import fitness_energy_utils as utils
from src.ast_mcmc_regrowth import _load_pickle_gzip
from src.fitness_features import *
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.latest_model_paths import LATEST_FITNESS_FEATURIZER_PATH, LATEST_FITNESS_FUNCTION_DATE_ID, LATEST_FITNESS_FEATURES
from src.ast_counter_sampler import *
from src.ast_utils import cached_load_and_parse_games_from_file, load_games_from_file, _extract_game_id
from src import ast_printer
from src.fitness_features_preprocessing import NGRAM_SCORE_PATTERN
from src.evolutionary_sampler import *

2023-06-08 15:12:58 - ast_utils - DEBUG    - Using cache folder: /tmp/game_generation_cache
2023-06-08 15:12:58 - src.ast_utils - DEBUG    - Using cache folder: /tmp/game_generation_cache
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [3]:
# Read the DSL grammar inside a context manager so the file handle is closed
# as soon as the text has been read, then compile it into a parser.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)

# Materialize the parsed games from the PDDL file as a list, using the parser above.
real_game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))  # type: ignore

# Notebook-local aliases for the imported `LATEST_*` / default constants; they
# select which featurizer file and which saved model are loaded further down
# in this cell.
FEATURIZER_PATH = LATEST_FITNESS_FEATURIZER_PATH
MODEL_DATE_ID = LATEST_FITNESS_FUNCTION_DATE_ID
MODEL_NAME = DEFAULT_SAVE_MODEL_NAME
# Saved MAP-Elites runs to load, as (date_id, folder, filename) triples. Every
# experiment name is paired with every run date; entries are grouped by
# experiment name, with the dates in ascending order inside each group.
_MAP_ELITES_RUN_DATES = ('2023_05_23', '2023_06_02', '2023_06_05')
_MAP_ELITES_EXPERIMENT_NAMES = (
    'map_elites_fitness_rank_node_count_specific_predicates_crossover_cognitive_ops_experiment_final',
    'map_elites_fitness_rank_specific_predicates_setup_crossover_cognitive_ops_experiment_final',
)
MAP_ELITES_PATH_SPECS = [
    (run_date, 'samples', experiment_name)
    for experiment_name in _MAP_ELITES_EXPERIMENT_NAMES
    for run_date in _MAP_ELITES_RUN_DATES
]

# Load the featurizer from FEATURIZER_PATH.
# NOTE(review): `_load_pickle_gzip` presumably unpickles the file — only point
# it at trusted featurizer files.
fitness_featurizer = _load_pickle_gzip(FEATURIZER_PATH)
# The saved model is loaded twice with identical arguments: `fitness_function`
# is the copy that is fit again later in this notebook, while
# `original_fitness_function` is only scored, as the before-tuning reference.
fitness_function, feature_names = load_model_and_feature_columns(MODEL_DATE_ID, name=MODEL_NAME, relative_path='..')  # type: ignore
original_fitness_function, _ = load_model_and_feature_columns(MODEL_DATE_ID, name=MODEL_NAME, relative_path='..')  # type: ignore

In [4]:
DEFAULT_MIN_FITNESS = 0.0


def load_map_elites_games(map_elites_path_specs: typing.List[typing.Tuple[str, str, str]], min_fitness: float = DEFAULT_MIN_FITNESS):
    """Collect the games of saved MAP-Elites runs whose fitness exceeds a threshold.

    Args:
        map_elites_path_specs: (date_id, folder, filename) triples, each passed
            on to `utils.load_data` to load one saved sampler.
        min_fitness: a population entry is kept only when its fitness value is
            strictly greater than this.

    Returns:
        A list of (game, filename) pairs, in spec order and, within one spec,
        in the iteration order of the sampler's population.
    """
    games_with_source = []
    for date_id, folder, filename in map_elites_path_specs:
        sampler = typing.cast(MAPElitesSampler, utils.load_data(date_id, folder, filename))
        # Keep each surviving game together with the file it was loaded from.
        games_with_source.extend(
            (sampler.population[key], filename)
            for key in sampler.population
            if sampler.fitness_values[key] > min_fitness
        )

    return games_with_source


# (game, filename) pairs from every run listed in MAP_ELITES_PATH_SPECS, keeping
# only games whose fitness is above the default threshold (DEFAULT_MIN_FITNESS).
MAP_ELITES_GAMES = load_map_elites_games(MAP_ELITES_PATH_SPECS)

In [5]:
# Load the fitness feature data, then report which source files it contains
# and its overall shape.
fitness_df = utils.load_fitness_data(LATEST_FITNESS_FEATURES)
print(fitness_df.src_file.unique())
print(fitness_df.shape)

# Hand every column name containing 'arg_types' to the featurizer's second
# preprocessor as its forced output keys.
fitness_featurizer.preprocessors[1].forced_output_keys = {
    column for column in fitness_df.columns if 'arg_types' in column
}

['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl.gz']
(100450, 396)


In [6]:
def create_fitness_df_with_map_elites_games(
    fitness_featurizer: ASTFitnessFeaturizer,
    map_elites_games: typing.List[tuple],
):
    """Featurize MAP-Elites games and return them as a processed fitness dataframe.

    Args:
        fitness_featurizer: featurizer whose `parse` is called once per game and
            whose `to_df` then produces the feature dataframe.
        map_elites_games: (game, filename) pairs to featurize.

    Returns:
        The result of `utils.process_fitness_df` applied to the featurized
        rows, re-indexed from zero.

    Note:
        The return value of `parse` is not captured here, so the featurizer
        presumably accumulates rows internally — calling this again with the
        same featurizer may duplicate rows; verify before re-running.
    """
    progress = tqdmn.tqdm(map_elites_games)
    for game_ast, source_filename in progress:
        fitness_featurizer.parse(game_ast, source_filename)

    featurized_df = fitness_featurizer.to_df(use_prior_values=[True, False])
    # Single-frame concat: only serves to renumber the rows from zero.
    renumbered_df = pd.concat([featurized_df], ignore_index=True)
    return utils.process_fitness_df(renumbered_df)

In [7]:
# Featurize the MAP-Elites games and compare the frame's shape with the original data.
map_elites_fitness_df = create_fitness_df_with_map_elites_games(fitness_featurizer, MAP_ELITES_GAMES)
print(map_elites_fitness_df.shape, fitness_df.shape)

# Column differences in both directions, relative to the original fitness data.
unexpected_columns = set(map_elites_fitness_df.columns) - set(fitness_df.columns)
missing_columns = set(fitness_df.columns) - set(map_elites_fitness_df.columns)

# Columns the original data does not have cannot be reconciled: fail loudly.
if unexpected_columns:
    raise ValueError(f'Unexpected columns: {unexpected_columns}')

# Columns absent from the new frame are added as all-zero columns; the reindex
# also puts the columns in the same order as `fitness_df`.
if missing_columns:
    print(f'Missing columns: {missing_columns}')
    map_elites_fitness_df = map_elites_fitness_df.reindex(columns=fitness_df.columns, fill_value=0.0)

# Sanity check that the reindex left nothing missing.
missing_columns = set(fitness_df.columns) - set(map_elites_fitness_df.columns)
if missing_columns:
    raise ValueError(f'Failed to fill missing columns: {missing_columns}')

# Final shape and the total number of NaN cells.
print(map_elites_fitness_df.shape)
print(map_elites_fitness_df.isna().sum().sum())

  0%|          | 0/3076 [00:00<?, ?it/s]

2023-06-08 15:15:46 - root - INFO     - No features found for prefix adjacent_side_3_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features found for prefix agent_holds_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features to merge for prefix distance_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features found for prefix in_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features found for prefix in_motion_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features to merge for prefix object_orientation_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features found for prefix touch_arg_types and suffix setup
2023-06-08 15:15:46 - root - INFO     - No features to merge for prefix adjacent_arg_types and suffix constraints
2023-06-08 15:15:46 - root - INFO     - No features found for prefix adjacent_side_3_arg_types and suffix constraints
2023-06-08 15:1

(3076, 310) (100450, 396)
Missing columns: {'distance_arg_types_blocks_receptacles_constraints', 'agent_holds_arg_types_furniture_constraints', 'agent_holds_arg_types_agent_constraints', 'in_arg_types_building_receptacles_constraints', 'in_motion_arg_types_furniture_constraints', 'agent_holds_arg_types_small_objects_constraints', 'distance_arg_types_blocks_furniture_setup', 'adjacent_arg_types_furniture_large_objects_setup', 'adjacent_side_3_arg_types_furniture_sides_large_objects_setup', 'distance_arg_types_large_objects_room_features_constraints', 'in_arg_types_balls_balls_constraints', 'on_arg_types_furniture_balls_setup', 'object_orientation_arg_types_other_constraints', 'adjacent_arg_types_other_constraints', 'in_arg_types_receptacles_large_objects_constraints', 'distance_arg_types_blocks_room_features_setup', 'distance_arg_types_large_objects_room_features_setup', 'on_arg_types_blocks_blocks_constraints', 'agent_holds_arg_types_other_constraints', 'on_arg_types_large_objects_ball

In [8]:
# Stack the original data and the MAP-Elites rows under a fresh row index,
# then discard the 'Index' column.
combined_fitness_df = (
    pd.concat([fitness_df, map_elites_fitness_df], ignore_index=True)
    .drop(columns=['Index'])
)
# Shape of the combined frame and the sum of its `real` column.
print(combined_fitness_df.shape, combined_fitness_df['real'].sum())


(103526, 395) 98


In [9]:
# Keep every row that did not come from the regrowth samples file.
no_regrowths_df = combined_fitness_df.loc[
    combined_fitness_df['src_file'] != 'ast-real-regrowth-samples-1024.pddl.gz'
]

print(no_regrowths_df.shape, no_regrowths_df['real'].sum())


(3174, 395) 98


## TODO:
* Split this dataset into train and test
* Fine-tune the existing energy function on this (probably adjusting the value of `k`)
* Evaluate the fine-tuned model
* Print the changes in feature weights?

In [10]:
RANDOM_SEED = DEFAULT_RANDOM_SEED

# Convert the combined dataframe into a single tensor without an internal
# test split (only the first element of the returned pair is used), with
# `ignore_original_game=True`.
data_tensor, _ = utils._input_data_to_train_test_tensors(
    input_data=combined_fitness_df,
    feature_columns=feature_names,
    split_test_set=False,
    random_seed=RANDOM_SEED,
    train_prop=utils.DEFAULT_TRAINING_PROP,
    ignore_original_game=True,
)

# Split that tensor into train and test parts with the same seed and proportion.
train_tensor, test_tensor = utils.train_test_split(
    data_tensor,
    random_state=RANDOM_SEED,
    train_size=utils.DEFAULT_TRAINING_PROP,
)

if test_tensor is None:
    print(f'Train tensor shape: {train_tensor.shape}')
else:
    print(f'Train tensor shape: {train_tensor.shape} | Test tensor shape: {test_tensor.shape}')  # type: ignore



Train tensor shape: torch.Size([78, 1056, 371]) | Test tensor shape: torch.Size([20, 1056, 371])


In [11]:
# Same conversion as for `data_tensor`, but with `ignore_original_game=False`.
data_tensor_by_original_game, _ = utils._input_data_to_train_test_tensors(
    input_data=combined_fitness_df,
    feature_columns=feature_names,
    split_test_set=False,
    random_seed=RANDOM_SEED,
    train_prop=utils.DEFAULT_TRAINING_PROP,
    ignore_original_game=False,
)

train_tensor_by_original_game, test_tensor_by_original_game = utils.train_test_split(
    data_tensor_by_original_game,
    random_state=RANDOM_SEED,
    train_size=utils.DEFAULT_TRAINING_PROP,
)

# Displayed as the cell's result: shapes of the full, train and test tensors.
(
    data_tensor_by_original_game.shape,
    train_tensor_by_original_game.shape,
    test_tensor_by_original_game.shape,
)

(torch.Size([98, 1025, 371]),
 torch.Size([78, 1025, 371]),
 torch.Size([20, 1025, 371]))

In [12]:
# Fine-tune the loaded fitness model on the new training tensor.
#
# Option left disabled: also adjusting `k` to match the training tensor.
# k = train_tensor.shape[1] - 1
# print(f'Using k={k} for fitness function')
# fitness_function.named_steps['fitness'].train_kwargs['k'] = k  # type: ignore

FINE_TUNE_LR_DIVISOR = 10
FINE_TUNE_PATIENCE_EPOCHS = 100

fitness_step = fitness_function.named_steps['fitness']
fine_tune_kwargs = fitness_step.train_kwargs  # type: ignore

# Take the starting learning rate from the untouched `original_fitness_function`
# rather than from `fitness_function` itself: dividing the model's own value in
# place would shrink the learning rate again on every re-run of this cell.
baseline_lr = original_fitness_function.named_steps['fitness'].train_kwargs['lr']  # type: ignore

# NOTE(review): presumably keeps the loaded weights instead of re-initializing
# the model when `fit` is called — confirm against the estimator.
fitness_step.init_model = False
fine_tune_kwargs['device'] = 'cpu'
fine_tune_kwargs['lr'] = baseline_lr / FINE_TUNE_LR_DIVISOR
fine_tune_kwargs['should_tqdm'] = True
fine_tune_kwargs['split_validation_from_train'] = True
fine_tune_kwargs['shuffle_negatives'] = True
fine_tune_kwargs['shuffle_validation_negatives'] = False
fine_tune_kwargs['patience_epochs'] = FINE_TUNE_PATIENCE_EPOCHS

# Note: `fit` is called on `fitness_function` itself, so re-running this cell
# fits the same pipeline object again; only the learning rate is protected
# against compounding.
fine_tuned_fitness_function = fitness_function.fit(train_tensor)

  1%|          | 112/20000 [00:17<51:04,  6.49it/s, train_loss=1.69, val_loss=2.07, min_loss=1.66, patience_update_epoch=11]  


In [13]:
# Score the original and the fine-tuned model on the training tensor.
for heading, model in (
    ('Before tuning, training set:', original_fitness_function),
    ('After tuning, training set:', fitness_function),
):
    print(heading)
    print(utils.default_multiple_scoring(model, train_tensor))

# Repeat on the held-out tensor when there is one.
if test_tensor is not None:
    for heading, model in (
        ('Before tuning, test set:', original_fitness_function),
        ('After tuning, test set:', fitness_function),
    ):
        print(heading)
        print(utils.default_multiple_scoring(model, test_tensor))


Before tuning, training set:
{'loss': -3.247014284133911, 'overall_ecdf': -0.008316492090530554, 'single_game_rank': 0.9902296662330627, 'single_game_min_rank': 0.9658767580986023, 'energy_of_negative@1%': 90.32298278808594, 'energy_of_negative@5%': 88.08563995361328}
After tuning, training set:
{'loss': -3.215593099594116, 'overall_ecdf': -0.007906200814854662, 'single_game_rank': 0.991007387638092, 'single_game_min_rank': 0.963981032371521, 'energy_of_negative@1%': 90.47285461425781, 'energy_of_negative@5%': 88.191162109375}
Before tuning, test set:
{'loss': -3.2634761333465576, 'overall_ecdf': -0.008750000000000003, 'single_game_rank': 0.9912322163581848, 'single_game_min_rank': 0.964928925037384, 'energy_of_negative@1%': 90.46813201904297, 'energy_of_negative@5%': 88.24662017822266}
After tuning, test set:
{'loss': -3.344238758087158, 'overall_ecdf': -0.009618844696969695, 'single_game_rank': 0.9911373853683472, 'single_game_min_rank': 0.9620853066444397, 'energy_of_negative@1%': 9

In [None]:
# Tensor built from the original fitness data only (without the MAP-Elites
# rows), with no internal test split.
original_data_tensor, _ = utils._input_data_to_train_test_tensors(
    input_data=fitness_df,
    feature_columns=feature_names,
    split_test_set=False,
    random_seed=RANDOM_SEED,
    train_prop=utils.DEFAULT_TRAINING_PROP,
    ignore_original_game=True,
)

print(original_data_tensor.shape)

# Score both models on that full original dataset.
for heading, model in (
    ('Before tuning, original full dataset:', original_fitness_function),
    ('After tuning, original full dataset:', fitness_function),
):
    print(heading)
    print(utils.default_multiple_scoring(model, original_data_tensor))

In [None]:
# Report the K features whose weights moved the most in each direction
# during fine-tuning.
K = 10

original_weights = original_fitness_function.named_steps['fitness'].model.fc1.weight.data.detach().squeeze()
post_fine_tune_weights = fitness_function.named_steps['fitness'].model.fc1.weight.data.detach().squeeze()

# Original minus fine-tuned: positive entries are weights that dropped during
# fine-tuning, negative entries are weights that increased.
weight_changed = original_weights - post_fine_tune_weights
larger_in_original = torch.topk(weight_changed, k=K, largest=True)
larger_in_fine_tuned = torch.topk(weight_changed, k=K, largest=False)

lines = []

# Both sections are rendered by the same loop so that each entry's Δ always
# comes from the same top-k result as its feature index. (Previously the
# second section printed the Δ values of the first section.)
report_sections = (
    ('### Features whose weights dropped the most in fine-tuning (lower = more predictive of real games):', larger_in_original),
    ('### Features whose weights increased the most in fine-tuning (higher = less predictive of real games):', larger_in_fine_tuned),
)
for heading, top_changes in report_sections:
    lines.append(heading)
    for rank, (idx, delta) in enumerate(zip(top_changes.indices, top_changes.values), start=1):
        lines.append(f'{rank}. {feature_names[idx]} (Δ{delta:.4f}, {original_weights[idx]:.4f} -> {post_fine_tune_weights[idx]:.4f})')

display(Markdown('\n'.join(lines)))

In [None]:
# List the K most negative and K most positive weights after fine-tuning.
K = 20
top_features = torch.topk(post_fine_tune_weights, K)
bottom_features = torch.topk(post_fine_tune_weights, K, largest=False)

lines = ['### Features with largest negative weights post-fine-tuning (most predictive of real games):']
lines.extend(
    f'{rank}. {feature_names[index]} ({weight:.4f})'
    for rank, (index, weight) in enumerate(zip(bottom_features.indices, bottom_features.values), start=1)
)

lines.append('### Features with largest positive weights post-fine-tuning (most predictive of fake games):')
lines.extend(
    f'{rank}. {feature_names[index]} ({weight:.4f})'
    for rank, (index, weight) in enumerate(zip(top_features.indices, top_features.values), start=1)
)

display(Markdown('\n'.join(lines)))
