In [1]:
%autoreload 2

In [2]:
from collections import defaultdict
from argparse import Namespace
import copy
import gzip
import itertools
import os
import pickle
import sys
import typing

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tatsu
import tatsu.ast
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline

sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.ast_utils import _extract_game_id
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.ast_mcmc_regrowth import *

In [3]:
# Read and compile the DSL grammar; the context manager closes the file
# handle instead of leaving it to the garbage collector.
with open('../dsl/dsl.ebnf') as grammar_file:
    grammar = grammar_file.read()
grammar_parser = tatsu.compile(grammar)

# Real games: parsed ASTs and their printed text form.
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]

# Regrown games from the two regrowth sample files.
regrown_game_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples.pddl'))
regrown_game_1024_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples-1024.pddl'))

# Report the counts and the regrowths-per-real-game ratios. The divisor is
# the number of real games actually loaded rather than a hard-coded 98.
n_real_games = len(real_game_texts)
print(n_real_games, len(regrown_game_texts), len(regrown_game_texts) / n_real_games, len(regrown_game_1024_texts), len(regrown_game_1024_texts) / n_real_games)


98 6272 64.0 100352 1024.0


In [4]:
# Load the precomputed fitness features, list which source files contributed
# rows, then preview the first rows of the frame.
fitness_df = utils.load_fitness_data('../data/fitness_features_1024_regrowths.csv.gz')
source_files = fitness_df['src_file'].unique()
print(source_files)
fitness_df.head()

['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl']


Unnamed: 0,Index,src_file,game_name,domain_name,original_game_name,real,all_variables_defined,all_variables_used,all_preferences_used,setup_objects_used,...,ast_ngram_constraints_n_6_score,ast_ngram_constraints_n_7_score,ast_ngram_terminal_n_2_score,ast_ngram_terminal_n_3_score,ast_ngram_terminal_n_4_score,ast_ngram_terminal_n_5_score,ast_ngram_scoring_n_2_score,ast_ngram_scoring_n_3_score,ast_ngram_scoring_n_4_score,ast_ngram_scoring_n_5_score
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,6172feb1665491d1efbce164-0,1,1,1,1,1.0,...,0.978689,0.985999,0.972647,0.982941,0.983447,0.985028,0.923947,0.92041,0.977171,0.98816
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,5f77754ba932fb2c4ba181d8-2,1,1,1,1,1.0,...,0.984116,0.984782,0.969335,0.980352,0.979375,0.985628,0.910105,0.933163,0.949498,0.96451
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,614b603d4da88384282967a7-3,1,1,1,1,0.0,...,0.978814,0.982546,0.0,0.0,0.0,0.0,0.854408,0.866689,0.928334,0.945839
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,5bc79f652885710001a0e82a-5,1,1,1,1,0.0,...,0.98897,0.988449,0.0,0.0,0.0,0.0,0.923947,0.92041,0.977171,0.98816
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,614dec67f6eb129c3a77defd-6,1,1,1,1,0.25,...,0.984601,0.983148,0.0,0.0,0.0,0.0,0.978064,0.979764,0.973707,0.988379


# Plan of attack
* Use the code in `ast_counter_sampler.py` to generate a sample from the MLE
* Score it with a fitness function adapted to working with a single example at a time
* At each subsequent iteration, regrow the game from a random node, and score the regrowth
* Since we have energy scores before and after, we can either accept greedily if $E_{new} < E_{old}$, or accept with probability $\min\left(1, \exp(\beta (E_{old} - E_{new}))\right)$ (the Metropolis criterion)

In [5]:
# Arguments handed to the samplers constructed below, with paths taken
# relative to the repository root one level up.
# `Namespace` is the name the import cell brings in explicitly; the bare
# `argparse` module is not imported there, so it is not referenced here.
# NOTE(review): `DEFUALT_RANDOM_SEED` (sic) is kept as spelled; presumably it
# is exported under that name by one of the wildcard imports — confirm.
DEFAULT_ARGS = Namespace(
    grammar_file=os.path.join('..', DEFAULT_GRAMMAR_FILE),
    parse_counter=False,
    counter_output_path=os.path.join('..', DEFAULT_COUNTER_OUTPUT_PATH),
    random_seed=DEFUALT_RANDOM_SEED,
)

In [11]:
# Identifiers of the fitness model and of the featurizer passed to the sampler.
FITNESS_MODEL_DATE_ID = '2023_03_07_2'
FITNESS_FEATURIZER_PATH = '../models/fitness_featurizer_2023_03_07.pkl.gz'

# Regrowth sampler with greedy acceptance; the acceptance temperature is
# intentionally left unset in this configuration.
mcmc = MCMCRegrowthSampler(
    DEFAULT_ARGS,
    FITNESS_MODEL_DATE_ID,
    FITNESS_FEATURIZER_PATH,
    greedy_acceptance=True,
    plateau_patience_steps=100,
    max_steps=10000,
    fitness_function_relative_path='..',
    # acceptance_temperature=10.0,
)

In [13]:
# Generate `n_samples` samples with the sampler configured above; the
# recorded output logs one "Plateaued at step ..." line per sample.
n_samples = 10
mcmc.multiple_samples(n_samples, verbose=1, should_tqdm=True)

  0%|          | 0/10 [00:00<?, ?it/s]

Plateaued at step 260 with energy -9.61622
Plateaued at step 460 with energy -8.76074
Plateaued at step 195 with energy -7.55684
Plateaued at step 332 with energy -13.00698
Plateaued at step 128 with energy -10.08365
Plateaued at step 393 with energy -11.71130
Plateaued at step 494 with energy -11.69675
Plateaued at step 179 with energy -8.69254
Plateaued at step 208 with energy -9.80241
Plateaued at step 236 with energy -10.34956


In [14]:
# Show the report for sample index 3: its energy, the features contributing
# most to that energy, and the game text.
mcmc.visualize_sample(3)

### Energy of visualized game: -13.007

### Top features contributing to the game's energy
feature name: **value** = (original feature value => regrown feature value) * weight

| Features increasing energy (= more fake)                                   | Features decreasing energy (= more real)                                     |
|----------------------------------------------------------------------------|------------------------------------------------------------------------------|
| section_exists_setup: **0.406** = (1.000) * 0.406                          | ast_ngram_full_n_7_score: **-4.587** = (1.000) * -4.587                      |
| section_exists_terminal: **0.361** = (1.000) * 0.361                       | ast_ngram_constraints_n_7_score: **-3.689** = (1.000) * -3.689               |
| pref_forall_used_correct: **0.354** = (1.000) * 0.354                      | ast_ngram_setup_n_5_score: **-1.265** = (1.000) * -1.265                     |
| mean_depth_terminal_1: **0.168** = (1.000) * 0.168                         | all_variables_used: **-1.108** = (1.000) * -1.108                            |
| mean_depth_setup_2: **0.142** = (1.000) * 0.142                            | all_preferences_used: **-0.914** = (1.000) * -0.914                          |
| mean_depth_constraints_2: **0.123** = (1.000) * 0.123                      | no_two_number_operations: **-0.825** = (1.000) * -0.825                      |
| pref_forall_pref_forall_correct_types_correct: **0.063** = (1.000) * 0.063 | ast_ngram_terminal_n_5_score: **-0.649** = (1.000) * -0.649                  |
| max_depth_setup_1: **0.032** = (1.000) * 0.032                             | pref_forall_pref_forall_correct_arity_correct: **-0.431** = (1.000) * -0.431 |
|                                                                            | ast_ngram_scoring_n_5_score: **-0.385** = (1.000) * -0.385                   |
|                                                                            | all_variables_defined: **-0.293** = (1.000) * -0.293                         |
|                                                                            | mean_depth_scoring_1: **-0.214** = (1.000) * -0.214                          |
|                                                                            | max_depth_constraints_1: **-0.175** = (1.000) * -0.175                       |
|                                                                            | max_depth_terminal_1: **-0.070** = (1.000) * -0.070                          |
|                                                                            | no_adjacent_same_modal: **-0.044** = (1.000) * -0.044                        |
|                                                                            | max_depth_scoring_1: **-0.018** = (1.000) * -0.018                           |

### Game:

```pddl
(define (game mcmc-3) (:domain many-objects-room-v1)
(:setup
  (forall (?v0 - curved_wooden_ramp)
    (game-conserved
      (on ?v0 back)
    )
  )
)
(:constraints
  (and
    (forall (?v1 - dodgeball)
      (and
        (preference preference0
          (exists (?v2 - doggie_bed)
            (then
              (once (touch ?v1 ?v2) )
              (hold (not (agent_holds ?v1) ) )
              (hold-while (in_motion ?v1) (in_motion ?v1) )
              (once (on ?v2 door) )
            )
          )
        )
      )
    )
  )
)
(:terminal
  (>= 5 (total-score) )
)
(:scoring
  (count-once-per-objects preference0:blue_dodgeball)
)
)
```

In [19]:
# Starting from each real game with index in [start, end), run `n_samples`
# regrowth chains that use that game as the initial proposal.
n_samples = 5
start = 2
end = start + n_samples

for original_index in range(start, end):
    print(f'Generating samples starting from original index {original_index} (id {_extract_game_id(real_game_texts[original_index])})')
    initial_game = game_asts[original_index]
    mcmc.multiple_samples(
        n_samples,
        verbose=1,
        should_tqdm=True,
        initial_proposal=initial_game,
    )

Generating samples starting from original index 2 (id 614b603d4da88384282967a7-3)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 101 with energy 1.65550 (initial proposal energy: 1.65550)
Plateaued at step 101 with energy 1.65550 (initial proposal energy: 1.65550)
Plateaued at step 101 with energy 1.65550 (initial proposal energy: 1.65550)
Plateaued at step 101 with energy 1.65550 (initial proposal energy: 1.65550)
Plateaued at step 178 with energy 1.63796 (initial proposal energy: 1.65550)
Generating samples starting from original index 3 (id 5bc79f652885710001a0e82a-5)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 123 with energy 1.93191 (initial proposal energy: 2.09366)
Plateaued at step 146 with energy 1.93191 (initial proposal energy: 2.09366)
Plateaued at step 109 with energy 1.76072 (initial proposal energy: 2.09366)
Plateaued at step 130 with energy 1.76072 (initial proposal energy: 2.09366)
Plateaued at step 106 with energy 1.93191 (initial proposal energy: 2.09366)
Generating samples starting from original index 4 (id 614dec67f6eb129c3a77defd-6)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 127 with energy -7.88752 (initial proposal energy: -7.80552)
Plateaued at step 141 with energy -7.82221 (initial proposal energy: -7.80552)
Plateaued at step 101 with energy -7.80552 (initial proposal energy: -7.80552)
Plateaued at step 163 with energy -7.93101 (initial proposal energy: -7.80552)
Plateaued at step 108 with energy -7.82221 (initial proposal energy: -7.80552)
Generating samples starting from original index 5 (id 615b40bb6cdb0f1f6f291f45-8)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 158 with energy -12.69764 (initial proposal energy: -12.62111)
Plateaued at step 147 with energy -12.65703 (initial proposal energy: -12.62111)
Plateaued at step 199 with energy -12.86351 (initial proposal energy: -12.62111)
Plateaued at step 162 with energy -12.69764 (initial proposal energy: -12.62111)
Plateaued at step 259 with energy -12.95239 (initial proposal energy: -12.62111)
Generating samples starting from original index 6 (id 615452aaabb932ada88ef3ca-9)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 223 with energy -7.98485 (initial proposal energy: -7.71727)
Plateaued at step 101 with energy -7.71727 (initial proposal energy: -7.71727)
Plateaued at step 101 with energy -7.71727 (initial proposal energy: -7.71727)
Plateaued at step 108 with energy -7.78715 (initial proposal energy: -7.71727)
Plateaued at step 101 with energy -7.71727 (initial proposal energy: -7.71727)


In [36]:
# Show the report for sample index 27. For this sample the recorded output
# compares the real game with its regrowth: energies, game diff, feature diffs.
mcmc.visualize_sample(27)

### Energy of real game: 2.094 | Energy of regrown game: 1.761 | Difference: -0.333

### Top features changing the game's energy
feature name: **value** = (original feature value => regrown feature value) * weight

| Features increasing energy (= more fake)                            | Features decreasing energy (= more real)                         |
|---------------------------------------------------------------------|------------------------------------------------------------------|
| compositionality_structure_1: **0.053** = (1.000 => 0.000) * -0.053 | mean_depth_constraints_0: **-0.193** = (0.000 => 1.000) * -0.193 |
| compositionality_structure_0: **0.047** = (1.000 => 0.000) * -0.047 | max_depth_constraints_1: **-0.175** = (0.000 => 1.000) * -0.175  |
| no_adjacent_same_modal: **0.044** = (1.000 => 0.000) * -0.044       | mean_depth_constraints_2: **-0.123** = (1.000 => 0.000) * 0.123  |
| max_depth_constraints_2: **0.014** = (1.000 => 0.000) * -0.014      |                                                                  |

### Game Diffs

0,1,2,3,4,5
f,1,(define (game 5bc79f652885710001a0e82a-5) (:domain few-objects-room-v1),f,1.0,(define (game 5bc79f652885710001a0e82a-5) (:domain few-objects-room-v1)
,2,(:constraints,,2.0,(:constraints
,3,(and,,3.0,(and
n,4,(preference throwBallToBin,n,4.0,(preference preference0
,5,(exists (?d - dodgeball ?h - hexagonal_bin),,5.0,(exists (?v0 - ball)
,6,(then,,6.0,(at-end
,7,(once (and (agent_holds ?d) (= (distance agent ?h) 1) ) ),,7.0,(agent_holds ?v0)
,8,(hold (and (not (agent_holds ?d) ) (in_motion ?d) ) ),,,
,9,(once (and (not (in_motion ?d) ) (in ?h ?d) ) ),,,
,10,),,8.0,)

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,


### Feature Diffs

no_adjacent_same_modal: -1.000 (1.000 => 0.000)
compositionality_structure_0: -1.000 (1.000 => 0.000)
compositionality_structure_1: -1.000 (1.000 => 0.000)
mean_depth_constraints_2: -1.000 (1.000 => 0.000)
max_depth_constraints_2: -1.000 (1.000 => 0.000)
mean_depth_constraints_0: 1.000 (0.000 => 1.000)
max_depth_constraints_1: 1.000 (0.000 => 1.000)


In [28]:
# Peek at the last five feature names exposed by the sampler.
mcmc.feature_names[-5:]

['ast_ngram_full_n_7_score',
 'ast_ngram_setup_n_5_score',
 'ast_ngram_constraints_n_7_score',
 'ast_ngram_terminal_n_5_score',
 'ast_ngram_scoring_n_5_score']

In [25]:
# Build a standalone featurizer with binarization and merging left enabled.
# `Namespace` and the `src.fitness_features` names are already imported by
# the notebook's import cell, so they are not re-imported mid-notebook.
# NOTE(review): `build_fitness_featurizer` is presumably provided by the
# `src.fitness_features` wildcard import — confirm.
args = Namespace(no_binarize=False, no_merge=False)
featurizer = build_fitness_featurizer(args)

In [26]:
# Parse a game string into an AST with the compiled grammar.
# NOTE(review): `s` is not assigned in any cell visible in this notebook;
# presumably it came from a since-removed cell — define it before running this.
a = grammar_parser.parse(s)

In [28]:
# Featurize the parsed game, keep only the `predicate_under_modal` features,
# and show how many there are.
parsed_features = featurizer.parse(a, 'mcmc', True)
ps = {
    name: value
    for name, value in parsed_features.items()
    if 'predicate_under_modal' in name
}
len(ps)

50

In [32]:
# Show only the `predicate_under_modal` features with a positive value.
{name: value for name, value in ps.items() if value > 0}

{'predicate_under_modal_hold_in_motion': 1,
 'predicate_under_modal_hold_agent_holds': 1,
 'predicate_under_modal_once_agent_holds': 1}

In [51]:
# Drill into the first element of stored sample 5 and display the `exists_vars`
# node nested inside its setup section. The recorded output shows a variable
# list in which `?v1` is declared twice.
mcmc.samples[5][0][3][1].setup.exists_args.setup.and_args[0].setup.exists_vars

{'variables': [{'var_names': ['?v1'], 'var_type': {'type': {'type_names': ['dodgeball', 'chair', 'curved_wooden_ramp', 'wall', 'book', 'alarm_clock', 'dodgeball'], 'parseinfo': {'tokenizer': None, 'rule': 'either_types', 'pos': 3357, 'endpos': 3357, 'line': 3357, 'endline': 3357, 'alerts': []}}, 'parseinfo': {'tokenizer': None, 'rule': 'type_definition', 'pos': 3358, 'endpos': 3358, 'line': 3358, 'endline': 3358, 'alerts': []}}, 'parseinfo': {'tokenizer': None, 'rule': 'variable_type_def', 'pos': 3359, 'endpos': 3359, 'line': 3359, 'endline': 3359, 'alerts': []}}, {'var_names': ['?v1'], 'var_type': {'type': 'hexagonal_bin', 'parseinfo': {'tokenizer': None, 'rule': 'type_definition', 'pos': 7, 'endpos': 7, 'line': 7, 'endline': 7, 'alerts': []}}, 'parseinfo': {'tokenizer': None, 'rule': 'variable_type_def', 'pos': 8, 'endpos': 8, 'line': 8, 'endline': 8, 'alerts': []}}], 'parseinfo': {'tokenizer': None, 'rule': 'variable_list', 'pos': 9, 'endpos': 9, 'line': 9, 'endline': 9, 'alerts': [

In [None]:
# Look up a single n-gram score feature for the first stored sample.
# NOTE(review): the names listed by `mcmc.feature_names` have the form
# 'ast_ngram_full_n_7_score'; confirm 'ast_ngram_n_7_score' is still a valid key.
mcmc._proposal_to_features(mcmc.samples[0][0])['ast_ngram_n_7_score']

In [None]:
# Score the first stored sample with the n-gram model of the featurizer's
# first full-AST registry entry, passing `log=True`.
mcmc.fitness_featurizer.full_ast_registry[0].n_gram_model.score(mcmc.samples[0][0], log=True)

In [None]:
# Display the `scale_series_min_max_values` attribute of the featurizer's
# first preprocessor.
mcmc.fitness_featurizer.preprocessors[0].scale_series_min_max_values

In [None]:
# %prun -r
# Starting from each real game with index in [start, end), run `n_samples`
# regrowth chains and print the third field of every newly stored sample.
n_samples = 5
start = 0
end = start + n_samples

for original_index in range(start, end):
    print(f'Generating samples starting from original index {original_index} (id {_extract_game_id(real_game_texts[original_index])})')
    # `mcmc.samples` keeps growing across calls, so record where this batch
    # starts instead of assuming the list was empty before this cell ran.
    first_new_index = len(mcmc.samples)
    mcmc.multiple_samples(n_samples, verbose=1, should_tqdm=True, initial_proposal=game_asts[original_index])

    print()
    for i in range(first_new_index, len(mcmc.samples)):
        print(i, mcmc.samples[i][2])

In [None]:
# Show the report for sample index 14.
mcmc.visualize_sample(14)

In [None]:
# Score the space-joined text of element 0 of stored sample 14 under the last
# model of the first full-text n-gram registry entry.
# presumably element 0 is the regrown game — TODO confirm
mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].score(ast_printer.ast_to_string(mcmc.samples[14][0], ' '))

In [None]:
# Same scoring as for element 0, applied to element 3 of stored sample 14.
# presumably element 3 is the initial (original) game — TODO confirm
mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].score(ast_printer.ast_to_string(mcmc.samples[14][3], ' '))

In [None]:
# Compare the 5-grams of a sampled game against those of its original game
# under the last model of the first full-text n-gram registry entry.
ngram_model = mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1]


def _print_ngram_stats(ngrams, model):
    """Print every n-gram in `ngrams` with its count and log-probability under `model`."""
    for ngram in ngrams:
        print(ngram, model.ngram_counts[ngram], model.ngram_logprobs[ngram])


sample_ngrams = [
    ('constraints', 'and', 'forall', 'wall', 'and'),
    ('and', 'forall', 'wall', 'and', 'preference'),
    ('forall', 'wall', 'and', 'preference', 'preferenceName'),
    ('wall', 'and', 'preference', 'preferenceName', 'exists'),
    ('and', 'preference', 'preferenceName', 'exists', 'block'),
    ('preference', 'preferenceName', 'exists', 'block', 'atend'),
    ('preferenceName', 'exists', 'block', 'atend', 'in'),
    ('exists', 'block', 'atend', 'in', 'scoring'),
    ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'),
    ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'),
    ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName'),
]
original_ngrams = [
    ('constraints', 'and', 'forall', 'building', 'and'),
    ('and', 'forall', 'building', 'and', 'preference'),
    ('forall', 'building', 'and', 'preference', 'preferenceName'),
    ('building', 'and', 'preference', 'preferenceName', 'exists'),
    ('and', 'preference', 'preferenceName', 'exists', 'block'),
    ('preference', 'preferenceName', 'exists', 'block', 'atend'),
    ('preferenceName', 'exists', 'block', 'atend', 'in'),
    ('exists', 'block', 'atend', 'in', 'scoring'),
    ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'),
    ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'),
    ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName'),
]

# Mean log-probability and per-n-gram statistics for both games.
print(f'Sample ngrams: {np.mean([ngram_model.ngram_logprobs[s] for s in sample_ngrams])} | ')
_print_ngram_stats(sample_ngrams, ngram_model)

print()
print(f'Original ngrams: {np.mean([ngram_model.ngram_logprobs[s] for s in original_ngrams])}')
_print_ngram_stats(original_ngrams, ngram_model)
print()

# Drop the n-grams the two games share (in place), iterating over a copy so
# that removing items does not disturb the loop.
for s in sample_ngrams[:]:
    if s in original_ngrams:
        sample_ngrams.remove(s)
        original_ngrams.remove(s)

# What is left is exactly where the two games differ.
print('Sample remaining ngrams')
_print_ngram_stats(sample_ngrams, ngram_model)

print()
print('Original remaining ngrams')
_print_ngram_stats(original_ngrams, ngram_model)

In [None]:
# Full list of the original game's 5-grams, printed with their counts and
# log-probabilities under the last full-text n-gram model.
original_ngrams = [
    ('constraints', 'and', 'forall', 'building', 'and'),
    ('and', 'forall', 'building', 'and', 'preference'),
    ('forall', 'building', 'and', 'preference', 'preferenceName'),
    ('building', 'and', 'preference', 'preferenceName', 'exists'),
    ('and', 'preference', 'preferenceName', 'exists', 'block'),
    ('preference', 'preferenceName', 'exists', 'block', 'atend'),
    ('preferenceName', 'exists', 'block', 'atend', 'in'),
    ('exists', 'block', 'atend', 'in', 'scoring'),
    ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'),
    ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'),
    ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName'),
]
print('Original ngrams')
ngram_model = mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1]
for ngram in original_ngrams:
    print(ngram, ngram_model.ngram_counts[ngram], ngram_model.ngram_logprobs[ngram])

In [None]:
# Show the report for sample index 7.
mcmc.visualize_sample(7)

In [None]:
# Show the report for sample index 15.
mcmc.visualize_sample(15)

### Notes from looking at these games
* **TODO:** debug the 'section-without' logic; it does not seem to work
* **TODO:** add features for preferences with different lengths of modals

In [None]:
# Print the game text of stored sample 8. The samples live on the sampler
# (`mcmc.samples`); no bare `samples` variable is defined in this notebook.
print(ast_printer.ast_to_string(mcmc.samples[8][0], '\n'))

In [None]:
# Featurize stored sample 7 and show the features whose names contain
# 'correctly', 'forall' or 'no'. The sample is read from `mcmc.samples`; no
# bare `samples` variable is defined in this notebook.
# NOTE(review): elsewhere in this notebook the featurizer is built from a
# Namespace carrying `no_binarize`/`no_merge`; confirm DEFAULT_ARGS suffices here.
fitness_featurizer = build_fitness_featurizer(DEFAULT_ARGS)
features = fitness_featurizer.parse(mcmc.samples[7][0], 'mcmc', True)
{k: v for k, v in features.items() if 'correctly' in k or 'forall' in k or 'no' in k}

In [None]:
# Load the gzip-compressed, pickled fitness model from the default path.
# Unpickling can execute arbitrary code, so only load files from a trusted source.
with gzip.open(DEFAULT_FITNESS_FUNCTION_PATH, 'rb') as model_file:
    cv_fitness_model = pickle.load(model_file)

In [None]:
# Pull the weights and bias of the fitness model's `fc1` layer out as
# squeezed NumPy arrays, then report the mean weight and the bias.
fc1_layer = cv_fitness_model.named_steps['fitness'].model.fc1
weights = fc1_layer.weight.data.detach().numpy().squeeze()
bias = fc1_layer.bias.data.detach().numpy().squeeze()
print(weights.mean(), bias)

In [None]:
# Histogram of the fitness model weights, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.hist(weights, bins=100)
ax.set_title('Fitness Model Weights')
ax.set_xlabel('Weight magnitude')
ax.set_ylabel('Count')

In [None]:
import ast_crossover_sampler

# Parse the real games again, this time with the sampler's own grammar parser.
asts = list(cached_load_and_parse_games_from_file(
    '../dsl/interactive-beta.pddl',
    mcmc.grammar_parser,
    False,
    relative_path='..',
))

# Regrowth sampler that also receives a crossover type, the parsed games,
# and the value 0.25.
mcmc_with_crossover = MCMCRegrowthCrossoverSampler(
    DEFAULT_ARGS,
    ast_crossover_sampler.CrossoverType.SAME_RULE,
    asts,
    0.25,
    '../models/cv_fitness_model_2022_12_26.pkl.gz',
    greedy_acceptance=True,
    plateau_patience_steps=20,
    acceptance_temperature=10.0,
    max_steps=1000,
)

In [None]:
# Draw 10 samples with the crossover sampler, then print the third field of
# every stored sample.
mcmc_with_crossover.multiple_samples(10, verbose=2, should_tqdm=True)
third_fields = [sample[2] for sample in mcmc_with_crossover.samples]
print(third_fields)

In [None]:
# Print the third field of every sample currently stored on the crossover sampler.
# presumably this field is the sample's final energy — TODO confirm
print([x[2] for x in mcmc_with_crossover.samples])