In [2]:
%autoreload 2

In [3]:
from collections import defaultdict
from argparse import Namespace
import copy
import gzip
import itertools
import os
import pickle
import sys
import typing

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tatsu
import tatsu.ast
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline

sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.ast_utils import _extract_game_id
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.fitness_features import *
from src.ast_counter_sampler import *
from src.ast_mcmc_regrowth import *

In [4]:
grammar = open('../dsl/dsl.ebnf').read()
grammar_parser = tatsu.compile(grammar)
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]
regrown_game_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples.pddl'))


In [5]:
fitness_df = utils.load_fitness_data()
print(fitness_df.src_file.unique())
fitness_df.head()

['interactive-beta.pddl' 'ast-real-regrowth-samples.pddl']


Unnamed: 0,Index,src_file,game_name,domain_name,real,original_game_name,all_variables_defined,all_variables_used,all_preferences_used,setup_objects_used,...,ast_ngram_n_5_0,ast_ngram_n_5_1,ast_ngram_n_5_2,ast_ngram_n_5_3,ast_ngram_n_5_4,ast_ngram_n_5_5,ast_ngram_n_5_6,ast_ngram_n_5_7,ast_ngram_n_5_8,ast_ngram_n_5_9
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,1,6172feb1665491d1efbce164-0,1,1,1,1.0,...,1,1,1,1,1,1,1,1,1,1
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,1,5f77754ba932fb2c4ba181d8-2,1,1,1,1.0,...,1,1,0,1,0,1,0,1,1,1
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,1,614b603d4da88384282967a7-3,1,1,1,0.0,...,1,0,0,0,0,0,0,0,0,0
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,1,5bc79f652885710001a0e82a-5,1,1,1,0.0,...,1,1,1,1,1,1,1,1,1,1
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,1,614dec67f6eb129c3a77defd-6,1,1,1,0.25,...,1,1,0,1,1,1,1,1,1,1


# Plan of attack
* Use the code in `ast_counter_sampler.py` to generate a sample from the MLE
* Score it with a fitness function adapted to working with a single example at a time
* At each subsequent iteration, regrow the game from a random node, and score the regrowth
* Since we have energy scores before and after, we can either accept greedily if $E_{new} < E_{old}$, or accept with probability $\exp(\beta (E_{old} - E_{new} ))$

In [6]:
DEFAULT_ARGS = argparse.Namespace(
    grammar_file=os.path.join('..', DEFAULT_GRAMMAR_FILE),
    parse_counter=False,
    counter_output_path=os.path.join('..', DEFAULT_COUNTER_OUTPUT_PATH),
    random_seed=DEFUALT_RANDOM_SEED,
)

In [7]:
FITNESS_MODEL_PATH = '../models/cv_fitness_model_2023_02_08.pkl.gz'
FITNESS_FEATURIZER_PATH = '../models/fitness_featurizer_2023_02_08.pkl.gz'

feature_names = [c for c in fitness_df.columns if c not in NON_FEATURE_COLUMNS]

mcmc = MCMCRegrowthSampler(DEFAULT_ARGS, feature_names,
    FITNESS_MODEL_PATH, FITNESS_FEATURIZER_PATH, greedy_acceptance=True, 
    plateau_patience_steps=500, max_steps=5000)  #   acceptance_temperature=10.0, 

In [9]:
# %prun -r 
n_samples = 5
start = 1
end = start + 5

for original_index in range(start, end):
    print(f'Generating samples starting from original index {original_index} (id {_extract_game_id(real_game_texts[original_index])})')
    mcmc.multiple_samples(n_samples, verbose=1, should_tqdm=True, initial_proposal=game_asts[original_index])

    print()
    for i in range(n_samples * (original_index - start), n_samples * (original_index - start + 1)):
        print(i, mcmc.samples[i][2])

Generating samples starting from original index 1 (id 5f77754ba932fb2c4ba181d8-2)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 610 with energy 1.30080 (initial proposal energy: 1.86473)
Plateaued at step 1177 with energy 1.20492 (initial proposal energy: 1.86473)
Plateaued at step 501 with energy 1.50710 (initial proposal energy: 1.86473)
Plateaued at step 720 with energy 1.30080 (initial proposal energy: 1.86473)
Plateaued at step 743 with energy 1.30080 (initial proposal energy: 1.86473)

0 1.3008042573928833
1 1.2919270992279053
2 1.2931333780288696
3 1.3008042573928833
4 1.2846934795379639
Generating samples starting from original index 2 (id 614b603d4da88384282967a7-3)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 1363 with energy -3.28999 (initial proposal energy: 0.84700)
Plateaued at step 501 with energy 0.84700 (initial proposal energy: 0.84700)
Plateaued at step 597 with energy 0.49930 (initial proposal energy: 0.84700)
Plateaued at step 810 with energy 0.49930 (initial proposal energy: 0.84700)
Plateaued at step 879 with energy 0.49930 (initial proposal energy: 0.84700)

5 0.847002387046814
6 1.3008042573928833
7 1.2049205303192139
8 1.507095217704773
9 1.3008042573928833
Generating samples starting from original index 3 (id 5bc79f652885710001a0e82a-5)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 830 with energy 1.87107 (initial proposal energy: 2.05287)
Plateaued at step 501 with energy 2.05287 (initial proposal energy: 2.05287)
Plateaued at step 1075 with energy 1.26118 (initial proposal energy: 2.05287)
Plateaued at step 501 with energy 2.05287 (initial proposal energy: 2.05287)
Plateaued at step 734 with energy 1.62723 (initial proposal energy: 2.05287)

10 1.3008042573928833
11 -3.2899866104125977
12 0.847002387046814
13 0.49929893016815186
14 0.49929893016815186
Generating samples starting from original index 4 (id 614dec67f6eb129c3a77defd-6)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 1821 with energy 0.67794 (initial proposal energy: 2.21001)
Plateaued at step 780 with energy 1.18416 (initial proposal energy: 2.21001)
Plateaued at step 1345 with energy 0.94417 (initial proposal energy: 2.21001)
Plateaued at step 763 with energy 1.68948 (initial proposal energy: 2.21001)
Plateaued at step 580 with energy 2.14627 (initial proposal energy: 2.21001)

15 0.49929893016815186
16 1.8710731267929077
17 2.0528664588928223
18 1.2611806392669678
19 2.0528664588928223
Generating samples starting from original index 5 (id 615b40bb6cdb0f1f6f291f45-8)


  0%|          | 0/5 [00:00<?, ?it/s]

Plateaued at step 939 with energy 2.60935 (initial proposal energy: 3.08689)
Plateaued at step 1549 with energy 2.36589 (initial proposal energy: 3.08689)
Plateaued at step 635 with energy 2.88885 (initial proposal energy: 3.08689)
Plateaued at step 824 with energy 3.04646 (initial proposal energy: 3.08689)
Plateaued at step 883 with energy 2.29598 (initial proposal energy: 3.08689)

20 1.6272269487380981
21 0.6779375076293945
22 1.184155821800232
23 0.944167971611023
24 1.6894773244857788


In [25]:
mcmc.visualize_sample(14)

### Energy of real game: 0.847 | Energy of regrown game: 0.499 | Difference: -0.348

### Top features changing the game's energy
feature name: **value** = (original feature value => regrown feature value) * weight

| Features increasing energy (= more fake)                                        | Features decreasing energy (= more real)                                              |
|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------|
| in_arg_types_building_blocks_constraints: **0.228** = (1.000 => 0.000) * -0.228 | text_ngram_n_5_score: **-0.211** = (0.076 => 0.020) * 3.749                           |
|                                                                                 | text_ngram_n_4_score: **-0.159** = (0.105 => 0.051) * 2.951                           |
|                                                                                 | text_ngram_n_3_score: **-0.091** = (0.129 => 0.087) * 2.189                           |
|                                                                                 | in_arg_types_blocks_room_features_constraints: **-0.076** = (0.000 => 1.000) * -0.076 |
|                                                                                 | text_ngram_n_2_score: **-0.040** = (0.211 => 0.178) * 1.183                           |

### Game Diffs

0,1,2,3,4,5
f,1,(define (game 614b603d4da88384282967a7-3) (:domain many-objects-room-v1),f,1,(define (game 614b603d4da88384282967a7-3) (:domain many-objects-room-v1)
,2,(:constraints,,2,(:constraints
,3,(and,,3,(and
n,4,(forall (?b - building),n,4,(forall (?b - wall)
,5,(and,,5,(and
,6,(preference blockInTowerAtEnd,,6,(preference blockInTowerAtEnd
,7,(exists (?l - block),,7,(exists (?l - block)
,8,(at-end,,8,(at-end
t,9,(in ?b ?l),t,9,(in ?l ?b)
,10,),,10,)

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,


### Feature Diffs

in_arg_types_building_blocks_constraints: -1.000 (1.000 => 0.000)
text_ngram_n_5_score: -0.056 (0.076 => 0.020)
text_ngram_n_4_score: -0.054 (0.105 => 0.051)
text_ngram_n_3_score: -0.041 (0.129 => 0.087)
text_ngram_n_2_score: -0.033 (0.211 => 0.178)
in_arg_types_blocks_room_features_constraints: 1.000 (0.000 => 1.000)


In [34]:
mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].score(ast_printer.ast_to_string(mcmc.samples[14][0], ' '))

[('constraints', 'and', 'forall', 'wall', 'and'), ('and', 'forall', 'wall', 'and', 'preference'), ('forall', 'wall', 'and', 'preference', 'preferenceName'), ('wall', 'and', 'preference', 'preferenceName', 'exists'), ('and', 'preference', 'preferenceName', 'exists', 'block'), ('preference', 'preferenceName', 'exists', 'block', 'atend'), ('preferenceName', 'exists', 'block', 'atend', 'in'), ('exists', 'block', 'atend', 'in', 'scoring'), ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'), ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'), ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName')]


{'score': -8.398438913739634}

In [33]:
mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].score(ast_printer.ast_to_string(mcmc.samples[14][3], ' '))

[('constraints', 'and', 'forall', 'building', 'and'), ('and', 'forall', 'building', 'and', 'preference'), ('forall', 'building', 'and', 'preference', 'preferenceName'), ('building', 'and', 'preference', 'preferenceName', 'exists'), ('and', 'preference', 'preferenceName', 'exists', 'block'), ('preference', 'preferenceName', 'exists', 'block', 'atend'), ('preferenceName', 'exists', 'block', 'atend', 'in'), ('exists', 'block', 'atend', 'in', 'scoring'), ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'), ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'), ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName')]


{'score': -7.916773698598903}

In [8]:
ngram_model = mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1]

sample_ngrams = [('constraints', 'and', 'forall', 'wall', 'and'), ('and', 'forall', 'wall', 'and', 'preference'), ('forall', 'wall', 'and', 'preference', 'preferenceName'), ('wall', 'and', 'preference', 'preferenceName', 'exists'), ('and', 'preference', 'preferenceName', 'exists', 'block'), ('preference', 'preferenceName', 'exists', 'block', 'atend'), ('preferenceName', 'exists', 'block', 'atend', 'in'), ('exists', 'block', 'atend', 'in', 'scoring'), ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'), ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'), ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName')]
original_ngrams = [('constraints', 'and', 'forall', 'building', 'and'), ('and', 'forall', 'building', 'and', 'preference'), ('forall', 'building', 'and', 'preference', 'preferenceName'), ('building', 'and', 'preference', 'preferenceName', 'exists'), ('and', 'preference', 'preferenceName', 'exists', 'block'), ('preference', 'preferenceName', 'exists', 'block', 'atend'), ('preferenceName', 'exists', 'block', 'atend', 'in'), ('exists', 'block', 'atend', 'in', 'scoring'), ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'), ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'), ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName')]

print(f'Sample ngrams: {np.mean([ngram_model.ngram_logprobs[s] for s in sample_ngrams])} | ')
for s in sample_ngrams:
    print(s, ngram_model.ngram_counts[s], ngram_model.ngram_logprobs[s])

print()
print(f'Original ngrams: {np.mean([ngram_model.ngram_logprobs[s] for s in original_ngrams])}')
for s in original_ngrams:
    print(s, ngram_model.ngram_counts[s], ngram_model.ngram_logprobs[s])
print()

for s in sample_ngrams[:]:
    if s in original_ngrams:
        sample_ngrams.remove(s)
        original_ngrams.remove(s)

print('Sample remaining ngrams')
for s in sample_ngrams:
    print(s, ngram_model.ngram_counts[s], ngram_model.ngram_logprobs[s])

print()
print('Original remaining ngrams')
for s in original_ngrams:
    print(s, ngram_model.ngram_counts[s], ngram_model.ngram_logprobs[s])

Sample ngrams: -8.398438913739634
('constraints', 'and', 'forall', 'wall', 'and') 1 -8.750366278367625
('and', 'forall', 'wall', 'and', 'preference') 1 -8.750366278367625
('forall', 'wall', 'and', 'preference', 'preferenceName') 1 -8.750366278367625
('wall', 'and', 'preference', 'preferenceName', 'exists') 2 -8.05721909780768
('and', 'preference', 'preferenceName', 'exists', 'block') 6 -6.958606809139571
('preference', 'preferenceName', 'exists', 'block', 'atend') 4 -7.364071917247735
('preferenceName', 'exists', 'block', 'atend', 'in') 1 -8.750366278367625
('exists', 'block', 'atend', 'in', 'scoring') 1 -8.750366278367625
('block', 'atend', 'in', 'scoring', 'externalforallmaximize') 1 -8.750366278367625
('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects') 1 -8.750366278367625
('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName') 1 -8.750366278367625

Original ngrams: -7.916773698598903
('constraints', 'and', 'forall', 'building', '

In [39]:
original_ngrams = [('constraints', 'and', 'forall', 'building', 'and'), ('and', 'forall', 'building', 'and', 'preference'), ('forall', 'building', 'and', 'preference', 'preferenceName'), ('building', 'and', 'preference', 'preferenceName', 'exists'), ('and', 'preference', 'preferenceName', 'exists', 'block'), ('preference', 'preferenceName', 'exists', 'block', 'atend'), ('preferenceName', 'exists', 'block', 'atend', 'in'), ('exists', 'block', 'atend', 'in', 'scoring'), ('block', 'atend', 'in', 'scoring', 'externalforallmaximize'), ('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects'), ('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName')]
print('Original ngrams')
for s in original_ngrams:
    print(s, mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].ngram_counts[s], mcmc.fitness_featurizer.full_text_registry[0].n_gram_model.models[-1].ngram_logprobs[s])

Original ngrams
('constraints', 'and', 'forall', 'building', 'and') 4 -7.364071917247735
('and', 'forall', 'building', 'and', 'preference') 4 -7.364071917247735
('forall', 'building', 'and', 'preference', 'preferenceName') 5 -7.140928365933525
('building', 'and', 'preference', 'preferenceName', 'exists') 5 -7.140928365933525
('and', 'preference', 'preferenceName', 'exists', 'block') 6 -6.958606809139571
('preference', 'preferenceName', 'exists', 'block', 'atend') 4 -7.364071917247735
('preferenceName', 'exists', 'block', 'atend', 'in') 1 -8.750366278367625
('exists', 'block', 'atend', 'in', 'scoring') 1 -8.750366278367625
('block', 'atend', 'in', 'scoring', 'externalforallmaximize') 1 -8.750366278367625
('atend', 'in', 'scoring', 'externalforallmaximize', 'countonceperobjects') 1 -8.750366278367625
('in', 'scoring', 'externalforallmaximize', 'countonceperobjects', 'preferenceName') 1 -8.750366278367625


In [10]:
mcmc.visualize_sample(7)

### Energy of visualized game: -1.113

### Top features contributing to the game's energy
feature name: **value** = (original feature value => regrown feature value) * weight

| Features increasing energy (= more fake)                              | Features decreasing energy (= more real)                                     |
|-----------------------------------------------------------------------|------------------------------------------------------------------------------|
| text_ngram_n_5_score: **2.542** = (0.678) * 3.749                     | all_variables_used: **-3.406** = (1.000) * -3.406                            |
| text_ngram_n_4_score: **1.384** = (0.469) * 2.951                     | variable_not_repeated: **-3.239** = (1.000) * -3.239                         |
| text_ngram_n_2_9: **0.725** = (1.000) * 0.725                         | all_preferences_used: **-1.335** = (1.000) * -1.335                          |
| text_ngram_n_3_1: **0.725** = (1.000) * 0.725                         | all_variables_defined: **-1.076** = (1.000) * -1.076                         |
| ast_ngram_n_2_1: **0.661** = (1.000) * 0.661                          | no_nested_logicals: **-0.966** = (1.000) * -0.966                            |
| ast_ngram_n_2_4: **0.641** = (1.000) * 0.641                          | no_adjacent_same_modal: **-0.843** = (1.000) * -0.843                        |
| ast_ngram_n_2_2: **0.638** = (1.000) * 0.638                          | no_two_number_operations: **-0.740** = (1.000) * -0.740                      |
| ast_ngram_n_2_0: **0.635** = (1.000) * 0.635                          | pref_forall_pref_forall_correct_types_correct: **-0.670** = (1.000) * -0.670 |
| text_ngram_n_2_2: **0.630** = (1.000) * 0.630                         | text_ngram_n_2_4: **-0.396** = (1.000) * -0.396                              |
| ast_ngram_n_3_0: **0.623** = (1.000) * 0.623                          | ast_ngram_n_3_9: **-0.366** = (1.000) * -0.366                               |
| text_ngram_n_3_score: **0.585** = (0.267) * 2.189                     | pref_forall_pref_forall_correct_arity_correct: **-0.320** = (1.000) * -0.320 |
| ast_ngram_n_2_5: **0.422** = (1.000) * 0.422                          | on_arg_types_room_features_blocks_setup: **-0.285** = (1.000) * -0.285       |
| node_count_terminal_2: **0.407** = (1.000) * 0.407                    | starts_and_ends_once: **-0.281** = (1.000) * -0.281                          |
| length_of_then_modals_3: **0.372** = (1.000) * 0.372                  | node_count_scoring_0: **-0.261** = (1.000) * -0.261                          |
| mean_depth_constraints_1: **0.347** = (1.000) * 0.347                 | mean_depth_scoring_1: **-0.240** = (1.000) * -0.240                          |
| text_ngram_n_2_score: **0.257** = (0.218) * 1.183                     | max_depth_terminal_2: **-0.128** = (1.000) * -0.128                          |
| mean_depth_setup_2: **0.247** = (1.000) * 0.247                       | max_depth_constraints_1: **-0.112** = (1.000) * -0.112                       |
| no_identical_logical_children: **0.203** = (1.000) * 0.203            | setup_objects_used: **-0.093** = (0.500) * -0.186                            |
| agent_holds_arg_types_blocks_constraints: **0.166** = (1.000) * 0.166 | compositionality_structure_2: **-0.068** = (1.000) * -0.068                  |
| node_count_constraints_1: **0.140** = (1.000) * 0.140                 | no_adjacent_once: **-0.019** = (1.000) * -0.019                              |

### Game:

```pddl
(define (game mcmc-7) (:domain few-objects-room-v1)
(:setup
  (and
    (forall (?l3 - cube_block)
      (game-optional
        (on rug ?l3)
      )
    )
  )
)
(:constraints
  (and
    (forall (?c5 - cube_block)
      (and
        (preference cubeBlockFromBedToShelf
          (then
            (once (agent_holds ?c5) )
            (hold (not (agent_holds ?c5) ) )
            (once (agent_holds ?c5) )
          )
        )
      )
    )
  )
)
(:terminal
  (>= (count-once-per-objects cubeBlockFromBedToShelf) 8 )
)
(:scoring
  (count cubeBlockFromBedToShelf:yellow_cube_block)
)
)
```

In [11]:
mcmc.visualize_sample(15)

### Energy of visualized game: 1.592

### Top features contributing to the game's energy
feature name: **value** = (original feature value => regrown feature value) * weight

| Features increasing energy (= more fake)                   | Features decreasing energy (= more real)                                     |
|------------------------------------------------------------|------------------------------------------------------------------------------|
| text_ngram_n_5_score: **2.824** = (0.753) * 3.749          | all_variables_used: **-3.406** = (1.000) * -3.406                            |
| text_ngram_n_4_score: **1.740** = (0.590) * 2.951          | variable_not_repeated: **-3.239** = (1.000) * -3.239                         |
| text_ngram_n_3_score: **1.208** = (0.552) * 2.189          | all_preferences_used: **-1.335** = (1.000) * -1.335                          |
| text_ngram_n_2_9: **0.725** = (1.000) * 0.725              | all_variables_defined: **-1.076** = (1.000) * -1.076                         |
| text_ngram_n_3_1: **0.725** = (1.000) * 0.725              | no_nested_logicals: **-0.966** = (1.000) * -0.966                            |
| ast_ngram_n_2_1: **0.661** = (1.000) * 0.661               | no_two_number_operations: **-0.740** = (1.000) * -0.740                      |
| ast_ngram_n_2_2: **0.638** = (1.000) * 0.638               | pref_forall_pref_forall_correct_types_correct: **-0.670** = (1.000) * -0.670 |
| ast_ngram_n_2_0: **0.635** = (1.000) * 0.635               | on_arg_types_furniture_large_objects_setup: **-0.401** = (1.000) * -0.401    |
| text_ngram_n_2_2: **0.630** = (1.000) * 0.630              | pref_forall_pref_forall_correct_arity_correct: **-0.320** = (1.000) * -0.320 |
| ast_ngram_n_3_0: **0.623** = (1.000) * 0.623               | max_depth_scoring_2: **-0.213** = (1.000) * -0.213                           |
| text_ngram_n_2_score: **0.518** = (0.438) * 1.183          | pref_forall_external_forall_used_correct: **-0.187** = (1.000) * -0.187      |
| node_count_terminal_2: **0.407** = (1.000) * 0.407         | max_depth_terminal_2: **-0.128** = (1.000) * -0.128                          |
| ast_ngram_n_5_score: **0.387** = (0.459) * 0.843           | max_depth_constraints_1: **-0.112** = (1.000) * -0.112                       |
| ast_ngram_n_4_score: **0.384** = (0.444) * 0.865           | setup_objects_used: **-0.047** = (0.250) * -0.186                            |
| mean_depth_setup_2: **0.247** = (1.000) * 0.247            |                                                                              |
| ast_ngram_n_3_score: **0.227** = (0.405) * 0.559           |                                                                              |
| no_identical_logical_children: **0.203** = (1.000) * 0.203 |                                                                              |
| mean_depth_constraints_0: **0.189** = (1.000) * 0.189      |                                                                              |
| node_count_constraints_0: **0.127** = (1.000) * 0.127      |                                                                              |
| node_count_setup_2: **0.127** = (1.000) * 0.127            |                                                                              |

### Game:

```pddl
(define (game mcmc-15) (:domain medium-objects-room-v1)
(:setup
  (exists (?c6 - (either dodgeball basketball book))
    (and
      (game-optional
        (on bed ?c6)
      )
    )
  )
)
(:constraints
  (and
    (forall (?c4 - dodgeball)
      (and
        (preference dodgeballHitsBin
          (at-end
            (broken ?c4)
          )
        )
      )
    )
  )
)
(:terminal
  (or
    (>= (total-time) 4 )
  )
)
(:scoring
  (external-forall-minimize
    (count-once-per-objects dodgeballHitsBin:dodgeball)
  )
)
)
```

### Notes from looking at these games
* Revisit the logic in `pref_forall_correct_types` and `pref_forall_correct_arity` -- it might want to be a 1/0/-1 instead of a 1/0
* Index 4 somehow caught onto the fact that `(agent_holds color)` is not penalized heavily, but also the same `pref_forall_...` stuff
* Features for different counts of items under a then, and/or for any two consecutive children of a then with the same modal?
* This sampler generates oddities that are rare enough in the training set to not get a significant weight -- see the terminal and scoring sections of indices 3 and 8, for examples 

In [2]:
print(ast_printer.ast_to_string(samples[8][0], '\n'))

NameError: name 'ast_printer' is not defined

In [None]:
fitness_featurizer = build_fitness_featurizer(DEFAULT_ARGS)
features = fitness_featurizer.parse(samples[7][0], 'mcmc', True)
{k: v for k, v in features.items() if 'correctly' in k or 'forall' in k or 'no' in k}

In [None]:
with gzip.open(DEFAULT_FITNESS_FUNCTION_PATH, 'rb') as f:
    cv_fitness_model = pickle.load(f)

In [None]:
weights = cv_fitness_model.named_steps['fitness'].model.fc1.weight.data.detach().numpy().squeeze()
bias = cv_fitness_model.named_steps['fitness'].model.fc1.bias.data.detach().numpy().squeeze()
print(weights.mean(), bias)

In [None]:
plt.hist(weights, bins=100)
plt.title('Fitness Model Weights')
plt.xlabel('Weight magnitude')
plt.ylabel('Count')

In [12]:
import ast_crossover_sampler
asts = [ast for ast in cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', 
    mcmc.grammar_parser, False, relative_path='..')]

mcmc_with_crossover = MCMCRegrowthCrossoverSampler(DEFAULT_ARGS, 
    ast_crossover_sampler.CrossoverType.SAME_RULE, asts, 0.25,
    '../models/cv_fitness_model_2022_12_26.pkl.gz', 
    greedy_acceptance=True, plateau_patience_steps=20, acceptance_temperature=10.0, max_steps=1000)

In [13]:
mcmc_with_crossover.multiple_samples(10, verbose=2, should_tqdm=True)
print([x[2] for x in mcmc_with_crossover.samples])

  0%|          | 0/10 [00:00<?, ?it/s]

Sampling exception, skipping sample
Regrowth generated identical games, repeating
Accepted step 1 with fitness 7.857532501220703
Accepted step 2 with fitness 7.784302711486816
Accepted step 3 with fitness 7.775148391723633
Accepted step 4 with fitness 7.692765235900879
Accepted step 6 with fitness 7.536055564880371
Regrowth generated identical games, repeating
Accepted step 8 with fitness 7.5235395431518555
Accepted step 10 with fitness 7.4317522048950195
Accepted step 11 with fitness 6.490839004516602
Accepted step 17 with fitness 5.810407638549805
Accepted step 19 with fitness 5.400604248046875
Accepted step 28 with fitness 5.354710578918457
Accepted step 44 with fitness 4.757689476013184
Accepted step 49 with fitness 4.688848495483398
Accepted step 68 with fitness 4.574114799499512
Accepted step 75 with fitness 4.34464693069458
Regrowth generated identical games, repeating
Accepted step 93 with fitness 3.9308552742004395
Regrowth generated identical games, repeating
Accepted step 10

In [14]:
print([x[2] for x in mcmc_with_crossover.samples])

[-0.5954062938690186, -0.4696420133113861, 1.7651313543319702, 5.2624006271362305, 0.8746525049209595, 1.2288413047790527, 0.9395236968994141, 1.8448952436447144, 0.8919482231140137, 1.2082358598709106]
