In [1]:
%load_ext autoreload
%autoreload 2


In [1]:
from argparse import Namespace
from collections import defaultdict
import copy
import difflib
import gzip
import itertools
import os
import pickle
import sys
import typing

import logging
logging.getLogger('matplotlib').setLevel(logging.WARNING)

from IPython.display import display, Markdown, HTML
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.pipeline import Pipeline
import tatsu
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import tqdm.notebook as tqdm


sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
sys.path.append(os.path.abspath('../reward-machine'))
from src import fitness_energy_utils as utils
from src.fitness_energy_utils import NON_FEATURE_COLUMNS
from src.ast_counter_sampler import *
from src.ast_utils import cached_load_and_parse_games_from_file, load_games_from_file, _extract_game_id
from src import ast_printer

2023-10-31 15:26:01 - ast_utils - DEBUG    - Using cache folder: /tmp/game_generation_cache
2023-10-31 15:26:01 - src.ast_utils - DEBUG    - Using cache folder: /tmp/game_generation_cache


In [2]:
grammar = open('../dsl/dsl.ebnf').read()
grammar_parser = tatsu.compile(grammar)
game_asts = list(cached_load_and_parse_games_from_file('../dsl/interactive-beta.pddl', grammar_parser, False, relative_path='..'))
real_game_texts = [ast_printer.ast_to_string(ast, '\n') for ast in game_asts]
regrown_game_1024_texts = list(load_games_from_file('../dsl/ast-real-regrowth-samples-1024.pddl.gz'))
print(len(real_game_texts), len(regrown_game_1024_texts), len(regrown_game_1024_texts) / 98)


2023-10-31 15:26:09 - src.ast_utils - INFO     - Loading from cache file: /tmp/game_generation_cache/interactive-beta-cache.pkl.gz
2023-10-31 15:26:10 - src.ast_utils - INFO     - Finished loading cache file: /tmp/game_generation_cache/interactive-beta-cache.pkl.gz


98 100352 1024.0


In [6]:
def dispaly_real_game(index: int):
    game_text = real_game_texts[index]
    display(Markdown(f'### Game #{index} ({game_asts[index][1].game_name}):'))
    display(Markdown(f'```pddl\n{game_text}\n```'))


dispaly_real_game(0)

### Game #0 (6172feb1665491d1efbce164-0):

```pddl
(define (game 6172feb1665491d1efbce164-0) (:domain medium-objects-room-v1)
(:setup
  (and
    (exists (?h - hexagonal_bin ?r - triangular_ramp)
      (game-conserved
        (< (distance ?h ?r) 1)
     )
   )
 )
)
(:constraints
  (and
    (preference throwToRampToBin
      (exists (?b - ball ?r - triangular_ramp ?h - hexagonal_bin)
        (then
          (once (agent_holds ?b))
          (hold-while (and (not (agent_holds ?b)) (in_motion ?b)) (touch ?b ?r))
          (once (and (not (in_motion ?b)) (in ?h ?b)))
       )
     )
   )
    (preference binKnockedOver
      (exists (?h - hexagonal_bin)
        (then
          (once (object_orientation ?h upright))
          (hold (and (not (touch agent ?h)) (not (agent_holds ?h))))
          (once (not (object_orientation ?h upright)))
       )
     )
   )
 )
)
(:terminal
  (>= (count-once binKnockedOver) 1)
)
(:scoring
  (count throwToRampToBin)
)
)
```

In [3]:
def extract_game_index(game_name: str):
    first_dash = game_name.find('-')
    second_dash = game_name.find('-', first_dash + 1)
    index = game_name[first_dash + 1:second_dash] if second_dash != -1 else game_name[first_dash + 1:]
    return int(index)


def extract_negative_index(game_name: str):
    first_dash = game_name.find('-')
    second_dash = game_name.find('-', first_dash + 1)
    if second_dash == -1:
        return -1
    
    third_dash = game_name.find('-', second_dash + 1)
    index = game_name[second_dash + 1:third_dash]
    return int(index)


fitness_df = utils.load_fitness_data('../data/fitness_features_1024_regrowths.csv.gz')

# fitness_df = fitness_df.assign(real=fitness_df.real.astype('int'), game_index=fitness_df.game_name.apply(extract_game_index), 
#                                negative_index= fitness_df.game_name.apply(extract_negative_index), fake=~fitness_df.real.astype('int'))
# fitness_df = fitness_df.sort_values(by=['fake', 'game_index', 'negative_index'], ignore_index=True).reset_index(drop=True)
# fitness_df.drop(columns=['Index', 'fake', 'game_index', 'negative_index'], inplace=True)
print(fitness_df.src_file.unique())
fitness_df.head()

['interactive-beta.pddl' 'ast-real-regrowth-samples-1024.pddl.gz']


Unnamed: 0,Index,src_file,game_name,domain_name,original_game_name,real,variables_defined_all,variables_defined_prop,variables_used_all,variables_used_prop,...,ast_ngram_constraints_n_4_score,ast_ngram_constraints_n_5_score,ast_ngram_terminal_n_2_score,ast_ngram_terminal_n_3_score,ast_ngram_terminal_n_4_score,ast_ngram_terminal_n_5_score,ast_ngram_scoring_n_2_score,ast_ngram_scoring_n_3_score,ast_ngram_scoring_n_4_score,ast_ngram_scoring_n_5_score
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,6172feb1665491d1efbce164-0,1,1,1.0,1,1.0,...,0.965835,0.97753,0.968377,0.978806,0.979229,0.981369,0.924171,0.962489,1.0,1.0
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,5f77754ba932fb2c4ba181d8-2,1,1,1.0,1,1.0,...,0.97201,0.976522,0.964548,0.97559,0.974119,0.982116,0.904404,0.917146,0.966749,0.97377
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,614b603d4da88384282967a7-3,1,1,1.0,1,1.0,...,0.941851,0.970089,0.0,0.0,0.0,0.0,0.856664,0.906768,0.94989,0.956323
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,5bc79f652885710001a0e82a-5,1,1,1.0,1,1.0,...,0.985712,0.978548,0.0,0.0,0.0,0.0,0.924171,0.962489,1.0,1.0
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,614dec67f6eb129c3a77defd-6,1,1,1.0,1,1.0,...,0.981814,0.977738,0.0,0.0,0.0,0.0,0.968944,0.973379,0.974631,0.963877


In [5]:
from latest_model_paths import LATEST_FITNESS_FUNCTION_DATE_ID
# Change here if you want to use a different model
model_date_id = LATEST_FITNESS_FUNCTION_DATE_ID
data_df = fitness_df
cv_energy_model, feature_columns = utils.load_model_and_feature_columns(model_date_id)
print(len(feature_columns))


90


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [6]:
full_tensor = utils.df_to_tensor(data_df, feature_columns)
if 'wrapper' in cv_energy_model.named_steps: cv_energy_model.named_steps['wrapper'].eval()
full_tensor_scores = cv_energy_model.transform(full_tensor).detach()

real_game_scores = full_tensor_scores[:, 0]

print(f'Real game scores: {real_game_scores.mean():.4f} ± {real_game_scores.std():.4f}, min = {real_game_scores.min():.4f}, max = {real_game_scores.max():.4f}')

negatives_scores = full_tensor_scores[:, 1:]
torch.quantile(negatives_scores.ravel(), torch.linspace(0, 1, 11))
print(f'20th percentile negative energy: {torch.quantile(negatives_scores.ravel(), 0.2)}')

Real game scores: -34.4582 ± 0.6845, min = -35.9561, max = -33.0300
20th percentile negative energy: -32.874725341796875


In [7]:
steps = torch.linspace(0, 1, 101)
percentiles = torch.quantile(negatives_scores.ravel(), steps)
print(steps)
print(f'Energy percentiles: {steps}')

tensor([0.0000, 0.0100, 0.0200, 0.0300, 0.0400, 0.0500, 0.0600, 0.0700, 0.0800,
        0.0900, 0.1000, 0.1100, 0.1200, 0.1300, 0.1400, 0.1500, 0.1600, 0.1700,
        0.1800, 0.1900, 0.2000, 0.2100, 0.2200, 0.2300, 0.2400, 0.2500, 0.2600,
        0.2700, 0.2800, 0.2900, 0.3000, 0.3100, 0.3200, 0.3300, 0.3400, 0.3500,
        0.3600, 0.3700, 0.3800, 0.3900, 0.4000, 0.4100, 0.4200, 0.4300, 0.4400,
        0.4500, 0.4600, 0.4700, 0.4800, 0.4900, 0.5000, 0.5100, 0.5200, 0.5300,
        0.5400, 0.5500, 0.5600, 0.5700, 0.5800, 0.5900, 0.6000, 0.6100, 0.6200,
        0.6300, 0.6400, 0.6500, 0.6600, 0.6700, 0.6800, 0.6900, 0.7000, 0.7100,
        0.7200, 0.7300, 0.7400, 0.7500, 0.7600, 0.7700, 0.7800, 0.7900, 0.8000,
        0.8100, 0.8200, 0.8300, 0.8400, 0.8500, 0.8600, 0.8700, 0.8800, 0.8900,
        0.9000, 0.9100, 0.9200, 0.9300, 0.9400, 0.9500, 0.9600, 0.9700, 0.9800,
        0.9900, 1.0000])
Energy percentiles: tensor([0.0000, 0.0100, 0.0200, 0.0300, 0.0400, 0.0500, 0.0600, 0.0700, 0.0

In [8]:
def print_negatives_from_quantile(quantile: int, n_games: int, output_dir: str = './temp_outputs/'):
    min_energy = percentiles[20 * quantile]
    max_energy = percentiles[20 * (quantile + 1)]
    quantile_indices = ((negatives_scores >= min_energy) & (negatives_scores <= max_energy)).nonzero()
    game_indices = torch.randperm(quantile_indices.shape[0])[:n_games]
    lines = [f'; Games from quantile #{quantile} with energies in the range [{min_energy:.4f} ({int(steps[2 * quantile] * 100)}%), {max_energy:.4f} ({int(steps[2 * (quantile + 1)] * 100)}%) ]']
    for idx in game_indices:
        overall_index = 98 * quantile_indices[idx, 0] + quantile_indices[idx, 1]
        lines.append(f'; Game with index {overall_index} and energy {negatives_scores[quantile_indices[idx, 0], quantile_indices[idx, 1], quantile_indices[idx, 2]]:.4f}')
        lines.append(regrown_game_1024_texts[overall_index])
        lines.append('')

    with open(os.path.join(output_dir, f'quantile_{quantile}.txt'), 'w') as f:
        f.write('\n'.join(lines))


# n_games_to_print = 25
# for q in range(5):
#     print_negatives_from_quantile(q, n_games_to_print)
    


In [9]:
RANDOM_SEED = 33
torch.manual_seed(RANDOM_SEED)

N_GAMES = 20
N_GAMES_PER_QUANTILE = 10
N_QUANTILES = 4

original_game_indices = list(torch.randperm(full_tensor.shape[0])[:N_GAMES].numpy())
regrowths_by_original_game_index_and_quantile = {idx: {} for idx in original_game_indices}

for game_idx in original_game_indices:
    for q in range(N_QUANTILES):
        quantile_step = int(100 / N_QUANTILES)
        min_energy = percentiles[quantile_step * q]
        max_energy = percentiles[quantile_step * (q + 1)]
        game_negatives = negatives_scores[game_idx].squeeze()
        quantile_indices = ((game_negatives >= min_energy) & (game_negatives <= max_energy)).nonzero().squeeze()
        negatives_in_quantile_indices = torch.randperm(quantile_indices.shape[0])[:N_GAMES_PER_QUANTILE]
        regrowths_by_original_game_index_and_quantile[game_idx][q] = list(quantile_indices[negatives_in_quantile_indices].numpy())

        

In [14]:
import csv
from game_describer import GameDescriber
describer = GameDescriber(grammar_path='../dsl/dsl.ebnf')


In [66]:
game_idx = 0  # 0 to N_GAMES - 1
quantile = 3  # 0 to N_QUANTILES - 1
negative_idx = 0  # 0 to N_GAMES_PER_QUANTILE - 1

with open("./test.csv", 'w') as f:
    writer = csv.writer(f)
    writer.writerow(["SECTION", "STAGE 0", "STAGE 1", "STAGE 2"])
    for game_idx in range(0, 20):
        # for quantile in range(0, 4):
        quantile = game_idx % 4
        print(f"Testing game {game_idx}, quantile {quantile}")
        original_game_index = original_game_indices[game_idx]
        original_game = real_game_texts[original_game_index]
        regrowth_game = regrown_game_1024_texts[original_game_index * 1024 + regrowths_by_original_game_index_and_quantile[original_game_index][quantile][negative_idx]]

        stage_0 = describer.describe_stage_0(regrowth_game)
        stage_1 = describer.describe_stage_1(regrowth_game)

        for section_idx, section in enumerate(["SETUP", "PREFERENCES", "TERMINAL", "SCORING"]):
            writer.writerow([f"[Game {game_idx}] [Quantile {quantile}] {section}", stage_0[section_idx], stage_1[section_idx], ""])

        writer.writerow(["", "", "", ""])

        

Testing game 0, quantile 0
Testing game 0, quantile 1
Testing game 0, quantile 2
Testing game 0, quantile 3
Testing game 1, quantile 0
Testing game 1, quantile 1
Testing game 1, quantile 2
Testing game 1, quantile 3
Testing game 2, quantile 0
Testing game 2, quantile 1
Testing game 2, quantile 2
Testing game 2, quantile 3
Testing game 3, quantile 0
Testing game 3, quantile 1
Testing game 3, quantile 2
Testing game 3, quantile 3
Testing game 4, quantile 0
Testing game 4, quantile 1
Testing game 4, quantile 2
Testing game 4, quantile 3
Testing game 5, quantile 0
Testing game 5, quantile 1
Testing game 5, quantile 2
Testing game 5, quantile 3
Testing game 6, quantile 0
Testing game 6, quantile 1
Testing game 6, quantile 2
Testing game 6, quantile 3
Testing game 7, quantile 0
Testing game 7, quantile 1
Testing game 7, quantile 2
Testing game 7, quantile 3
Testing game 8, quantile 0
Testing game 8, quantile 1
Testing game 8, quantile 2
Testing game 8, quantile 3
Testing game 9, quantile 0
T

In [12]:
def visualize_game_diff(game_idx: int, quantile: int, negative_idx: int, output_dir: str = './temp_outputs/'):
    original_game_index = original_game_indices[game_idx]
    original_game = real_game_texts[original_game_index]
    regrowth_game = regrown_game_1024_texts[original_game_index * 1024 + regrowths_by_original_game_index_and_quantile[original_game_index][quantile][negative_idx]]

    utils.display_game_diff_html(original_game, regrowth_game)


game_index = 0  # 0 to N_GAMES - 1
quantile = 3  # 0 to N_QUANTILES - 1
negative_index = 0  # 0 to N_GAMES_PER_QUANTILE - 1

visualize_game_diff(game_index, quantile, negative_index)
    

0,1,2,3,4,5
n,1.0,(define (game 6158d01f302cf46b673dd597-87) (:domain few-objects-room-v1),n,1,(define (game 6158d01f302cf46b673dd597-87-539-nd-17-rd-6-rs-constraints-sk-prior
,,,,>,22-nr-1) (:domain few-objects-room-v1)
,2.0,(:setup,,2,(:setup
,3.0,(and,,3,(and
,4.0,(exists (?h - hexagonal_bin ?w - wall),,4,(exists (?h - hexagonal_bin ?w - wall)
,5.0,(game-conserved,,5,(game-conserved
,6.0,(and,,6,(and
,7.0,(on desk ?h),,7,(on desk ?h)
,8.0,(adjacent ?h ?w),,8,(adjacent ?h ?w)
,9.0,),,9,)

Legends,Legends.1
Colors Added Changed Deleted,Links (f)irst change (n)ext change (t)op

Colors
Added
Changed
Deleted

Links,Links.1
(f)irst change,
(n)ext change,
(t)op,
