In [3]:
# Mode 2 re-imports all modules before each cell execution, so edits to the project code are picked up.
# NOTE(review): this needs `%load_ext autoreload` to have run earlier — not visible in this excerpt; confirm.
%autoreload 2

In [4]:
import os
import sys
import typing

import numpy as np

# Put the parent directory and its src/ folder on the import path (resolved relative to the
# current working directory); presumably this is what lets the project-local import below resolve.
sys.path.append(os.path.abspath('..'))
sys.path.append(os.path.abspath('../src'))
from src.ast_utils import load_games_from_file

In [5]:
import openai
# Take the API key from the environment rather than hard-coding it.
# os.getenv returns None when OPENAI_API_KEY is unset, so a missing key is not reported here.
openai.api_key = os.getenv('OPENAI_API_KEY')

In [6]:
# Materialize everything load_games_from_file yields into a list; later cells index it by position
# and treat each element as the text of one game.
game_texts = list(load_games_from_file('../dsl/interactive-beta.pddl'))

In [18]:
# Literal markers of the game description text. The section markers below are what
# split_game is called with to locate where a section begins.
INSERT_TAG = '[insert]'
GAME_START = '(define'
SETUP_SECTION = '(:setup'
PREFERENCES_SECTION = '(:constraints'
TERMINAL_SECTION = '(:terminal'
SCORING_SECTION = '(:scoring'
# Default closers prepended to a suffix: one ends a section, the other ends the whole game.
SECTION_SUFFIX = ')\n'
GAME_END_SUFFIX = '))\n'


# Default seed for the random generators created by the prompt-building functions.
DEFAULT_RANDOM_SEED = 33


def split_game(game_text: str,
    start_section_str: str, end_section_str: typing.Union[None, str, typing.Sequence[str]],
    suffix: str = SECTION_SUFFIX, game_end_suffix: str = GAME_END_SUFFIX) -> typing.Tuple[str, str]:
    """Split a game into a (prompt, suffix) pair around one section.

    The prompt is the game text up to the start marker, followed by the start
    marker itself. The suffix is what should follow the text generated for the
    section.

    Args:
        game_text: Full text of the game to split.
        start_section_str: Marker that opens the section to be generated.
        end_section_str: Marker of the section that follows, a sequence of
            candidate markers (the earliest one present is used), or None
            when the generated section is the last one in the game.
        suffix: Text prepended to the remainder of the game when an end
            marker is found.
        game_end_suffix: Suffix returned when there is no end marker.

    Returns:
        ``(prompt, suffix)``. When an end marker is found the suffix is
        ``suffix`` plus the game text from that marker onwards; otherwise it
        is ``game_end_suffix``. When the start marker is absent, the prompt
        cuts the game at the end marker instead (or keeps the whole text if
        there is no end marker either) before appending the start marker.
    """
    start_index = game_text.find(start_section_str)

    if end_section_str is None:
        end_index = None
    else:
        end_markers = (end_section_str,) if isinstance(end_section_str, str) else end_section_str
        found = [i for i in (game_text.find(marker) for marker in end_markers) if i >= 0]
        # Normalise "not found" to None right away: min() of an empty list raises, and a
        # raw -1 must never be reused as a slice bound below.
        end_index = min(found) if found else None

    if start_index == -1:
        # Section absent from this game: insert it where the next section starts.
        # A None bound keeps the whole text.
        start_index = end_index

    prompt = game_text[:start_index] + start_section_str

    if end_index is None:
        return prompt, game_end_suffix

    return prompt, suffix + game_text[end_index:]
        
        
def create_multi_game_prompt_suffix(game_texts: typing.List[str],
    n_games_before: int, n_games_after: int, 
    target_game_split_func: typing.Callable[..., typing.Tuple[str, str]],
    target_game_split_func_args: typing.Dict[str, typing.Any],
    game_filter_str: typing.Optional[str] = None,
    rng: typing.Optional[np.random.Generator] = None, random_seed: int = DEFAULT_RANDOM_SEED
    ) -> typing.Tuple[str, str]:
    """Sample a target game plus surrounding context games and build a prompt/suffix pair.

    Args:
        game_texts: Pool of game texts to sample from.
        n_games_before: Number of complete games placed before the target game.
        n_games_after: Number of complete games placed after the target game.
        target_game_split_func: Callable that splits the target game text into
            ``(prompt, suffix)``.
        target_game_split_func_args: Keyword arguments forwarded to
            ``target_game_split_func``.
        game_filter_str: When given, only games containing this substring are
            eligible (as context or as target).
        rng: Random generator to draw from; a new one seeded with
            ``random_seed`` is created when omitted.
        random_seed: Seed used only when ``rng`` is None.

    Returns:
        The ``(prompt, suffix)`` pair produced by
        ``create_multi_game_prompt_suffix_from_indices`` for the sampled games.
    """
    generator = np.random.default_rng(random_seed) if rng is None else rng

    if game_filter_str is None:
        candidates = game_texts
    else:
        candidates = [text for text in game_texts if game_filter_str in text]

    # A single draw without replacement guarantees context games and target are all distinct.
    n_sampled = n_games_before + n_games_after + 1
    sampled = generator.choice(len(candidates), n_sampled, replace=False)

    return create_multi_game_prompt_suffix_from_indices(
        candidates, target_game_split_func, target_game_split_func_args,
        sampled[:n_games_before], sampled[n_games_before], sampled[n_games_before + 1:])


def create_multi_game_prompt_suffix_from_indices(game_texts: typing.List[str], 
    target_game_split_func: typing.Callable[..., typing.Tuple[str, str]], 
    target_game_split_func_args: typing.Dict[str, typing.Any], 
    before_indices: typing.Sequence[int], 
    target_index: int, 
    after_indices: typing.Sequence[int]):
    """Assemble a prompt/suffix pair from explicitly chosen games.

    Args:
        game_texts: Game texts that the indices refer to.
        target_game_split_func: Callable that splits the target game text into
            ``(prompt, suffix)``.
        target_game_split_func_args: Keyword arguments forwarded to
            ``target_game_split_func``.
        before_indices: Indices of the games placed, in order, before the target.
        target_index: Index of the game that is split.
        after_indices: Indices of the games placed, in order, after the target.

    Returns:
        ``(prompt, suffix)``: the prompt is the "before" games followed by the
        target's prompt part; the suffix is the target's suffix part followed
        by the "after" games. Pieces are joined with a newline.
    """
    prompt_parts = [game_texts[idx] for idx in before_indices]
    split_prompt, split_suffix = target_game_split_func(
        game_texts[target_index], **target_game_split_func_args)
    prompt_parts.append(split_prompt)

    suffix_parts = [split_suffix]
    suffix_parts.extend(game_texts[idx] for idx in after_indices)

    return '\n'.join(prompt_parts), '\n'.join(suffix_parts)


# Positions in the notebook-level game_texts of the games that contain a setup section
# or a terminal section, each kept both as a set and as a list copy of that set.
GAMES_WITH_SETUP = {idx for idx, text in enumerate(game_texts) if SETUP_SECTION in text}
GAMES_WITH_SETUP_LIST = [*GAMES_WITH_SETUP]
GAMES_WITH_TERMINAL = {idx for idx, text in enumerate(game_texts) if TERMINAL_SECTION in text}
GAMES_WITH_TERMINAL_LIST = [*GAMES_WITH_TERMINAL]


def create_all_prompts_for_game(game_texts: typing.List[str], 
    game_index: int, n_games_before: int, n_games_after: int, 
    rng: typing.Optional[np.random.Generator] = None, random_seed: int = DEFAULT_RANDOM_SEED
    ) -> typing.Dict[str, typing.Tuple[str, str]]:
    """Build one (prompt, suffix) pair per section for a single target game.

    For each of the four sections (setup, preferences, terminal, scoring) an
    independent set of context games is sampled without replacement from the
    other games in ``game_texts``. The target game is split around that
    section with ``split_game``.

    Context for the setup and terminal prompts is restricted to games that
    contain the corresponding section. The candidate pools are derived from
    the ``game_texts`` argument, in ascending index order, so the function
    works for any list of games and samples reproducibly for a given seed.

    Args:
        game_texts: All available game texts.
        game_index: Index in ``game_texts`` of the target game.
        n_games_before: Number of context games placed before the target.
        n_games_after: Number of context games placed after the target.
        rng: Random generator to draw from; a new one seeded with
            ``random_seed`` is created when omitted.
        random_seed: Seed used only when ``rng`` is None.

    Returns:
        Dict with keys ``'setup'``, ``'preferences'``, ``'terminal'`` and
        ``'scoring'``, each mapping to a ``(prompt, suffix)`` tuple.

    Raises:
        ValueError: If ``game_index`` is not a valid non-negative index into
            ``game_texts``, or (raised by the generator) if a candidate pool
            holds fewer than ``n_games_before + n_games_after`` games.
    """
    if not 0 <= game_index < len(game_texts):
        raise ValueError(
            f'game_index {game_index} is out of range for {len(game_texts)} games')

    if rng is None:
        rng = np.random.default_rng(random_seed)

    # The target game must never appear in its own context.
    other_games = [i for i in range(len(game_texts)) if i != game_index]
    setup_games = [i for i in other_games if SETUP_SECTION in game_texts[i]]
    terminal_games = [i for i in other_games if TERMINAL_SECTION in game_texts[i]]

    # (result key, section start marker, marker(s) of what follows, context pool).
    # The order also fixes the order of random draws, so keep it stable.
    section_specs = [
        ('setup', SETUP_SECTION, PREFERENCES_SECTION, setup_games),
        ('preferences', PREFERENCES_SECTION, (TERMINAL_SECTION, SCORING_SECTION), other_games),
        ('terminal', TERMINAL_SECTION, SCORING_SECTION, terminal_games),
        ('scoring', SCORING_SECTION, None, other_games),
    ]

    n_context_games = n_games_before + n_games_after
    prompts = {}
    for key, start_section, end_section, pool in section_specs:
        context_indices = rng.choice(pool, n_context_games, replace=False)
        prompts[key] = create_multi_game_prompt_suffix_from_indices(
            game_texts, split_game,
            {'start_section_str': start_section, 'end_section_str': end_section},
            context_indices[:n_games_before], game_index, context_indices[n_games_before:])  # type: ignore

    return prompts


In [21]:
# Print the (prompt, suffix) pair of every section for game 0, using 2 context games before
# and 2 after. Note that the loop rebinds the notebook-level names `prompt` and `suffix`.
for k, (prompt, suffix) in create_all_prompts_for_game(game_texts, 0, 2, 2).items():
    print(f'{k}:')
    print(prompt)
    print('*' * 20)  # separates a prompt from its suffix
    print(suffix)
    print('=' * 80)  # separates one section's output from the next

setup:
(define (game 61254c5a6facc8ed023a64de-48) (:domain medium-objects-room-v1)  
(:setup (and 
    (exists (?b - building ?h - hexagonal_bin) (game-conserved (and 
        (in ?b ?h)
        (>= (building_size ?b) 4) 
        (not (exists (?g - game_object) (and (in ?b ?g) (on ?h ?g))))
        (< (distance ?b room_center) 1)
    )))
))
(:constraints (and 
    (forall (?d - (either dodgeball basketball beachball))
        (preference ballThrownToBin (exists (?b - building ?h - hexagonal_bin)
            (then
                (once (agent_holds ?d))
                (hold (and (in_motion ?d) (not (agent_holds ?d))))
                (once (and (not (in_motion ?d)) (or (in ?h ?d) (on ?h ?d)) (or (in ?b ?h) (on ?b ?h))))
            )
        ))
    )
    (preference itemsHidingScreens 
        (exists (?s - (either desktop laptop) ?o - (either pillow doggie_bed teddy_bear)) 
            (at-end (on ?s ?o))    
        )
    )
    (preference objectsHidden
        (exists (?o - (either 

In [15]:
# Build one prompt/suffix pair from 3 sampled games: 2 complete games, then the target game
# cut after its section marker, with 1 complete game following in the suffix.
# Alternative kept for reference: split around the preferences section instead.
# prompt, suffix = create_multi_game_prompt_suffix(game_texts, 2, 1, split_game, 
#     dict(start_section_str=PREFERENCES_SECTION, end_section_str=(TERMINAL_SECTION, SCORING_SECTION)))
# Active variant: split at the scoring section; with end_section_str=None the target's
# suffix is the game-closing text.
prompt, suffix = create_multi_game_prompt_suffix(game_texts, 2, 1, split_game, 
    dict(start_section_str=SCORING_SECTION, end_section_str=None), random_seed=42)
print(prompt)
print('=' * 80)
print(suffix)

(define (game 602d84f17cdd707e9caed37a-52) (:domain few-objects-room-v1)  
(:constraints (and 
    (preference blockFromRugToDesk (exists (?c - cube_block ) 
        (then 
            (once (and (on rug agent) (agent_holds ?c)))
            (hold (and 
                (on rug agent)
                (in_motion ?c)
                (not (agent_holds ?c))
                (not (exists (?o - (either lamp desktop laptop)) (or (broken ?o) (in_motion ?o))))
            ))
            (once (and (on rug agent) (on desk ?c) (not (in_motion ?c))))
        )
    ))
))
(:scoring 
    (count-once-per-objects blockFromRugToDesk)
))

(define (game 5d29412ab711e9001ab74ece-11) (:domain many-objects-room-v1)  
(:constraints (and 
    (forall (?b - building) (and 
        (preference baseBlockInTowerAtEnd (exists (?l - block)
            (at-end (and
                (in ?b ?l)  
                (on floor ?l)
            ))
        ))
        (preference blockOnBlockInTowerAtEnd (exists (?l - block)
     

In [16]:

# Request n=5 sampled completions for `prompt`, passing `suffix` as the text that should follow
# the generated span. The stop strings are the same literals as the game-start and section
# markers, so generation halts if the model starts a new game or section.
# NOTE(review): this is the legacy Completions endpoint and `code-davinci-002` may no longer be
# served — confirm before re-running.
response = openai.Completion.create(
    model="code-davinci-002",
    prompt=prompt,
    suffix=suffix,
    temperature=0.67,
    max_tokens=512,
    n=5,
    frequency_penalty=0,
    presence_penalty=0,
    stop=["(define", "(:terminal", "(:scoring", "(:constraints"]
)


In [17]:
# Print the generated text of every returned completion, separated by a rule of '=' characters.
for i, c in enumerate(response.choices):
    print(f'Choice {i}:')
    print(c.text)
    print('=' * 80)

Choice 0:
 (+ 
    (count golfballInBinFromPillow)
    (* 2 (count throwAttempt))
)))


Choice 1:
 (+ 
    (count-once golfballInBinFromPillow)
    (count-once throwAttempt)
)
Choice 2:
 (+ 
    (* 3 (count golfballInBinFromPillow:golfball))
    (count throwAttempt)
)
Choice 3:
 (external-forall-maximize (*
    (count-once golfballInBinFromPillow)
    (count throwAttempt)
)))


Choice 4:
 (+ 
    (count (exists (?h - hexagonal_bin) (in ?h golfball)))
    (* (- 1 (count throwAttempt)) (count golfballInBinFromPillow))
)))




In [49]:
# Display the raw object of the third completion (finish reason, index, logprobs, text).
response.choices[2]

<OpenAIObject at 0x11d7b0c20> JSON: {
  "finish_reason": "stop",
  "index": 2,
  "logprobs": null,
  "text": " \n    (forall (?b - (either golfball dodgeball)) (and \n        (preference throwKnocksOverBear (exists (?t - teddy_bear ?s - sliding_door) \n            (then\n                (once (and \n                    (agent_holds ?b)\n                    (adjacent agent desk)\n                    (adjacent agent ?s)\n                    (equal_z_position ?t bed)\n                ))\n                (hold-while\n                    (and (in_motion ?b) (not (agent_holds ?b)))\n                    (touch ?b ?t)\n                )\n                (once (in_motion ?t))\n            )\n        ))\n        (preference throwAttempt (exists (?s - sliding_door)\n            (then\n                (once (and (agent_holds ?b) (adjacent agent desk) (adjacent agent ?s)))\n                (hold (and (in_motion ?b) (not (agent_holds ?b))))\n                (once (not (in_motion ?b)))\n            )