In [1]:
import random
from dataclasses import dataclass, field
from typing import List, Dict

import numpy as np
import os
from tampura.environment import TampuraEnv
from tampura.spec import ProblemSpec
from tampura.structs import (
    AbstractBelief,
    ActionSchema,
    StreamSchema,
    AliasStore,
    Belief,
    NoOp,
    Predicate,
    State,
    effect_from_execute_fn,
    Observation
)
import logging 
from tampura.symbolic import OBJ, Atom, ForAll, And
from tampura.policies.tampura_policy import TampuraPolicy
from tampura.config.config import get_default_config, setup_logger

# Probability that a simulated pick / place attempt succeeds.
PICK_SUCCESS = 0.5
PLACE_SUCCESS = 0.8

# Object symbols, each formed by prepending the tampura OBJ prefix.
OBJECTS = [f"{OBJ}{suffix}" for suffix in ("o1", "o2")]

# Per-object initial and goal location values, index-aligned with OBJECTS.
INIT_LOCATIONS = [1, 3]
GOAL_LOCATIONS = [4, 5]

# Value registered in the alias store under "loc_rob".
ROBOT_LOCATION = 2


In [2]:
# Observation space
@dataclass
class LocationObservation(Observation):
    """Observation holding a mapping from object symbol to location symbol."""

    # Each instance gets its own empty dict unless one is supplied.
    at: Dict[str, str] = field(default_factory=dict)

# Belief space
class LocationBelief(Belief):
    """Belief that records, for each object symbol, the location symbol it is at."""

    def __init__(self, at=None):
        """Store the object -> location mapping.

        Args:
            at: Optional dict mapping object symbols to location symbols.
                When omitted, a new empty dict is created for this instance.
        """
        # A literal `{}` default is evaluated once at definition time and would
        # be shared by every instance built without arguments; allocate a
        # fresh dict per instance instead.
        self.at = {} if at is None else at

    def update(self, a, o, s):
        """Return a new belief built from the `at` mapping of observation `o`.

        The action `a` and state `s` are not used.
        """
        return LocationBelief(at=o.at)

    def abstract(self, store: AliasStore):
        """Return an AbstractBelief with one `at(object, location)` atom per entry.

        `store` is not consulted.
        """
        return AbstractBelief([Atom("at", [o, loc]) for o, loc in self.at.items()])

    def vectorize(self):
        """Not implemented for this example; returns None."""
        pass
    
# Sample function for stream schema
    
def locate_sample_fn(input_sym, store):
    """Look up the initial location stored for the object named by `input_sym[0]`.

    The value is read from `store` under the key "init_loc_" + object symbol.
    """
    obj_symbol = input_sym[0]
    return store.get("init_loc_" + obj_symbol)
        

# Action simulators
def pick_execute_fn(a, b, s, store):
    """Simulate picking the object `a.args[0]`.

    With probability PICK_SUCCESS the returned observation maps the object to
    "loc_rob"; otherwise it repeats the belief's current mapping. The belief's
    own dict is never modified. `s` and `store` are unused.

    Returns:
        A tuple of a default-constructed State and a LocationObservation.
    """
    succeeded = random.random() < PICK_SUCCESS
    # Work on a copy so the incoming belief stays untouched.
    observed_at = b.at.copy()
    if succeeded:
        observed_at[a.args[0]] = "loc_rob"

    return State(), LocationObservation(at=observed_at)

def place_execute_fn(a, b, s, store):
    """Simulate placing the object `a.args[0]` at the location `a.args[1]`.

    With probability PLACE_SUCCESS the returned observation maps the object to
    the target location; otherwise it repeats the belief's current mapping.
    The belief's own dict is never modified. `s` and `store` are unused.

    Returns:
        A tuple of a default-constructed State and a LocationObservation.
    """
    succeeded = random.random() < PLACE_SUCCESS
    # Work on a copy so the incoming belief stays untouched.
    observed_at = b.at.copy()
    if succeeded:
        observed_at[a.args[0]] = a.args[1]

    return State(), LocationObservation(at=observed_at)



# Set up environment dynamics
class ToyDiscrete(TampuraEnv):
    """Toy pick-and-place problem over the objects listed in OBJECTS."""

    def initialize(self):
        """Create the initial belief and the alias store.

        Registers the robot location under "loc_rob" and, for each object, the
        object itself plus its "init_loc_<obj>" and "goal_loc_<obj>" entries.

        Returns:
            A tuple of an empty LocationBelief and the populated AliasStore.

        Raises:
            ValueError: If OBJECTS, INIT_LOCATIONS and GOAL_LOCATIONS differ
                in length.
        """
        # zip() silently drops trailing entries on a length mismatch, which
        # would leave the reward referring to aliases that were never set.
        if not (len(OBJECTS) == len(INIT_LOCATIONS) == len(GOAL_LOCATIONS)):
            raise ValueError(
                "OBJECTS, INIT_LOCATIONS and GOAL_LOCATIONS must have the same length"
            )

        store = AliasStore()
        store.set("loc_rob", ROBOT_LOCATION, "location")
        for obj, init_loc, goal_loc in zip(OBJECTS, INIT_LOCATIONS, GOAL_LOCATIONS):
            store.set(obj, obj, "physical")
            store.set("init_loc_" + obj, init_loc, "location")
            store.set("goal_loc_" + obj, goal_loc, "location")

        # Pass an explicit fresh dict so the initial belief never aliases
        # state from a previous call.
        return LocationBelief(at={}), store

    def get_problem_spec(self) -> ProblemSpec:
        """Build the ProblemSpec: predicates, the locate stream, actions and reward."""
        predicates = [
            Predicate("located", ["physical"]),
            Predicate("at", ["physical", "location"]),
        ]

        # The "locate" stream certifies located(?o1) and draws its output
        # from locate_sample_fn.
        stream_schemas = [
            StreamSchema(
                name="locate",
                inputs=["?o1"],
                input_types=["physical"],
                output="?loc_o1",
                output_type="location",
                certified=[Atom("located", ["?o1"])],
                sample_fn=locate_sample_fn,
            ),
        ]

        action_schemas = [
            # pick: requires located(?o1); checked effect is at(?o1, loc_rob).
            ActionSchema(
                name="pick",
                inputs=["?o1"],
                input_types=["physical"],
                preconditions=[Atom("located", ["?o1"])],
                verify_effects=[Atom("at", ["?o1", "loc_rob"])],
                execute_fn=pick_execute_fn,
                effects_fn=effect_from_execute_fn(pick_execute_fn),
            ),
            # place: requires at(?o1, loc_rob); checked effect is at(?o1, ?loc_o1).
            ActionSchema(
                name="place",
                inputs=["?o1", "?loc_o1"],
                input_types=["physical", "location"],
                preconditions=[Atom("at", ["?o1", "loc_rob"])],
                verify_effects=[Atom("at", ["?o1", "?loc_o1"])],
                execute_fn=place_execute_fn,
                effects_fn=effect_from_execute_fn(place_execute_fn),
            ),
            NoOp(),
        ]

        # Reward: every object is at its own goal location. Built from OBJECTS
        # so the goal cannot drift from the aliases registered in initialize().
        # (A single-atom reward, Atom("at", [OBJECTS[0], "loc_rob"]), was
        # previously noted as working too.)
        reward = And([Atom("at", [obj, "goal_loc_" + obj]) for obj in OBJECTS])

        spec = ProblemSpec(
            predicates=predicates,
            stream_schemas=stream_schemas,
            action_schemas=action_schemas,
            reward=reward,
        )

        return spec



## Create environment and planner

In [3]:
# Planner configuration; the current working directory is used as save_dir
cfg = get_default_config(save_dir=os.getcwd())

# Overrides applied on top of the defaults. "print_options" asks for the
# abstract belief, action, observation, and reward to be printed.
config_overrides = {
    "print_options": "ab,a,o,r",
    "vis_graph": True,
    "flat_sample": True,
    "batch_size": 100,
    "num_samples": 1000,
}
for option, value in config_overrides.items():
    cfg[option] = value

# Build the environment, then its initial belief and alias store
env = ToyDiscrete(config=cfg)
b0, store = env.initialize()

# Set up the logger at INFO level using the configured save directory
setup_logger(cfg["save_dir"], logging.INFO)

# Create the policy from the environment's problem specification
planner = TampuraPolicy(config=cfg, problem_spec=env.problem_spec)

## Run Planner
Make sure `symk` is installed (see the README) before running the TAMPURA planner.
With the default settings, the planner should pick up both objects on every run.

In [4]:
# Run the planner from the initial belief; rollout returns the history and a store.
# NOTE: `store` is rebound here, so re-running only this cell passes in the
# store returned by the previous rollout. Re-run the setup cell first for a clean start.
history,store = planner.rollout(env, b0, store)


Abstract Belief: AbstractBelief(items=[])
Reward: 0.0
Flat Stream Sampling
Sampling StreamSchema(name='locate', inputs=['?o1'], input_types=['physical'], output='?loc_o1', output_type='location', preconditions=[], certified=[Atom(pred_name='located', args=['?o1'])], sample_fn=<function locate_sample_fn at 0x7b513cfa8b80>)(['o_o1'])
Sampling StreamSchema(name='locate', inputs=['?o1'], input_types=['physical'], output='?loc_o1', output_type='location', preconditions=[], certified=[Atom(pred_name='located', args=['?o1'])], sample_fn=<function locate_sample_fn at 0x7b513cfa8b80>)(['o_o2'])
Action: pick(o_o2)
Observation: LocationObservation(at={'o_o2': 'loc_rob'})

Abstract Belief: AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'loc_rob'])])
Reward: 0.0
Action: place(o_o2, goal_loc_o_o2)
Observation: LocationObservation(at={'o_o2': 'goal_loc_o_o2'})

Abstract Belief: AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'goal_loc_o_o2'])])
Reward: 0.0
Action: pick(o_o1)
Obser

In [31]:
# Actions recorded in the rollout history.
# NOTE(review): the saved output below does not match the rollout log shown
# under In [4] (different action order, and the execution count jumps from 4
# to 31) -- it looks like it came from a later run. Restart and run all cells
# to refresh the outputs.
history.actions

[Action(name='pick', args=['o_o2']),
 Action(name='pick', args=['o_o2']),
 Action(name='pick', args=['o_o1']),
 Action(name='pick', args=['o_o1']),
 Action(name='pick', args=['o_o1']),
 Action(name='pick', args=['o_o1']),
 Action(name='place', args=['o_o1', 'goal_loc_o_o1']),
 Action(name='place', args=['o_o2', 'goal_loc_o_o2']),
 Action(name='no-op', args=[]),
 Action(name='no-op', args=[]),
 None]

In [32]:
# Abstract beliefs recorded in the rollout history.
history.abstract_beliefs

[AbstractBelief(items=[]),
 AbstractBelief(items=[]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o1', 'loc_rob']), Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o1', 'goal_loc_o_o1']), Atom(pred_name='at', args=['o_o2', 'loc_rob'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o1', 'goal_loc_o_o1']), Atom(pred_name='at', args=['o_o2', 'goal_loc_o_o2'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o1', 'goal_loc_o_o1']), Atom(pred_name='at', args=['o_o2', 'goal_loc_o_o2'])]),
 AbstractBelief(items=[Atom(pred_name='at', args=['o_o1', 'goal_loc_o_o1']), Atom(pred_name='at', args=['o_o2', 'goal_loc_o_o2'])])]

In [33]:
# Alias -> value mapping held by the store returned from the rollout.
store.als

{'loc_rob': 2,
 'o_o1': 'o_o1',
 'init_loc_o_o1': 1,
 'goal_loc_o_o1': 4,
 'o_o2': 'o_o2',
 'init_loc_o_o2': 3,
 'goal_loc_o_o2': 5,
 'o_lo_0': 1,
 'o_lo_1': 3}