In [80]:
import random
from dataclasses import dataclass, field
from typing import List, Dict

import numpy as np
import os
from tampura.environment import TampuraEnv
from tampura.spec import ProblemSpec
from tampura.structs import (
    AbstractBelief,
    ActionSchema,
    StreamSchema,
    AliasStore,
    Belief,
    NoOp,
    Predicate,
    State,
    effect_from_execute_fn,
    Observation
)
import logging 
from tampura.symbolic import OBJ, Atom, ForAll
from tampura.policies.tampura_policy import TampuraPolicy
from tampura.config.config import get_default_config, setup_logger

# Success thresholds compared against random.random() by the pick / place simulators.
PICK_SUCCESS, PLACE_SUCCESS = 0.9, 0.8

# Value registered in the alias store for the robot's own location.
ROBOT_LOCATION = "loc_rob"

# Parallel lists consumed together with zip(): OBJECTS[i] starts at LOCATIONS[i]
# and has GOAL_LOCATIONS[i] as its goal location.
OBJECTS = [f"{OBJ}o1"]
LOCATIONS = ["init"]
GOAL_LOCATIONS = ["goal_loc_o_o1"]


In [92]:
# State of the environment
@dataclass
class PickAndPlace(State):
    """Environment state holding a single ``AliasStore``.

    Declared as a dataclass with a ``default_factory`` so that each instance
    gets its own store. A plain ``store = AliasStore()`` class attribute would
    be created once and shared (and mutated) by every instance.

    NOTE(review): this class is not referenced by any other cell visible in
    this notebook -- confirm whether it is still needed.
    """

    # Per-instance alias store; a fresh AliasStore is created when none is supplied.
    store: AliasStore = field(default_factory=AliasStore)
    
# Observation space
@dataclass
class LocationObservation(Observation):
    """Observation carrying the object -> location mapping.

    Attributes:
        at: Dictionary keyed by object symbol whose value is the location
            recorded for that object. Each instance gets its own empty dict
            when none is supplied.
    """

    at: Dict[str, str] = field(default_factory=dict)

# Belief space
class LocationBelief(Belief):
    """Belief that records, per object symbol, the location it is believed to be at."""

    def __init__(self, at=None):
        """Create a belief from an object -> location mapping.

        Args:
            at: Optional mapping from object symbol to location. When omitted,
                the belief starts empty.
        """
        # A literal `{}` default is evaluated once and shared by every instance
        # created without arguments, so mutations leak between beliefs. Build a
        # fresh dict per instance, and copy a supplied mapping so the belief
        # does not alias the caller's (or an observation's) dictionary.
        self.at = {} if at is None else dict(at)

    def update(self, a, o, s):
        """Return a new belief built from the observation's ``at`` mapping.

        Args:
            a: Action that was executed (unused).
            o: Observation exposing an ``at`` mapping.
            s: Third positional argument supplied by the caller (unused).

        Returns:
            A new ``LocationBelief``; ``self`` is left untouched.
        """
        return LocationBelief(at=o.at)

    def abstract(self, store: AliasStore):
        """Return an ``AbstractBelief`` with one ``at(object, location)`` atom per entry.

        Args:
            store: Alias store (unused here).
        """
        return AbstractBelief([Atom("at", [obj, loc]) for obj, loc in self.at.items()])

    def vectorize(self):
        """Not implemented for this problem; returns ``None``."""
        return None
    
# Sample function for stream schema
    
def locate_sample_fn(input_sym, store):
    """Sample function for the ``locate`` stream.

    Args:
        input_sym: Sequence of input symbols; only the first (the object
            symbol) is used.
        store: Object exposing ``get(key)``.

    Returns:
        Whatever ``store`` holds under the key ``"loc_" + <object symbol>``.
    """
    target = input_sym[0]
    return store.get("loc_" + target)
        

# Action simulators
def pick_execute_fn(a, b, s, store):
    """Simulate a pick: with probability ``PICK_SUCCESS`` the object moves to the robot.

    Args:
        a: Action whose first argument (``a.args[0]``) is the object symbol.
        b: Current belief exposing an ``at`` mapping. It is NOT modified.
        s: Current state (unused).
        store: Alias store; on success ``"loc_<object>"`` is overwritten with
            the value stored under ``"loc_rob"``.

    Returns:
        ``(store, LocationObservation)`` where the observation holds the
        resulting object -> location mapping.

    NOTE(review): the first returned element is ``store`` rather than a state
    object -- confirm this is what the caller expects in that position.
    """
    # Work on a copy: this function is also wrapped by effect_from_execute_fn
    # in the action schema, so editing b.at in place would let every sampled
    # outcome leak into the belief that was passed in.
    at = dict(b.at)

    if random.random() < PICK_SUCCESS:
        obj = a.args[0]
        loc_rob = store.get("loc_rob")
        at[obj] = loc_rob
        store.set("loc_" + obj, loc_rob, "location")

    return store, LocationObservation(at)

def place_execute_fn(a, b, s, store):
    """Simulate a place: with probability ``PLACE_SUCCESS`` the object moves to the target.

    Args:
        a: Action whose arguments are ``(object symbol, location symbol)``.
        b: Current belief exposing an ``at`` mapping. It is NOT modified.
        s: Current state (unused).
        store: Alias store; the location symbol is resolved through it and, on
            success, ``"loc_<object>"`` is overwritten with the resolved value.

    Returns:
        ``(store, LocationObservation)`` where the observation holds the
        resulting object -> location mapping.

    NOTE(review): the mapping records the value resolved from the store, not
    the symbol ``a.args[1]`` itself -- confirm the two coincide for every
    location this action is applied to. The first returned element is
    ``store`` rather than a state object; confirm the caller expects that.
    """
    # Work on a copy: this function is also wrapped by effect_from_execute_fn
    # in the action schema, so editing b.at in place would let every sampled
    # outcome leak into the belief that was passed in.
    at = dict(b.at)

    if random.random() < PLACE_SUCCESS:
        obj = a.args[0]
        place_loc = store.get(a.args[1])
        at[obj] = place_loc
        store.set("loc_" + obj, place_loc, "location")

    return store, LocationObservation(at)



# Set up environment dynamics
class ToyDiscrete(TampuraEnv):
    """Pick-and-place environment with stochastic pick and place outcomes."""

    def initialize(self):
        """Build the initial belief and the alias store.

        Registers the robot location under ``"loc_rob"`` and, for every
        ``(object, start, goal)`` triple zipped from ``OBJECTS``, ``LOCATIONS``
        and ``GOAL_LOCATIONS``, registers the object itself plus its
        ``"loc_<object>"`` and ``"goal_loc_<object>"`` entries.

        Returns:
            ``(belief, store)``: an empty ``LocationBelief`` and the populated
            ``AliasStore``.
        """
        store = AliasStore()
        store.set("loc_rob", ROBOT_LOCATION, "location")
        for (o, loc, goal_loc) in zip(OBJECTS, LOCATIONS, GOAL_LOCATIONS):
            store.set(o, o, "physical")
            store.set("loc_" + o, loc, "location")
            store.set("goal_loc_" + o, goal_loc, "location")

        # Pass an explicit new dict so every call starts from a truly empty
        # belief instead of relying on LocationBelief's default argument, which
        # may be a dictionary shared between instances.
        return LocationBelief(at={}), store

    def get_problem_spec(self) -> ProblemSpec:
        """Describe the symbolic problem: predicates, streams, actions and reward.

        Returns:
            A ``ProblemSpec`` whose reward is ``at(o_o1, goal_loc_o_o1)``.
        """
        predicates = [
            Predicate("located", ["physical"]),
            Predicate("at", ["physical", "location"]),
        ]

        # The "locate" stream takes an object and certifies located(?o1).
        stream_schemas = [
            StreamSchema(
                name="locate",
                inputs=["?o1"],
                input_types=["physical"],
                output="?loc_o1",
                output_type="location",
                certified=[Atom("located", ["?o1"])],
                sample_fn=locate_sample_fn,
            ),
        ]

        action_schemas = [
            # pick: needs located(?o1); verified effect is at(?o1, loc_rob).
            ActionSchema(
                name="pick",
                inputs=["?o1"],
                input_types=["physical"],
                preconditions=[Atom("located", ["?o1"])],
                verify_effects=[Atom("at", ["?o1", "loc_rob"])],
                execute_fn=pick_execute_fn,
                effects_fn=effect_from_execute_fn(pick_execute_fn),
            ),
            # place: needs at(?o1, loc_rob); verified effect is at(?o1, ?loc_o1).
            ActionSchema(
                name="place",
                inputs=["?o1", "?loc_o1"],
                input_types=["physical", "location"],
                preconditions=[Atom("at", ["?o1", "loc_rob"])],
                verify_effects=[Atom("at", ["?o1", "?loc_o1"])],
                execute_fn=place_execute_fn,
                effects_fn=effect_from_execute_fn(place_execute_fn),
            ),
            NoOp(),
        ]

        # Alternative reward left over from debugging (original note: "WORKS??"):
        # reward = Atom("at",["o_o1","loc_rob"])
        reward = Atom("at", ["o_o1", "goal_loc_o_o1"])

        return ProblemSpec(
            predicates=predicates,
            stream_schemas=stream_schemas,
            action_schemas=action_schemas,
            reward=reward,
        )



## Create environment and planner

In [93]:
# Planner configuration: start from Tampura's default config, using the current
# working directory as the save directory.
cfg = get_default_config(save_dir=os.getcwd())

# Set some print options to print out abstract belief, action, observation, and reward
cfg["print_options"] = "ab,a,o,r"
# NOTE(review): the exact meaning of the three settings below is defined by
# Tampura's config -- confirm against its documentation before tuning.
cfg["vis_graph"] = True
cfg["batch_size"] = 100
cfg["num_samples"] = 1000

# Initialize environment and obtain the initial belief (b0) and alias store.
env = ToyDiscrete(config=cfg)
b0, store = env.initialize()

# Set up logger to print info (INFO level), using the configured save directory.
setup_logger(cfg["save_dir"], logging.INFO)

# Initialize the policy from the same config and the environment's problem spec.
planner = TampuraPolicy(config = cfg, problem_spec = env.problem_spec)

## Run Planner
Make sure symk is installed (see README) before running the Tampura planner.
With the default settings, the planner is expected to pick `o_o1` and then place it at `goal_loc_o_o1`.

In [94]:
# Roll out the planner in the environment, starting from the initial belief b0.
# NOTE: this rebinds `store` to the store returned by the rollout, so re-running
# this cell without re-running the setup cell feeds that returned store back in.
history,store = planner.rollout(env, b0, store)


Abstract Belief: AbstractBelief(items=[])
Reward: 0.0
Flat Stream Sampling
Sampling StreamSchema(name='locate', inputs=['?o1'], input_types=['physical'], output='?loc_o1', output_type='location', preconditions=[], certified=[Atom(pred_name='located', args=['?o1'])], sample_fn=<function locate_sample_fn at 0x7fd4def93ac0>)(['o_o1'])
Progressive widening on action place(o_o1), 3.0>0
Progressive widening on action place(o_o1), 3.4460950649911055>1
Progressive widening on action place(o_o1), 3.737192818846552>2
Progressive widening on action place(o_o1), 3.9585237323186826>3
Progressive widening on action place(o_o1), 4.139188984383645>4
Progressive widening on action place(o_o1), 5.010832957004431>5
Progressive widening on action place(o_o1), 6.0>6
Progressive widening on action place(o_o1), 7.016828512284062>7
Progressive widening on action place(o_o1), 8.0018058251898>8
Action: pick(o_o1)
Observation: LocationObservation(at={'o_o1': 'loc_rob'})

Abstract Belief: AbstractBelief(items=[A

In [95]:
# Display the store's `certified` attribute as it stands after the rollout.
store.certified

[Atom(pred_name='located', args=['o_o1'])]