In [1]:
import random
from dataclasses import dataclass, field
from typing import List,Dict

import copy
import itertools 
import time
from tampura.policies.policy import save_config, RolloutHistory, save_run_data

import numpy as np
import os
from tampura.environment import TampuraEnv
from tampura.spec import ProblemSpec
from tampura.structs import (
    AbstractBelief,
    ActionSchema,
    StreamSchema,
    AliasStore,
    Belief,
    NoOp,
    Predicate,
    State,
    effect_from_execute_fn,
    Observation,
    AbstractBeliefSet,
)
import logging 
from tampura.symbolic import OBJ, Atom, ForAll, Not, Exists, Or, And, OneOf, eval_expr
from tampura.policies.tampura_policy import TampuraPolicy
from tampura.config.config import load_config, setup_logger

# Name prefixes and object identifiers shared by every cell below.
ROB = "robot_"
REG = "region_"
MUG = "mug"
DOOR = "door"

# Region order matters: index 0 is the mug's region, 1 the door's, 2 the stable placement region.
REGIONS = [REG + MUG, REG + DOOR, REG + "stable_mug"]
ACTION_NAMES = [
    stem + "_action"
    for stem in ("transit", "transfer", "pick", "place", "open", "close", "nothing")
]

# Problem specification: try with just one robot to demonstrate how overall cost increases.
ROBOTS = [ROB + str(idx) for idx in (1, 2)]
# Long horizon (combinatorial explosion): every robot starts in the last region.
ROB_REGIONS = dict.fromkeys(ROBOTS, REGIONS[-1])
# ROB_REGIONS = {ROBOTS[0]:REGIONS[1],ROBOTS[1]:REGIONS[0]} # short horizon: kind of works?
OBJ_REGIONS = {MUG: REGIONS[0]}

# Test goal: some robot is holding the mug and the door is not open.
GOAL = And(
    [
        Exists(Atom("holding", ["?rob", MUG]), ["?rob"], ["robot"]),
        Not(Atom("open", [DOOR])),
    ]
)

  import distutils.spawn


In [2]:
# Centralized planner
# State of the environment

# Belief space
class CentralBelief(Belief):
    """Fully observed, centralized belief for the turn-based multi-robot toy problem.

    Attributes:
        holding: maps robot name -> list of objects currently held by that robot.
        open_door: True when the door is open.
        rob_regions: maps robot name -> region the robot is in.
        obj_regions: maps object name -> region it rests in ("" while it is held).
        turn: name of the robot allowed to act next.
    """

    def __init__(self, holding=None, open_door=False, rob_regions=None, obj_regions=None, turn=ROBOTS[0]):
        # None sentinels instead of `{}` defaults: a mutable default is created once
        # and would be shared by every instance constructed without that argument.
        self.holding = {} if holding is None else holding
        self.open_door = open_door
        self.rob_regions = {} if rob_regions is None else rob_regions
        self.obj_regions = {} if obj_regions is None else obj_regions
        self.turn = turn

    def update(self, a, o, s):
        """Return the successor belief after applying action `a`.

        The observation `o` and third argument `s` are not read: the transition is
        deterministic and depends only on the action name and arguments.
        `self` is left unmodified.
        """
        # Dictionary mutations are in-place, so work on copies.
        holding = self.holding.copy()
        open_door = self.open_door
        rob_regions = self.rob_regions.copy()
        obj_regions = self.obj_regions.copy()

        # BE CAREFUL: update names here if the action schema names change.
        if a.name == "pick":
            holding[a.args[0]] = [a.args[1]]
            obj_regions[a.args[1]] = ""  # a held object is in no region
        elif a.name == "place":
            holding[a.args[0]] = []
            obj_regions[a.args[1]] = a.args[2]
        elif a.name in ("transit", "transfer"):
            rob_regions[a.args[0]] = a.args[2]
        elif a.name == "open":
            open_door = True
        elif a.name == "close":
            open_door = False

        # The last argument of every action is the robot whose turn comes next.
        turn = a.args[-1]

        return CentralBelief(
            holding=holding,
            open_door=open_door,
            rob_regions=rob_regions,
            obj_regions=obj_regions,
            turn=turn,
        )

    def abstract(self, store: AliasStore):
        """Translate the belief into the symbolic atoms used by the planner."""
        ab = []

        # true state
        for rob in self.holding.keys():
            ab += [Atom("holding", [rob, obj]) for obj in self.holding[rob]]
        for rob in self.rob_regions.keys():
            ab += [Atom("in_rob", [rob, self.rob_regions[rob]])]
        for obj in self.obj_regions.keys():
            if self.obj_regions[obj] != "":  # "" marks an object that is being held
                ab += [Atom("in_obj", [obj, self.obj_regions[obj]])]
        if self.open_door:
            ab += [Atom("open", [DOOR])]

        ab += [Atom("turn", [self.turn])]

        return AbstractBelief(ab)

    # def vectorize(self):
    #     return np.array([int(obj in self.holding) for obj in OBJECTS])
      

def deterministic_execute_fn(a, b, s, store):
    """Execute an action in the centralized problem.

    None of the arguments are read; a fresh empty state and a fresh empty
    observation are returned as a `(state, observation)` pair.
    """
    next_state = State()
    empty_observation = Observation()
    return next_state, empty_observation
    
def deterministic_effects_fn(a, b, store):
    """Predict the single outcome of action `a` from belief `b`.

    The belief is advanced with an empty observation and wrapped in an
    abstract belief set containing just that one successor.
    """
    successor = b.update(a, Observation(), store)
    return AbstractBeliefSet.from_beliefs([successor], store)

# Set up environment dynamics
class ToyDiscreteCentral(TampuraEnv):
    """Centralized, turn-based mug/door toy environment.

    All robots are planned for jointly. Every action schema takes the acting
    robot (`?rob1`) and the robot that acts next (`?rob2`); it requires
    `turn(?rob1)` and `not turn(?rob2)` and hands the turn over to `?rob2`.
    All effects are deterministic.
    """
    
    def initialize(self,holding,open_door,rob_regions,obj_regions,turn):
        """Build the alias store and the initial belief.

        Registers every robot in ROBOTS, every region in REGIONS, the mug and
        the door in a fresh AliasStore, and certifies the mug as stable in
        REGIONS[0] and REGIONS[2].

        Returns:
            (CentralBelief built from the given arguments, AliasStore)
        """
        
        store = AliasStore()
        
        for rob in ROBOTS:
            
            store.set(rob, rob, "robot")
        # store.set(ego,ego,"robot")
            
        for region in REGIONS:
            store.set(region, region, "region")
        
        store.set(MUG, MUG, "physical")
        store.set(DOOR, DOOR, "door")
        
        # Static facts: the only regions where the mug may be placed.
        store.certified.append(Atom("stable",[MUG,REGIONS[0]]))
        store.certified.append(Atom("stable",[MUG,REGIONS[2]]))

        b = CentralBelief(holding=holding,open_door=open_door,rob_regions=rob_regions,obj_regions=obj_regions,turn=turn)

        return b, store

    def get_problem_spec(self) -> ProblemSpec:
        """Return the symbolic problem: predicates, action schemas and the GOAL reward."""
        

        predicates = [
            
            Predicate("holding", ["robot","physical"]),
            Predicate("stable",["physical","region"]),
            Predicate("in_rob",["robot","region"]),
            Predicate("in_obj",["physical","region"]),
            Predicate("open",["door"]),
            Predicate("turn",["robot"])
        ] 
        
        # modify preconditions, effects and execute functions for observation
        action_schemas = [
            
            # ego-agent
            # pick: ?rob1 picks ?obj1 up from ?reg1, then ?rob2 acts.
            ActionSchema(
                name="pick",
                inputs=["?rob1","?obj1","?reg1","?rob2"],
                input_types=["robot","physical","region","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])),
                               # an object located in REGIONS[0] can only be picked while the door is open
                               Or([Not(Atom("in_obj",["?obj1",REGIONS[0]])),And([Atom("in_obj",["?obj1",REGIONS[0]]),Atom("open",[DOOR])])]), # TODO: modify!! accessibility of mug: derived predicate
                               Atom("in_obj",["?obj1","?reg1"]), # object is in region from where pick is attempted
                               Atom("in_rob",["?rob1","?reg1"]), # robot is in region from where pick is attempted
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                effects=[Atom("holding",["?rob1","?obj1"]),Not(Atom("in_obj",["?obj1","?reg1"])),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))], # deterministic
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
            
            
            # place: ?rob1 puts the held ?obj1 down in ?reg1, then ?rob2 acts.
            ActionSchema(
                name="place",
                inputs=["?rob1","?obj1","?reg1","?rob2"],
                input_types=["robot","physical","region","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])),
                               # a robot standing in REGIONS[0] can only place while the door is open
                               Or([Not(Atom("in_rob",["?rob1",REGIONS[0]])),And([Atom("in_rob",["?rob1",REGIONS[0]]),Atom("open",[DOOR])])]), # TODO: modify!! accessibility of region
                               Atom("in_rob",["?rob1","?reg1"]), # robot is in region where place is attempted
                               Atom("holding",["?rob1","?obj1"]), # robot is holding the object that is to be placed 
                               Atom("stable",["?obj1","?reg1"]), # region where place is attempted is stable
                               ],
                effects=[Not(Atom("holding",["?rob1","?obj1"])),Atom("in_obj",["?obj1","?reg1"]),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))], # deterministic 
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
            

            # transit: empty-handed ?rob1 moves from ?reg1 to a different region ?reg2.
            ActionSchema(
                name="transit",
                inputs=["?rob1","?reg1","?reg2","?rob2"],
                input_types=["robot","region","region","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])),
                               Atom("in_rob",["?rob1","?reg1"]),
                               Not(Atom("in_rob",["?rob1","?reg2"])),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                effects=[Not(Atom("in_rob",["?rob1","?reg1"])),Atom("in_rob",["?rob1","?reg2"]),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))],
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
            # transfer: ?rob1 moves from ?reg1 to ?reg2 while holding ?obj1.
            ActionSchema(
                name="transfer",
                inputs=["?rob1","?reg1","?reg2","?obj1","?rob2"],
                input_types=["robot","region","region","physical","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])),
                               Atom("in_rob",["?rob1","?reg1"]),
                               Not(Atom("in_rob",["?rob1","?reg2"])),
                               Atom("holding",["?rob1","?obj1"])],
                effects=[Not(Atom("in_rob",["?rob1","?reg1"])),Atom("in_rob",["?rob1","?reg2"]),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))],
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
            # open: empty-handed ?rob1, standing in REGIONS[1], opens the closed door.
            ActionSchema(
                name="open",
                inputs=["?rob1","?rob2"],
                input_types=["robot","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])),
                               Not(Atom("open",[DOOR])),
                               Atom("in_rob",["?rob1",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"]))],
                effects=[Atom("open",[DOOR]),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))],
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
            # close: empty-handed ?rob1, standing in REGIONS[1], closes the open door.
            ActionSchema(
                name="close",
                inputs=["?rob1","?rob2"],
                input_types=["robot","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"])), 
                               Atom("open",[DOOR]),
                               Atom("in_rob",["?rob1",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"]))],
                effects=[Not(Atom("open",[DOOR])),
                         Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))],
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,

            ),
            
            # nothing: ?rob1 passes; only the turn changes hands.
            ActionSchema(
                name="nothing",
                inputs=["?rob1","?rob2"],
                input_types=["robot","robot"],
                preconditions=[Atom("turn",["?rob1"]),Not(Atom("turn",["?rob2"]))],
                effects=[Atom("turn",["?rob2"]),Not(Atom("turn",["?rob1"]))],
                effects_fn=deterministic_effects_fn,
                execute_fn=deterministic_execute_fn,
            ),
        ]
        
        
        # The reward expression is the module-level GOAL formula.
        reward = GOAL

        spec = ProblemSpec(
            predicates=predicates,
            action_schemas=action_schemas,
            reward=reward,
        )

        return spec



In [3]:
# testing
# Planner
# Load the default planner configuration and override the options used in this notebook.
cfg = load_config(config_file="../tampura/config/default.yml")

# Set some print options to print out abstract belief, action, observation, and reward
cfg["print_options"] = "ab,a,o,r"
cfg["vis_graph"] = True
# batch size 100, num samples 500 num skeletons 100 works best!!
cfg["batch_size"] = 100 #100
cfg["num_samples"] = 100#500
cfg["max_steps"] = 15
cfg["num_skeletons"] = 10
cfg["flat_sample"] = False # TODO: check; may cause progressive widening
# One timestamped output directory per run, under ./runs.
cfg["save_dir"] = os.path.join(os.getcwd(), "runs", "run{}".format(time.time()))

# cfg['from_scratch'] = False # imp: re-use!!! but graph gets too big

# TODO: check - can we reuse the same environment for both agents?
# Initialize the environment from the problem-specification constants (ROBOTS,
# ROB_REGIONS, OBJ_REGIONS) so that editing them in the first cell actually changes
# the initial state. Fresh dicts are passed so the constants are never aliased.
env = ToyDiscreteCentral(config=cfg)
b0, store = env.initialize(
    holding={rob: [] for rob in ROBOTS},  # every robot starts empty-handed
    open_door=False,
    rob_regions=dict(ROB_REGIONS),
    obj_regions=dict(OBJ_REGIONS),
    turn=ROBOTS[0],
)

# Set up logger to print info
setup_logger(cfg["save_dir"], logging.INFO)

# Initialize the policy
planner = TampuraPolicy(config=cfg, problem_spec=env.problem_spec)

In [4]:
# Manual rollout: plan, execute, update the belief and log, until the goal holds
# or 100 steps have elapsed.
b = b0
assert env.problem_spec.verify(store)

save_config(planner.config, planner.config["save_dir"])

history = RolloutHistory(planner.config)
st = time.time()

# Sentinels: these are only bound inside the loop body. If the goal already holds
# at step 0 the loop breaks before they are assigned, and the final history.add
# below would otherwise raise NameError.
bp = a_bp = info = None

for step in range(100):
# while True:
    s = copy.deepcopy(env.state)
    a_b = b.abstract(store)
    reward = env.problem_spec.get_reward(a_b, store)
    if reward:
        print("goal achieved")
        break

    logging.info("\n" + ("=" * 10) + "t=" + str(step) + ("=" * 10))
    if "s" in planner.print_options:
        logging.info("State: " + str(s))
    if "b" in planner.print_options:
        logging.info("Belief: " + str(b))
    if "ab" in planner.print_options:
        logging.info("Abstract Belief: " + str(a_b))
    if "r" in planner.print_options:
        logging.info("Reward: " + str(reward))

    action, info, store = planner.get_action(b, store) # should only call effects functions!!??

    if "a" in planner.print_options:
        logging.info("Action: " + str(action))

    if action.name == "no-op":
        # The planner chose not to act: the belief carries over unchanged.
        bp = copy.deepcopy(b)
        observation = None
    else:
        observation = env.step(action, b, store) # should call execute function
        bp = b.update(action, observation, store)

        if planner.config["vis"]:
            env.vis_updated_belief(bp, store)

    a_bp = bp.abstract(store)
    # Record the transition with the reward of the belief the action was taken from.
    history.add(s, b, a_b, action, observation, reward, info, store, time.time() - st)

    reward = env.problem_spec.get_reward(a_bp, store)

    if "o" in planner.print_options:
        logging.info("Observation: " + str(observation))
    if "sp" in planner.print_options:
        logging.info("Next State: " + str(env.state))
    if "bp" in planner.print_options:
        logging.info("Next Belief: " + str(bp))
    if "abp" in planner.print_options:
        logging.info("Next Abstract Belief: " + str(a_bp))
    if "rp" in planner.print_options:
        logging.info("Next Reward: " + str(reward))

    # update the belief
    b = bp

# Terminal datapoint; skipped when no step was ever taken (goal satisfied initially).
if bp is not None:
    history.add(env.state, bp, a_bp, None, None, reward, info, store, time.time() - st)

logging.info("=" * 20)

env.wrapup()

if not planner.config["real_execute"]:
    save_run_data(history, planner.config["save_dir"])



Abstract Belief: AbstractBelief(items=[Atom(pred_name='turn', args=['robot_1']), Atom(pred_name='in_rob', args=['robot_1', 'region_stable_mug']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='in_rob', args=['robot_2', 'region_stable_mug'])])
Reward: 0.0
[TampuraPolicy] Flat Action Parameter Sampling
[TampuraPolicy] Outcome Sampling


100%|██████████| 100/100 [00:00<00:00, 161.85it/s]

[TampuraPolicy] MDP Solving
Action: transit(robot_1, region_stable_mug, region_door, robot_2)
Observation: Observation()

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='turn', args=['robot_2']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='in_rob', args=['robot_2', 'region_stable_mug'])])
Reward: 0.0
[TampuraPolicy] MDP Solving





Action: transit(robot_2, region_stable_mug, region_mug, robot_1)
Observation: Observation()

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='turn', args=['robot_1']), Atom(pred_name='in_rob', args=['robot_2', 'region_mug']), Atom(pred_name='in_obj', args=['mug', 'region_mug'])])
Reward: 0.0
[TampuraPolicy] MDP Solving
Action: open(robot_1, robot_2)
Observation: Observation()

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='in_rob', args=['robot_2', 'region_mug']), Atom(pred_name='turn', args=['robot_2']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='open', args=['door'])])
Reward: 0.0
[TampuraPolicy] MDP Solving
Action: pick(robot_2, mug, region_mug, robot_1)
Observation: Observation()

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='holding', args=['robot_2', 'mug']

In [5]:
# Show the directory that holds this run's saved config and rollout data.
cfg['save_dir']

'/home/am/tamp_multi_agent/notebooks/runs/run1748344218.7346234'

In [6]:
# State of the environment
@dataclass
class EnvState(State):
    """Ground-truth world state of the decentralized environment."""

    # robot name -> objects it is holding
    holding: Dict[str, List[str]] = field(default_factory=dict)
    # True when the door is open
    open_door: bool = field(default_factory=bool)
    # robot name -> region it occupies
    rob_regions: Dict[str, str] = field(default_factory=dict)
    # object name -> region it rests in
    obj_regions: Dict[str, str] = field(default_factory=dict)
    # encoded "<name>-<rob>%<arg>..." action strings
    next_actions: List[str] = field(default_factory=list)
    
# Observation space
@dataclass
class EnvObservation(Observation):
    """Observation carrying the same fields as the environment state."""

    # robot name -> objects it is holding
    holding: Dict[str, List[str]] = field(default_factory=dict)
    # True when the door is open
    open_door: bool = field(default_factory=bool)
    # robot name -> region it occupies
    rob_regions: Dict[str, str] = field(default_factory=dict)
    # object name -> region it rests in
    obj_regions: Dict[str, str] = field(default_factory=dict)
    # encoded "<name>-<rob>%<arg>..." action strings
    next_actions: List[str] = field(default_factory=list)

# Belief space
class EnvBelief(Belief):
    """Ego robot's belief about the shared world and the other agents' next actions.

    Attributes:
        holding: maps robot name -> list of objects held by that robot.
        open_door: True when the door is open.
        rob_regions: maps robot name -> region the robot is in.
        obj_regions: maps object name -> region it rests in ("" while held).
        next_actions: encoded actions, each "<name>-<rob>%<arg>%...".
    """

    def __init__(self, holding=None, open_door=False, rob_regions=None, obj_regions=None, next_actions=None):
        # None sentinels instead of `{}` / `[]` defaults: a mutable default is created
        # once and would be shared by every instance built without that argument.
        self.holding = {} if holding is None else holding
        self.open_door = open_door
        self.rob_regions = {} if rob_regions is None else rob_regions
        self.obj_regions = {} if obj_regions is None else obj_regions
        self.next_actions = [] if next_actions is None else next_actions

    def update(self, a, o, s):
        """Return the successor belief given joint action `a` and observation `o`.

        `a.name` has the form "<other action>*<ego action>"; `a.args` holds the other
        agent's arguments followed by the ego's. The third argument `s` is not read.
        `self` is left unmodified.
        """
        # Dictionary mutations are in-place, so work on copies.
        holding = self.holding.copy()
        open_door = self.open_door
        rob_regions = self.rob_regions.copy()
        obj_regions = self.obj_regions.copy()

        a_other_name, a_ego_name = a.name.split("*")

        # Number of leading arguments that belong to the other agent's action;
        # the ego's own arguments start right after them.
        if a_other_name == "transfer_other":
            nargs_other = 4
        elif a_other_name in ("nothing_other", "open_other", "close_other"):
            nargs_other = 1
        else:
            nargs_other = 3

        a_ego_args = a.args[nargs_other:]

        ego_manipulates = a_ego_name in ("pick_ego", "place_ego")

        # Special cases: the two actions interact, so the affected parts of the
        # belief are taken wholesale from the observation.
        if a_other_name == "place_other" and a_ego_name == "pick_ego":
            # case 1: place, pick
            holding = o.holding
            obj_regions = o.obj_regions

        elif a_other_name in ("open_other", "close_other") and ego_manipulates:
            # cases 2, 3: open, pick / open, place
            # cases 5, 6: close, pick / close, place
            open_door = o.open_door
            obj_regions = o.obj_regions
            holding = o.holding

        elif (a_other_name == "open_other" and a_ego_name == "close_ego") or (
            a_other_name == "close_other" and a_ego_name == "open_ego"
        ):
            # case 4: open, close
            # case 7: close, open
            open_door = o.open_door

        else:  # independent actions: ego non-deterministic, other non-deterministic
            # ego actions
            if a_ego_name in ("transit_ego", "transfer_ego"):
                # NOTE (original author): "update NOT taking place!!!" -- unresolved.
                rob_regions[a_ego_args[0]] = o.rob_regions[a_ego_args[0]]
            elif ego_manipulates:
                holding[a_ego_args[0]] = o.holding[a_ego_args[0]]
                obj_regions[a_ego_args[1]] = o.obj_regions[a_ego_args[1]]
            elif a_ego_name in ("open_ego", "close_ego"):  # open or shut door
                open_door = o.open_door
            # "nothing_ego": no change in state

            # other agent's actions
            if a_other_name in ("transit_other", "transfer_other"):
                # assumption: transit is only confusable with transit or noop
                # assumption: transfer is only confusable with transfer or noop
                rob_regions[a.args[0]] = o.rob_regions[a.args[0]]
            elif a_other_name in ("pick_other", "place_other"):
                # assumption: pick is only confusable with pick or noop
                # assumption: place is only confusable with place or noop
                holding[a.args[0]] = o.holding[a.args[0]]
                obj_regions[a.args[1]] = o.obj_regions[a.args[1]]
            elif a_other_name in ("open_other", "close_other"):
                # assumption: open is only confusable with open or noop
                # assumption: close is only confusable with close or noop
                open_door = o.open_door
            # "nothing_other": no change in state

        # The announced next actions always come from the observation.
        next_actions = o.next_actions

        return EnvBelief(
            holding=holding,
            open_door=open_door,
            rob_regions=rob_regions,
            obj_regions=obj_regions,
            next_actions=next_actions,
        )

    def abstract(self, store: AliasStore):
        """Translate the belief into the symbolic atoms used by the planner.

        Encoded next actions are turned into atoms only for robots that are not
        certified as `is_ego` in `store`.
        """
        ab = []

        # true state
        for rob in self.holding.keys():
            ab += [Atom("holding", [rob, obj]) for obj in self.holding[rob]]
        for rob in self.rob_regions.keys():
            ab += [Atom("in_rob", [rob, self.rob_regions[rob]])]
        for obj in self.obj_regions.keys():
            if self.obj_regions[obj] != "":  # "" marks an object that is being held
                ab += [Atom("in_obj", [obj, self.obj_regions[obj]])]
        if self.open_door:
            ab += [Atom("open", [DOOR])]

        # next actions, encoded as "<name>-<rob>%<arg>%..."
        for next_action in self.next_actions:
            name, args = next_action.split("-")
            args = list(args.split("%"))

            rob = args[0]
            if Atom("is_ego", [rob]) not in store.certified:
                ab += [Atom(name, args)]

        return AbstractBelief(ab)

    # def vectorize(self):
    #     return np.array([int(obj in self.holding) for obj in OBJECTS])
      
def get_next_actions_execute(a, b, store): # human operator : tedious, kind of works
    """Ask a human operator which action each non-ego robot takes next.

    For every robot in `store` that is not certified as `is_ego`, the actions
    applicable in belief `b` are listed on stdout and the operator picks one by
    index. Invalid entries (out of range or not an integer) are re-prompted.

    Args:
        a: joint action; `a.name` is "<other action>*<ego action>".
        b: current belief; must provide `abstract(store)`.
        store: alias store providing `als_type` and `certified`.

    Returns:
        A list with one encoded action "<name>-<rob>%<arg>%..." per non-ego robot.
    """
    a_other_name, a_ego_name = a.name.split("*")
    # Number of leading arguments that belong to the other agent's action.
    if a_other_name == "transfer_other":
        n_args = 4
    elif a_other_name in ("open_other", "close_other", "nothing_other"):
        n_args = 1
    else:
        n_args = 3

    print("ego attempts action ..")
    print(a_ego_name)
    print(a.args[n_args:])
    print("... belief ...")
    b_temp = copy.deepcopy(b)
    # The abstraction is the same for every robot, so compute it once.
    ab = b_temp.abstract(store)
    print(ab)
    print("applicable actions...")

    next_actions = []
    others = []
    for entity in store.als_type:
        if store.als_type[entity] == "robot" and Atom("is_ego", [entity]) not in store.certified:
            others.append(entity)

    door_open = Atom("open", [DOOR]) in ab.items

    for rob in others: # one list of outcomes per robot

        # nothing is always applicable
        applicable_actions_rob = [Atom("nothing_action", [rob])]
        for reg in REGIONS:
            # REGIONS[0] is only accessible while the door is open
            accessible = reg != REGIONS[0] or door_open
            in_reg = Atom("in_rob", [rob, reg]) in ab.items
            for obj in OBJ_REGIONS.keys():
                if Atom("holding", [rob, obj]) in ab.items: # robot is holding an object: it can transfer or place
                    if not in_reg:
                        applicable_actions_rob.append(Atom("transfer_action", [rob, obj, reg]))
                    elif Atom("stable", [obj, reg]) in store.certified and accessible:
                        applicable_actions_rob.append(Atom("place_action", [rob, obj]))
                else: # robot is not holding an object: it can transit, open, pick, depending on where it is and where the objects are
                    if in_reg: # cannot move to reg
                        if reg == REGIONS[1]:
                            if not door_open:
                                applicable_actions_rob.append(Atom("open_action", [rob]))
                            else:
                                applicable_actions_rob.append(Atom("close_action", [rob]))
                        # The robot is already known to be in `reg` here; the original
                        # extra `and Atom("in_rob", ...)` operand was a bare (always
                        # truthy) object rather than a membership test.
                        if Atom("in_obj", [obj, reg]) in ab.items and accessible:
                            applicable_actions_rob.append(Atom("pick_action", [rob, obj]))
                    else: # can move to reg
                        applicable_actions_rob.append(Atom("transit_action", [rob, reg]))

        while True:

            for i, act in enumerate(applicable_actions_rob):
                print(str(i) + ". " + act.pred_name + str(act.args))

            choice = input("choose an action \n")
            try:
                index = int(choice)
            except ValueError:
                # Non-numeric input is treated like any other invalid choice.
                index = -1
            if 0 <= index < len(applicable_actions_rob):
                break
            print("invalid choice, enter again")

        observed_action_rob = applicable_actions_rob[index]
        print(observed_action_rob)

        name = observed_action_rob.pred_name
        args = observed_action_rob.args

        # Every candidate's first argument is `rob`, so the encoding is the robot
        # followed by the remaining arguments (none for open, close, nothing).
        a_other = name + "-" + "%".join([rob, *args[1:]])

        next_actions.append(a_other)

    return next_actions # for all the agents
def get_next_actions_effects(a, b, store): # simulated other agents (no human in the loop)
    """Sample the next action of every non-ego robot for planning.

    For each robot in `store` not certified as `is_ego`, the actions applicable in
    belief `b` are enumerated. If a "goal directed" action (place, open, close,
    pick) is applicable, it is kept 70% of the time and replaced by a uniformly
    random applicable action otherwise; with no such action the choice is
    uniformly random.

    Args:
        a: joint action (not read).
        b: current belief; must provide `abstract(store)`.
        store: alias store providing `als_type` and `certified`.

    Returns:
        A list with one encoded action "<name>-<rob>%<arg>%..." per non-ego robot.
    """
    b_temp = copy.deepcopy(b)
    # b_temp=b_temp.update(a,None,None)
    # The abstraction is the same for every robot, so compute it once.
    ab = b_temp.abstract(store)

    next_actions = []

    others = []
    for entity in store.als_type:
        if store.als_type[entity] == "robot" and Atom("is_ego", [entity]) not in store.certified:
            others.append(entity)

    door_open = Atom("open", [DOOR]) in ab.items

    for rob in others: # one list of outcomes per robot

        # nothing is always applicable
        applicable_actions_rob = [Atom("nothing_action", [rob])]

        observed_action_rob = ""

        for reg in REGIONS:
            # REGIONS[0] is only accessible while the door is open
            accessible = reg != REGIONS[0] or door_open
            in_reg = Atom("in_rob", [rob, reg]) in ab.items
            for obj in OBJ_REGIONS.keys():
                if Atom("holding", [rob, obj]) in ab.items: # robot is holding an object: it can transfer or place
                    if not in_reg:
                        applicable_actions_rob.append(Atom("transfer_action", [rob, obj, reg]))
                    elif Atom("stable", [obj, reg]) in store.certified and accessible:
                        applicable_actions_rob.append(Atom("place_action", [rob, obj]))
                        observed_action_rob = Atom("place_action", [rob, obj])
                else: # robot is not holding an object: it can transit, open, pick, depending on where it is and where the objects are
                    if in_reg: # cannot move to reg
                        if reg == REGIONS[1]:
                            # Door actions end the scan of objects for this region.
                            if not door_open:
                                applicable_actions_rob.append(Atom("open_action", [rob]))
                                observed_action_rob = Atom("open_action", [rob])
                            else:
                                applicable_actions_rob.append(Atom("close_action", [rob]))
                                observed_action_rob = Atom("close_action", [rob])
                            break
                        # The robot is already known to be in `reg` here; the original
                        # extra `and Atom("in_rob", ...)` operand was a bare (always
                        # truthy) object rather than a membership test.
                        if Atom("in_obj", [obj, reg]) in ab.items and accessible:
                            applicable_actions_rob.append(Atom("pick_action", [rob, obj]))
                            observed_action_rob = Atom("pick_action", [rob, obj])
                            break
                    else: # can move to reg
                        applicable_actions_rob.append(Atom("transit_action", [rob, reg]))

        # simulation: if pick, place, open, close are applicable, the other agent tends to perform that action
        if observed_action_rob == "":
            observed_action_rob = random.choice(applicable_actions_rob)
        elif random.random() < 0.3: # 70% of time "goal directed" actions, 30% of the time random
            observed_action_rob = random.choice(applicable_actions_rob)

        #observed_action_rob = Atom("nothing_action",[rob]) # INACTIVE OTHER AGENT!!!

        name = observed_action_rob.pred_name
        args = observed_action_rob.args

        # Every candidate's first argument is `rob`, so the encoding is the robot
        # followed by the remaining arguments (none for open, close, nothing).
        a_other = name + "-" + "%".join([rob, *args[1:]])

        next_actions.append(a_other)

    return next_actions # for all the agents

def non_deterministic_ego_execute_fn(a, b, s, store):
    """Execute the ego part of joint action `a` on state `s` (90% success rate).

    `s` is mutated in place: the ego's previous entries in `s.next_actions` are
    replaced by the action just attempted, and on success the corresponding state
    fields are updated. The belief `b` and `store` are not read.

    Returns:
        (s, EnvObservation) where the observation's `next_actions` holds the ego's
        new action followed by a "nothing_action-<rob>" placeholder for every other
        robot's entry (temporary, until their real action is observed).
    """
    a_other_name, a_ego_name = a.name.split("*")

    # Number of leading arguments that belong to the other agent's action.
    if a_other_name == "transfer_other":
        n_args = 4
    elif a_other_name in ("open_other", "close_other", "nothing_other"):
        n_args = 1
    else:
        n_args = 3

    args_ego = a.args[n_args:]
    ego = args_ego[0]

    def _robot_of(encoded_action):
        """Return the robot name from an encoded "<name>-<rob>%<arg>..." string."""
        return encoded_action.split("-", 1)[1].split("%")[0]

    # Remove the ego's previous action(s). Rebuilding the list (in place, so other
    # references to it stay valid) avoids removing items while iterating, which
    # skips the element following each removal.
    s.next_actions[:] = [na for na in s.next_actions if _robot_of(na) != ego]

    # "transit_ego" -> "transit_action", etc.
    action_label = a_ego_name[:-3] + "action"

    if a_ego_name in ("transit_ego", "transfer_ego"):

        if random.random() < 0.9: # 90% success
            s.rob_regions[ego] = args_ego[2]

        next_action = action_label + "-" + ego + "%" + args_ego[2]

        # additional argument
        if a_ego_name == "transfer_ego":
            next_action = next_action + "%" + args_ego[3]

    elif a_ego_name == "pick_ego":

        if random.random() < 0.9: # 90% success
            s.holding[ego] = [args_ego[1]]
            s.obj_regions[args_ego[1]] = ""

        next_action = action_label + "-" + ego + "%" + args_ego[1]

    elif a_ego_name == "place_ego":

        if random.random() < 0.9: # 90% success
            s.holding[ego] = []
            s.obj_regions[args_ego[1]] = args_ego[2]

        next_action = action_label + "-" + ego + "%" + args_ego[1]

    elif a_ego_name == "open_ego":

        if random.random() < 0.9: # 90% success
            s.open_door = True

        next_action = action_label + "-" + ego

    elif a_ego_name == "close_ego":

        if random.random() < 0.9: # 90% success
            s.open_door = False

        next_action = action_label + "-" + ego

    else:

        next_action = action_label + "-" + ego

    # add ego's next action for the other agent
    s.next_actions.append(next_action)

    # Observation: ego entries first, then a no-op placeholder per other-robot entry
    # (temporary, till their observation is received). Built from scratch instead of
    # mutating a list during iteration, which missed entries with 3+ robots.
    ego_entries = [na for na in s.next_actions if _robot_of(na) == ego]
    placeholders = [
        "nothing_action-" + _robot_of(na)
        for na in s.next_actions
        if _robot_of(na) != ego
    ]
    next_actions = ego_entries + placeholders

    return s, EnvObservation(next_actions=next_actions)
def non_deterministic_ego_effects_fn(a, b, store):
    """Sample the ego half of a joint action for the planner's transition model.

    Args:
        a: joint action whose name is "<other>*<ego>"; ``a.args`` holds the
            other agent's arguments followed by the ego's arguments.
        b: current belief (read only; its dicts are copied before editing).
        store: alias store forwarded to the helpers and to ``b.update``.

    Returns:
        An ``AbstractBeliefSet`` built from the single belief obtained by
        updating ``b`` with the sampled observation.
    """
    other_name, ego_name = a.name.split("*")

    # Number of leading entries of a.args that belong to the other agent.
    if other_name == "transfer_other":
        other_arity = 4
    elif other_name in ("open_other", "close_other", "nothing_other"):
        other_arity = 1
    else:
        other_arity = 3
    ego_args = a.args[other_arity:]

    # Start from the current belief and overlay the sampled ego outcome.
    sampled = EnvObservation(next_actions=get_next_actions_effects(a, b, store))
    sampled.rob_regions = b.rob_regions.copy()
    sampled.holding = b.holding.copy()
    sampled.obj_regions = b.obj_regions.copy()
    sampled.open_door = b.open_door

    is_move = ego_name in ("transit_ego", "transfer_ego")
    is_stochastic = is_move or ego_name in ("pick_ego", "place_ego", "open_ego", "close_ego")

    # Exactly one draw for the stochastic ego actions (90% success), none otherwise.
    succeeded = random.random() < 0.9 if is_stochastic else False

    if succeeded:
        if is_move:
            sampled.rob_regions[ego_args[0]] = ego_args[2]
        elif ego_name == "pick_ego":
            sampled.holding[ego_args[0]] = [ego_args[1]]
            sampled.obj_regions[ego_args[1]] = ""
        elif ego_name == "place_ego":
            sampled.holding[ego_args[0]] = []
            sampled.obj_regions[ego_args[1]] = ego_args[2]
        elif ego_name == "open_ego":
            sampled.open_door = True
        else:  # close_ego
            sampled.open_door = False

    return AbstractBeliefSet.from_beliefs([b.update(a, sampled, store)], store)

# other agents actions
def transit_transfer_other_execute_fn(a, b, s, store):
    """Observe where the other robot ended up after its transit/transfer.

    The state ``s`` is returned unchanged; the observation carries a copy of
    the believed robot regions in which only the acting robot (``a.args[0]``)
    is overwritten with its region in ``s``.
    """
    observed_regions = b.rob_regions.copy()
    mover = a.args[0]
    observed_regions[mover] = s.rob_regions[mover]
    return s, EnvObservation(rob_regions=observed_regions)
def transit_transfer_other_effects_fn(a, b, store):
    """Sample the region of the other robot after a transit/transfer.

    The intended destination ``a.args[2]`` is drawn with weight 0.7 and every
    other region with weight 0.15 (set them to 1.0 / 0.0 for a deterministic
    outcome). Previously these weights were computed but the region was drawn
    uniformly, so changing the weights had no effect.

    Args:
        a: other-agent action; ``a.args[0]`` is the robot, ``a.args[2]`` the
            destination region.
        b: current belief; ``b.rob_regions`` is copied, never mutated.
        store: unused, kept for the common effects-function signature.

    Returns:
        A new robot -> region dict with the acting robot's sampled region.
    """
    destination = a.args[2]
    weights = [0.7 if reg == destination else 0.15 for reg in REGIONS]

    rob_regions = b.rob_regions.copy()
    # random.choices normalises the weights, so they need not sum to one.
    rob_regions[a.args[0]] = random.choices(REGIONS, weights=weights, k=1)[0]

    return rob_regions
    
def pick_other_execute_fn(a, b, s, store):
    """Observe whether the other robot's pick is reflected in the state ``s``.

    ``a.args[0]`` is the robot and ``a.args[1]`` the object. The pick counts as
    done when, in ``s``, the object has no region ("") and the robot holds
    exactly that object. ``s`` itself is returned unchanged.
    """
    observed_obj_regions = b.obj_regions.copy()
    observed_holding = b.holding.copy()
    robot, obj = a.args[0], a.args[1]

    was_picked = s.obj_regions[obj] == "" and s.holding[robot] == [obj]
    print("picked" if was_picked else "not picked")
    if was_picked:
        observed_obj_regions[obj] = ""
        observed_holding[robot] = [obj]

    return s, EnvObservation(obj_regions=observed_obj_regions, holding=observed_holding)
def pick_other_effects_fn(a, b, store):
    """Sample the outcome of the other robot's pick for the planner's model.

    Returns copies ``(obj_regions, holding)`` of the belief's dicts. With
    probability 0.9 the object ``a.args[1]`` loses its region ("") and robot
    ``a.args[0]`` holds it; otherwise the copies are returned untouched.
    """
    sampled_obj_regions = b.obj_regions.copy()
    sampled_holding = b.holding.copy()

    # Raise the threshold to 1.0 for a deterministic pick.
    if random.random() > 0.9:
        return sampled_obj_regions, sampled_holding  # pick failed

    picked_obj = a.args[1]
    sampled_obj_regions[picked_obj] = ""
    sampled_holding[a.args[0]] = [picked_obj]
    return sampled_obj_regions, sampled_holding
    

def place_other_execute_fn(a, b, s, store):
    """Observe whether the other robot's place is reflected in the state ``s``.

    ``a.args`` starts with (robot, object, region). The place counts as done
    when, in ``s``, the object is in that region and the robot holds nothing.
    ``s`` itself is returned unchanged.
    """
    observed_obj_regions = b.obj_regions.copy()
    observed_holding = b.holding.copy()
    robot, obj, region = a.args[0], a.args[1], a.args[2]

    was_placed = s.obj_regions[obj] == region and s.holding[robot] == []
    print("placed" if was_placed else "not placed")
    if was_placed:
        observed_obj_regions[obj] = region
        observed_holding[robot] = []

    return s, EnvObservation(obj_regions=observed_obj_regions, holding=observed_holding)
def place_other_effects_fn(a, b, store):
    """Sample the outcome of the other robot's place for the planner's model.

    Returns copies ``(obj_regions, holding)`` of the belief's dicts. With
    probability 0.9 the object ``a.args[1]`` ends up in region ``a.args[2]``
    and robot ``a.args[0]`` holds nothing; otherwise the copies are untouched.
    """
    sampled_obj_regions = b.obj_regions.copy()
    sampled_holding = b.holding.copy()

    # Raise the threshold to 1.0 for a deterministic place.
    if random.random() > 0.9:
        return sampled_obj_regions, sampled_holding  # place failed

    sampled_obj_regions[a.args[1]] = a.args[2]
    sampled_holding[a.args[0]] = []
    return sampled_obj_regions, sampled_holding

def open_other_execute_fn(a, b, s, store):
    """Observe the door after the other robot's open action.

    If the door is open in ``s`` the observation reports it open; otherwise it
    repeats the believed door state. ``s`` is returned unchanged.
    """
    believed_open = b.open_door

    if not s.open_door:
        print("not opened")
        return s, EnvObservation(open_door=believed_open)

    print("opened")
    return s, EnvObservation(open_door=True)
def open_other_effects_fn(a, b, store):
    """Sample the door state after the other robot's open action.

    Returns ``True`` with probability 0.8, otherwise the believed door state.
    """
    believed_open = b.open_door
    # Raise the threshold to 1.0 for a deterministic outcome.
    return True if random.random() <= 0.8 else believed_open

def close_other_execute_fn(a, b, s, store):
    """Observe the door after the other robot's close action.

    If the door is closed in ``s`` the observation reports it closed; otherwise
    it repeats the believed door state. ``s`` is returned unchanged.
    """
    believed_open = b.open_door

    if s.open_door:
        print("not closed")
        return s, EnvObservation(open_door=believed_open)

    print("closed")
    return s, EnvObservation(open_door=False)
def close_other_effects_fn(a, b, store):
    """Sample the door state after the other robot's close action.

    Returns ``False`` with probability 0.8, otherwise the believed door state.
    """
    believed_open = b.open_door
    # Raise the threshold to 1.0 for a deterministic outcome.
    return False if random.random() <= 0.8 else believed_open


# joint actions
def joint_execute_fn(a, b, s, store):
    """Execute one joint "<other>*<ego>" action against the state ``s``.

    The other agent's part is handled first by the ``*_other_execute_fn``
    helpers, which only compare ``s`` with the belief (and print the outcome)
    without modifying ``s``. For the special pairings below the ego's effect is
    then written into ``s`` directly, each with a 90% success draw. Finally
    ``non_deterministic_ego_execute_fn`` is called on a temporary belief to
    obtain the next-action observation.

    Args:
        a: joint action; ``a.name`` is "<other>*<ego>" and ``a.args`` holds the
            other agent's arguments followed by the ego's.
        b: current belief; its dicts are copied before being edited.
        s: environment state; mutated in place by the ego's effects.
        store: alias store forwarded to the helpers.

    Returns:
        ``(s, EnvObservation)`` where the observation exposes ``s``'s holding,
        door, robot-region and object-region fields plus the next actions
        reported by ``non_deterministic_ego_execute_fn``.

    NOTE(review): the special cases already apply the ego's effect to ``s``
    and ``non_deterministic_ego_execute_fn`` runs afterwards for every case;
    its head is not visible here, so confirm the ego effect is not applied
    twice.
    """
    
    holding = b.holding.copy()
    open_door = b.open_door
    rob_regions = b.rob_regions.copy()
    obj_regions = b.obj_regions.copy()
    # NOTE(review): this initial value is never read; it is overwritten from
    # obs.next_actions just before the return.
    next_actions = b.next_actions.copy()
    
    a_other_name, a_ego_name = a.name.split("*")

    # special cases
    # remember: next actions!!
    # case 1: place, pick
    # args are (other robot, object, region, ego robot)
    if a_other_name == "place_other" and a_ego_name == "pick_ego": 
        print("special case")
        print(a.name)
        s,obs = place_other_execute_fn(a, b, s, store)
        # From here on obj_regions IS s.obj_regions (same dict object), so
        # writes to either name change the state.
        obj_regions = s.obj_regions
        if obj_regions[a.args[1]] == a.args[2]: # other agent placed the object
            if random.random()<0.9: # 90% success
                holding[a.args[3]] = [a.args[1]]
                obj_regions[a.args[1]] = ""
                # update state
                s.holding[a.args[3]] = [a.args[1]]
                s.obj_regions[a.args[1]] = ""
            
    # case 2: open, pick
    # args are (other robot, ego robot, object, region)
    elif a_other_name == "open_other" and a_ego_name == "pick_ego":
        print("special case")
        print(a.name)
        s,obs = open_other_execute_fn(a, b, s, store)
        open_door = s.open_door
        if (a.args[3] == REGIONS[0] and open_door) or a.args[3] != REGIONS[0]: # in mug and door open or not in mug!
            if random.random()<0.9: # 90% success
                holding[a.args[1]] = [a.args[2]]
                obj_regions[a.args[2]] = ""
                # update state
                s.holding[a.args[1]] = [a.args[2]]
                s.obj_regions[a.args[2]] = ""
        
            
    # case 3: open, place
    # args are (other robot, ego robot, object, region)
    elif a_other_name == "open_other" and a_ego_name == "place_ego":
        print("special case")
        print(a.name)
        s,obs = open_other_execute_fn(a, b, s, store)
        open_door = s.open_door
        if (a.args[3] == REGIONS[0] and open_door) or a.args[3] != REGIONS[0]: # in mug and door open or not in mug!
            if random.random()<0.9: # 90% success
                holding[a.args[1]] = []
                obj_regions[a.args[2]] = a.args[3]
                # update state
                s.holding[a.args[1]] = []
                s.obj_regions[a.args[2]] = a.args[3]
        
        
    # case 4: open, close
    # the ego can only close a door that is open in s after the other's action
    elif a_other_name == "open_other" and a_ego_name == "close_ego":
        print("special case")
        print(a.name)
        s,obs = open_other_execute_fn(a, b, s, store)
        open_door = s.open_door
        if open_door and random.random()<0.9: # 90% success
            open_door = False
            # update state
            s.open_door = open_door
            
    # case 5: close, pick
    # case 6: close, place
    # only the other agent's close is observed here; the ego's pick/place is
    # left to non_deterministic_ego_execute_fn below
    elif a_other_name == "close_other" and (a_ego_name == "pick_ego" or a_ego_name == "place_ego"):
        print("special case")
        print(a.name)
        s,obs = close_other_execute_fn(a, b, s, store)
        open_door = obs.open_door
        
        
    # case 7: close, open
    # the ego can only open a door that is closed in s after the other's action
    elif a_other_name == "close_other" and a_ego_name == "open_ego":
        print("special case")
        print(a.name)
        s,obs = close_other_execute_fn(a, b, s, store)
        open_door = s.open_door
        if not open_door and random.random()<0.9: # 90% success
            open_door = True
            # update state
            s.open_door = open_door
        
    else: # rest
        
        # Only the other agent's outcome is read from s here; the ego's part
        # is left to non_deterministic_ego_execute_fn below.
        
        if a_other_name == "transit_other" or a_other_name == "transfer_other":
            
            s,obs = transit_transfer_other_execute_fn(a, b, s, store)
            rob_regions[a.args[0]] = s.rob_regions[a.args[0]]
             
        elif a_other_name == "open_other":
            
            s,obs = open_other_execute_fn(a, b, s, store)
            open_door = s.open_door
            
            
        elif a_other_name == "close_other":
            
            s,obs = close_other_execute_fn(a, b, s, store)
            open_door = s.open_door
            
            
        elif a_other_name == "pick_other":
            
            s,obs = pick_other_execute_fn(a, b, s, store)
            obj_regions[a.args[1]] = s.obj_regions[a.args[1]]
            holding[a.args[0]] = s.holding[a.args[0]]
            
            
        elif a_other_name == "place_other":
            
            s,obs = place_other_execute_fn(a, b, s, store)
            obj_regions[a.args[1]] = s.obj_regions[a.args[1]]
            holding[a.args[0]] = s.holding[a.args[0]]
            
     
           
    #  next actions
    
    # Update a deep copy so the caller's belief b is not touched by this step.
    b_temp = copy.deepcopy(b)
    o = EnvObservation(holding=holding,open_door=open_door,rob_regions=rob_regions,obj_regions=obj_regions)
    b_temp = b_temp.update(a,o,store)    
    # s is updated by non-deterministic ego execution    
    s,obs = non_deterministic_ego_execute_fn(a, b_temp, s, store) # next actions updated with next_actions of ego agent, not other agent!
    next_actions = obs.next_actions
        
    # The returned observation exposes the fields of s itself (not copies).
    return s, EnvObservation(holding=s.holding,open_door=s.open_door,rob_regions=s.rob_regions,obj_regions=s.obj_regions,next_actions=next_actions)  
def joint_effects_fn(a, b, store):
    """Sample the belief that results from one joint "<other>*<ego>" action.

    This is the planner-side model matching ``joint_execute_fn``: the other
    agent's outcome is sampled first (via the ``*_other_effects_fn`` helpers),
    then the ego's outcome with a 90% success rate.

    Cases 4 (open*close) and 7 (close*open) now sample the other agent's door
    action before the ego's, and the ego only acts if the door is in the state
    its action needs, as ``joint_execute_fn`` does. Previously the other
    agent's outcome was ignored there, so starting from the door state the
    other agent's precondition requires the modelled door could never change.

    Args:
        a: joint action; ``a.name`` is "<other>*<ego>" and ``a.args`` holds the
            other agent's arguments followed by the ego's.
        b: current belief; its dicts are copied before being edited.
        store: alias store forwarded to the helpers and to ``update``.

    Returns:
        An ``AbstractBeliefSet`` built from the single sampled successor belief.
    """
    holding = b.holding.copy()
    open_door = b.open_door
    rob_regions = b.rob_regions.copy()
    obj_regions = b.obj_regions.copy()

    a_other_name, a_ego_name = a.name.split("*")

    # Number of leading entries of a.args that belong to the other agent.
    if a_other_name == "transfer_other":
        n_args = 4
    elif a_other_name in ("open_other", "close_other", "nothing_other"):
        n_args = 1
    else:
        n_args = 3

    args_ego = a.args[n_args:]

    # special cases
    # case 1: place, pick -- args are (other robot, object, region, ego robot)
    if a_other_name == "place_other" and a_ego_name == "pick_ego":

        obj_regions, holding = place_other_effects_fn(a, b, store)

        if obj_regions[a.args[1]] == a.args[2]:  # other agent placed the object
            if random.random() < 0.9:  # ego: non-deterministic
                holding[a.args[3]] = [a.args[1]]
                obj_regions[a.args[1]] = ""

    # case 2: open, pick -- args are (other robot, ego robot, object, region)
    elif a_other_name == "open_other" and a_ego_name == "pick_ego":

        open_door = open_other_effects_fn(a, b, store)

        # The mug region is only accessible through an open door.
        if open_door or a.args[3] != REGIONS[0]:
            if random.random() < 0.9:
                holding[a.args[1]] = [a.args[2]]
                obj_regions[a.args[2]] = ""

    # case 3: open, place -- args are (other robot, ego robot, object, region)
    elif a_other_name == "open_other" and a_ego_name == "place_ego":

        open_door = open_other_effects_fn(a, b, store)

        # The mug region is only accessible through an open door.
        if open_door or a.args[3] != REGIONS[0]:
            if random.random() < 0.9:
                holding[a.args[1]] = []
                obj_regions[a.args[2]] = a.args[3]

    # case 4: open, close -- the ego can only close a door that ended up open
    elif a_other_name == "open_other" and a_ego_name == "close_ego":

        open_door = open_other_effects_fn(a, b, store)

        if open_door and random.random() < 0.9:
            open_door = False

    # case 5: close, pick
    # case 6: close, place
    # NOTE(review): unlike cases 2 and 3, the ego's pick/place here does not
    # check whether the (possibly just closed) door blocks the mug region --
    # confirm this is intended.
    elif a_other_name == "close_other" and a_ego_name in ("pick_ego", "place_ego"):

        open_door = close_other_effects_fn(a, b, store)

        if a_ego_name == "pick_ego":
            if random.random() < 0.9:
                holding[a.args[1]] = [a.args[2]]
                obj_regions[a.args[2]] = ""
        else:
            if random.random() < 0.9:
                holding[a.args[1]] = []
                obj_regions[a.args[2]] = a.args[3]

    # case 7: close, open -- the ego can only open a door that ended up closed
    elif a_other_name == "close_other" and a_ego_name == "open_ego":

        open_door = close_other_effects_fn(a, b, store)

        if not open_door and random.random() < 0.9:
            open_door = True

    else:  # rest: the two actions do not interact
        # other: non-deterministic
        if a_other_name in ("transit_other", "transfer_other"):
            rob_regions = transit_transfer_other_effects_fn(a, b, store)
        elif a_other_name == "open_other":
            open_door = open_other_effects_fn(a, b, store)
        elif a_other_name == "close_other":
            open_door = close_other_effects_fn(a, b, store)
        elif a_other_name == "pick_other":
            obj_regions, holding = pick_other_effects_fn(a, b, store)
        elif a_other_name == "place_other":
            obj_regions, holding = place_other_effects_fn(a, b, store)

        # ego: non-deterministic
        if a_ego_name in ("transit_ego", "transfer_ego"):
            if random.random() < 0.9:
                rob_regions[args_ego[0]] = args_ego[2]
        elif a_ego_name == "pick_ego":
            if random.random() < 0.9:
                holding[args_ego[0]] = [args_ego[1]]
                obj_regions[args_ego[1]] = ""
        elif a_ego_name == "place_ego":
            if random.random() < 0.9:
                holding[args_ego[0]] = []
                obj_regions[args_ego[1]] = args_ego[2]
        elif a_ego_name == "open_ego":
            if random.random() < 0.9:
                open_door = True
        elif a_ego_name == "close_ego":
            if random.random() < 0.9:
                open_door = False

    # resulting state: next actions are derived from a temporary successor
    # belief (a deep copy, so b itself is not touched by this step)
    b_temp = copy.deepcopy(b)
    obs = EnvObservation(holding=holding, open_door=open_door, rob_regions=rob_regions, obj_regions=obj_regions)
    b_temp = b_temp.update(a, obs, store)
    next_actions = get_next_actions_effects(a, b_temp, store)

    o = EnvObservation(holding=holding, open_door=open_door, rob_regions=rob_regions, obj_regions=obj_regions, next_actions=next_actions)
    new_belief = b.update(a, o, store)
    return AbstractBeliefSet.from_beliefs([new_belief], store)
            

# rest of the ego-actions have deterministic effects! 

# Set up environment dynamics
class ToyDiscrete(TampuraEnv):
    
    def initialize(self,ego=f"{ROB}1",s=EnvState()):
        """Build the alias store and the ego's initial belief from state ``s``.

        Args:
            ego: name of the robot this environment plans for.
            s: state to initialise the belief from. The default instance is
                created once, when the method is defined, and is shared by all
                calls, so it is only read here and never handed out.

        Returns:
            ``(belief, store)``: an ``EnvBelief`` holding copies of ``s``'s
            fields and the populated ``AliasStore``.
        """
        self.ego = ego

        store = AliasStore()

        # Register every typed object the planner may ground actions over.
        for rob in ROBOTS:
            store.set(rob, rob, "robot")

        for region in REGIONS:
            store.set(region, region, "region")

        store.set(MUG, MUG, "physical")
        store.set(DOOR, DOOR, "door")

        # Static facts: where the mug can rest, and which robot is the ego.
        store.certified.append(Atom("stable",[MUG,REGIONS[0]]))
        store.certified.append(Atom("stable",[MUG,REGIONS[2]]))

        store.certified.append(Atom("is_ego",[ego]))

        # Copy the mutable containers so that later in-place changes to the
        # state (or to the shared default EnvState) cannot leak into the
        # belief, and vice versa.
        b = EnvBelief(holding=copy.deepcopy(s.holding),
                      open_door=s.open_door,
                      rob_regions=copy.deepcopy(s.rob_regions),
                      obj_regions=copy.deepcopy(s.obj_regions),
                      next_actions=copy.deepcopy(s.next_actions))

        return b, store

    def get_problem_spec(self) -> ProblemSpec:
        
        actions_other = ACTION_NAMES
        
        others=[]
        for rob in ROBOTS:
            if rob != self.ego:
                others.append(rob)

        predicates = [
            Predicate("is_ego",["robot"]),
            Predicate("holding", ["robot","physical"]),
            Predicate("stable",["physical","region"]),
            Predicate("in_rob",["robot","region"]),
            Predicate("in_obj",["physical","region"]),
            Predicate("open",["door"]),
        ] 
        action_predicates = [Predicate("transit_action",["robot","region"]),Predicate("transfer_action",["robot","physical","region"]),Predicate("pick_action",["robot","physical"]),
                             Predicate("place_action",["robot","physical"]),Predicate("open_action",["robot"]),Predicate("close_action",["robot"]),Predicate("nothing_action",["robot"])]
        
        predicates += action_predicates
        
        possible_outcomes = [[Atom("transit_action",[rob,reg]) for reg in REGIONS]+[Atom("transfer_action",[rob,obj,reg]) for obj in OBJ_REGIONS.keys() for reg in REGIONS] +
                            [Atom("pick_action",[rob,obj]) for obj in OBJ_REGIONS.keys()] + [Atom("place_action",[rob,obj])for obj in OBJ_REGIONS.keys()] +
                            [Atom("open_action",[rob]),Atom("close_action",[rob]),Atom("nothing_action",[rob])] for rob in others]
        
        possible_outcomes_pick_place = [[Atom("transit_action",[rob,reg]) for reg in REGIONS]+[Atom("transfer_action",[rob,obj,reg]) for obj in OBJ_REGIONS.keys() for reg in REGIONS] +
                                        [Atom("pick_action",[rob,obj]) for obj in OBJ_REGIONS.keys()] + [Atom("place_action",[rob,obj])for obj in OBJ_REGIONS.keys()] +
                                        [Atom("nothing_action",[rob])] for rob in others]
        
        possible_outcomes_open_close = [[Atom("transit_action",[rob,reg]) for reg in REGIONS]+
                                        [Atom("open_action",[rob]),Atom("close_action",[rob]),Atom("nothing_action",[rob])] for rob in others]
        
        possible_outcomes_transit = [[Atom("transit_action",[rob,reg]) for reg in REGIONS]+
                                     [Atom("pick_action",[rob,obj]) for obj in OBJ_REGIONS.keys()] +
                                     [Atom("open_action",[rob]),Atom("close_action",[rob]),Atom("nothing_action",[rob])] for rob in others]
        
        possible_outcomes_transfer = [[Atom("transfer_action",[rob,obj,reg]) for obj in OBJ_REGIONS.keys() for reg in REGIONS] +
                                      [Atom("place_action",[rob,obj])for obj in OBJ_REGIONS.keys()] +
                                      [Atom("nothing_action",[rob])] for rob in others]
        
        
        # modify preconditions, effects and execute functions for observation
        action_schemas_ego = [
            
            # ego-agent
            ActionSchema(
                name="pick_ego",
                inputs=["?rob1","?obj1","?reg1"],
                input_types=["robot","physical","region"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Or([Not(Atom("in_rob",["?rob1",REGIONS[0]])),And([Atom("in_rob",["?rob1",REGIONS[0]]),Atom("open",[DOOR])])]), # TODO: modify!! accesibility of mug: derived predicate
                               Atom("in_obj",["?obj1","?reg1"]), # object is in region from where pick is attempted
                               Atom("in_rob",["?rob1","?reg1"]), # robot is in region from where pick is attempted
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                verify_effects=[OneOf([Atom("holding",["?rob1","?obj1"]),Atom("in_obj",["?obj1","?reg1"])])], # deterministic

            ),
            
            
            ActionSchema(
                name="place_ego",
                inputs=["?rob1","?obj1","?reg1"],
                input_types=["robot","physical","region"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Or([Not(Atom("in_rob",["?rob1",REGIONS[0]])),And([Atom("in_rob",["?rob1",REGIONS[0]]),Atom("open",[DOOR])])]), # TODO: modify!! accessibility of region
                               Atom("in_rob",["?rob1","?reg1"]), # robot is in region where place is attempted
                               Atom("holding",["?rob1","?obj1"]), # robot is holding the object that is to be placed 
                               Atom("stable",["?obj1","?reg1"]), # region where place is attempted is stable
                               ],
                verify_effects=[OneOf([Atom("holding",["?rob1","?obj1"]),Atom("in_obj",["?obj1","?reg1"])])], # deterministic 
            ),
            

            ActionSchema(
                name="transit_ego",
                inputs=["?rob1","?reg1","?reg2"],
                input_types=["robot","region","region"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Atom("in_rob",["?rob1","?reg1"]),
                               Not(Atom("in_rob",["?rob1","?reg2"])),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                verify_effects=[OneOf([Atom("in_rob",["?rob1","?reg1"]),Atom("in_rob",["?rob1","?reg2"])])],
            ),
            ActionSchema(
                name="transfer_ego",
                inputs=["?rob1","?reg1","?reg2","?obj1"],
                input_types=["robot","region","region","physical"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Atom("in_rob",["?rob1","?reg1"]),
                               Not(Atom("in_rob",["?rob1","?reg2"])),
                               Atom("holding",["?rob1","?obj1"])],
                verify_effects=[OneOf([Atom("in_rob",["?rob1","?reg1"]),Atom("in_rob",["?rob1","?reg2"])])],
            ),
            ActionSchema(
                name="open_ego",
                inputs=["?rob1"],
                input_types=["robot"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Not(Atom("open",[DOOR])),
                               Atom("in_rob",["?rob1",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"]))],
                verify_effects=[Atom("open",[DOOR])],
                
            ),
            ActionSchema(
                name="close_ego",
                inputs=["?rob1"],
                input_types=["robot"],
                preconditions=[Atom("is_ego",["?rob1"]), # is the ego agent
                               Atom("open",[DOOR]),
                               Atom("in_rob",["?rob1",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"]))],
                verify_effects=[Not(Atom("open",[DOOR]))],
        
            ),
            
            ActionSchema(
                name="nothing_ego",
                inputs=["?rob1"],
                input_types=["robot"],
                preconditions=[Atom("is_ego",["?rob1"])],
                effects=[],
            ),
        ]
        
        action_schemas_other = [
            
            # other agents
            ActionSchema(
                name="pick_other",
                inputs=["?rob2","?obj2","?reg3"],
                input_types=["robot","physical","region"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("pick_action",["?rob2","?obj2"]), # other agents' turn
                               Or([Not(Atom("in_rob",["?rob2",REGIONS[0]])),And([Atom("in_rob",["?rob2",REGIONS[0]]),Atom("open",[DOOR])])]), # accesibility of mug: derived predicate
                               Atom("in_obj",["?obj2","?reg3"]), # object is in region from where pick is attempted
                               Atom("in_rob",["?rob2","?reg3"]), # robot is in region from where pick is attempted
                               Not(Exists(Atom("holding",["?rob2","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                verify_effects=[OneOf([Atom("holding",["?rob2","?obj2"]),Atom("in_obj",["?obj2","?reg3"])])]+[OneOf(po) for po in possible_outcomes_pick_place],
            ),
            
            
            ActionSchema(
                name="place_other",
                inputs=["?rob2","?obj2","?reg3"],
                input_types=["robot","physical","region"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("place_action",["?rob2","?obj2"]), # other agents' turn
                               Or([Not(Atom("in_rob",["?rob2",REGIONS[0]])),And([Atom("in_rob",["?rob2",REGIONS[0]]),Atom("open",[DOOR])])]), # accessibility of region
                               Not(Atom("in_obj",["?obj2","?reg3"])), # object is in region where place is attempted
                               Atom("in_rob",["?rob2","?reg3"]), # robot is in region where place is attempted
                               Atom("holding",["?rob2","?obj2"]), # robot is holding the object that is to be placed 
                               Atom("stable",["?obj2","?reg3"]), # region where place is attempted is stable
                               ],
                verify_effects=[OneOf([Atom("holding",["?rob2","?obj2"]),Atom("in_obj",["?obj2","?reg3"])])]+[OneOf(po) for po in possible_outcomes_pick_place],
            ),
            
            ActionSchema(
                name="transit_other",
                inputs=["?rob2","?reg3","?reg4"],
                input_types=["robot","region","region"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("transit_action",["?rob2","?reg4"]), # other agents' turn
                               Atom("in_rob",["?rob2","?reg3"]),
                               Not(Atom("in_rob",["?rob2","?reg4"])),
                               Not(Exists(Atom("holding",["?rob2","?obj"]),["?obj"],["physical"])), # robot hand is free
                               ],
                verify_effects=[OneOf([Atom("in_rob",["?rob2",reg]) for reg in REGIONS])]+[OneOf(po) for po in possible_outcomes_transit],
            ),
            
            ActionSchema(
                name="transfer_other",
                inputs=["?rob2","?reg3","?reg4","?obj2"],
                input_types=["robot","region","region","physical"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("transfer_action",["?rob2","?obj2","?reg4"]), # other agents' turn
                               Atom("in_rob",["?rob2","?reg3"]),
                               Not(Atom("in_rob",["?rob2","?reg4"])),
                               Atom("holding",["?rob2","?obj2"]),
                               ],
                verify_effects=[OneOf([Atom("in_rob",["?rob2",reg]) for reg in REGIONS])]+[OneOf(po) for po in possible_outcomes_transfer],
            ),
            ActionSchema(
                name="open_other",
                inputs=["?rob2"],
                input_types=["robot"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("open_action",["?rob2"]), # other agents' turn
                               Not(Atom("open",[DOOR])),
                               Atom("in_rob",["?rob2",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob2","?obj"]),["?obj"],["physical"]))],
                verify_effects=[Atom("open",[DOOR])]+[OneOf(po) for po in possible_outcomes_open_close], # TODO: modify     
            ),
            ActionSchema(
                name="close_other",
                inputs=["?rob2"],
                input_types=["robot"],
                preconditions=[Not(Atom("is_ego",["?rob2"])), # is not the ego agent
                               Atom("close_action",["?rob2"]), # other agents' turn
                               Atom("open",[DOOR]),
                               Atom("in_rob",["?rob2",REGIONS[1]]),
                               Not(Exists(Atom("holding",["?rob2","?obj"]),["?obj"],["physical"]))],
                verify_effects=[Not(Atom("open",[DOOR]))]+[OneOf(po) for po in possible_outcomes_open_close], # TODO: modify
            ),
            ActionSchema(
                name="nothing_other",
                inputs=["?rob2"],
                input_types=["robot"],
                preconditions=[Not(Atom("is_ego",["?rob2"])),
                               Atom("nothing_action",["?rob2"])],
                verify_effects=[OneOf(po) for po in possible_outcomes],
            )
            
            
        ]
        
        
        
        action_schemas = []
        
        for as_other in action_schemas_other:
            
            as_other_name = as_other.name
            
            for as_ego in action_schemas_ego:
                
                as_ego_name = as_ego.name
                schema = ActionSchema()
                
                if (as_other_name == "transfer_other" and (as_ego_name == "transfer_ego" or as_ego_name == "pick_ego" or as_ego_name == "place_ego")) or \
                    (as_other_name == "pick_other" and (as_ego_name == "transfer_ego" or as_ego_name == "pick_ego" or as_ego_name == "place_ego")) or \
                        (as_other_name == "open_other" and as_ego_name == "open_ego") or (as_other_name == "close_other" and as_ego_name == "close_ego") or \
                            (as_other_name == "place_other" and (as_ego_name == "place_ego" or as_ego_name == "transfer_ego")): # not possible under beliefs
                    
                    continue
                
                # special cases
                # assumption: other agent acts before ego agent
                # assumption: pick is confusible with place in the sense nothing happens and vice versa
                # transit, transfer regions are confusible, nothing may happen (same region)
                # open, close are confusible with each other in the sense nothing happens
                # noop observation is deterministic
                
                # case 1: place, pick
                elif as_other_name == "place_other" and as_ego_name == "pick_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = ["?rob2","?obj1","?reg1","?rob1"]
                    schema.input_types = ["robot","physical","region","robot"]
                    schema.preconditions = [Atom("place_action",["?rob2","?obj1"]),Atom("is_ego",["?rob1"]),Not(Atom("is_ego",["?rob2"])),Atom("holding",["?rob2","?obj1"]),
                                            Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])),Atom("in_rob",["?rob1","?reg1"]),
                                            Atom("in_rob",["?rob2","?reg1"]),Atom("stable",["?obj1","?reg1"]),
                                            Or([Not(Atom("in_rob",["?rob2",REGIONS[0]])),And([Atom("in_rob",["?rob2",REGIONS[0]]),Atom("open",[DOOR])])]), # accessibility of region for place
                                            ]
                    schema.effects = [Not(Atom("place_action",["?rob2","?obj1"]))]
                    schema.verify_effects = [OneOf(po) for po in possible_outcomes_pick_place] + [OneOf([Atom("holding",["?rob1","?obj1"]),Atom("holding",["?rob2","?obj1"]),Atom("in_obj",["?obj1","?reg1"])])]
                    
                # case 2: open, pick
                elif as_other_name == "open_other" and as_ego_name == "pick_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + [Atom("is_ego",["?rob1"]), Atom("in_obj",["?obj1","?reg1"]), Atom("in_rob",["?rob1","?reg1"]), 
                                                                     Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"]))]
                    schema.effects = as_other.effects 
                    schema.verify_effects = as_other.verify_effects + [OneOf([Atom("holding",["?rob1","?obj1"]),Atom("in_obj",["?obj1","?reg1"])])]
                    
                    
                # case 3: open, place
                elif as_other_name == "open_other" and as_ego_name == "place_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + [Atom("is_ego",["?rob1"]), Atom("holding",["?rob1","?obj1"]), Atom("in_rob",["?rob1","?reg1"]), 
                                                                     Atom("stable",["?obj1","?reg1"])]
                    schema.effects = as_other.effects
                    schema.verify_effects = as_other.verify_effects + [OneOf([Atom("holding",["?rob1","?obj1"]),Atom("in_obj",["?obj1","?reg1"])])]
                    
                # case 4: open, close
                elif as_other_name == "open_other" and as_ego_name == "close_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + as_ego.preconditions
                    schema.effects = as_other.effects + as_ego.effects
                    schema.verify_effects = [OneOf(po) for po in possible_outcomes_open_close] + as_ego.verify_effects 
                    
                # case 5: close, pick
                elif as_other_name == "close_other" and as_ego_name == "pick_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + [Atom("is_ego",["?rob1"]), Not(Atom("in_obj",["?obj1",REGIONS[0]])),
                                                                     Not(Exists(Atom("holding",["?rob1","?obj"]),["?obj"],["physical"])), Atom("in_rob",["?rob1","?reg1"])]
                    schema.effects = as_other.effects + as_ego.effects # guaranteed pick in region stable but belief inhibits attempting place in region mug
                    schema.verify_effects = as_other.verify_effects + as_ego.verify_effects
                    
                # case 6: close, place
                elif as_other_name == "close_other" and as_ego_name == "place_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + [Atom("is_ego",["?rob1"]), Atom("holding",["?rob1","?obj1"]), Atom("in_rob",["?rob1","?reg1"]), 
                                                                     Not(Atom("in_rob",["?rob1",REGIONS[0]])), Atom("stable",["?obj1","?reg1"])]
                    schema.effects = as_other.effects + as_ego.effects # guaranteed place in region stable but belief inhibits attempting place in region mug
                    schema.verify_effects = as_other.verify_effects + as_ego.verify_effects
                    
                # case 7: close, open
                elif as_other_name == "close_other" and as_ego_name == "open_ego":
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + as_ego.preconditions
                    schema.effects = as_other.effects + as_ego.effects
                    schema.verify_effects = [OneOf(po) for po in possible_outcomes_open_close] + as_ego.verify_effects
                
                # regular cases
                else: 
                    
                    schema.name = as_other_name+"*"+as_ego_name
                    schema.inputs = as_other.inputs + as_ego.inputs
                    schema.input_types = as_other.input_types + as_ego.input_types
                    schema.preconditions = as_other.preconditions + as_ego.preconditions
                    schema.effects = as_other.effects + as_ego.effects
                    schema.verify_effects = as_other.verify_effects + as_ego.verify_effects   
                    
                schema.execute_fn = joint_execute_fn
                schema.effects_fn = joint_effects_fn

                action_schemas.append(schema)
                    
       
        
        reward = GOAL

        spec = ProblemSpec(
            predicates=predicates,
            action_schemas=action_schemas,
            reward=reward,
        )

        return spec



## Create environment and planner

In [11]:
# Planner configuration: load the packaged defaults, then override a few knobs.
cfg = load_config(config_file="../tampura/config/default.yml")

cfg.update(
    {
        # Print abstract belief, action, observation, and reward at each step.
        "print_options": "ab,a,o,r",
        "vis_graph": True,
        # Empirically, batch size 100 / 500 samples / 100 skeletons worked best.
        "batch_size": 500,  # previously 100
        "num_samples": 500,  # previously 500
        "max_steps": 15,
        "num_skeletons": 100,
        "flat_sample": False,  # TODO: check; may cause progressive widening
        # Experimental knobs: drop the next two entries to fall back to the
        # defaults from the config file.
        "from_scratch": False,  # re-use the previous search graph (it can grow large)
        "envelope_threshold": 0.1,  # low threshold: keep executing the current plan
    }
)

# Initial world state shared by both planners: both robots start in the last
# region, the mug sits in the first region, the door is closed, and each robot's
# last announced action is "nothing".
s = EnvState(
    holding={ROBOTS[0]: [], ROBOTS[1]: []},
    open_door=False,
    rob_regions={ROBOTS[0]: REGIONS[-1], ROBOTS[1]: REGIONS[-1]},
    obj_regions={MUG: REGIONS[0]},
    next_actions=[f"nothing_action-{ROBOTS[0]}", f"nothing_action-{ROBOTS[1]}"],
)

# Every run gets a timestamped directory prefix; each planner appends its own suffix.
save_dir = f"{os.getcwd()}/runs/run{time.time()}"

# --- Robot 1: config, environment, initial belief/store ---
save_dir_1 = f"{save_dir}planner1"
cfg1 = cfg.copy()
cfg1.update(save_dir=save_dir_1)
env1 = ToyDiscrete(config=cfg1)
b01, store1 = env1.initialize(ego=ROBOTS[0], s=s)

# --- Robot 2: config, environment, initial belief/store ---
save_dir_2 = f"{save_dir}planner2"
cfg2 = cfg.copy()
cfg2.update(save_dir=save_dir_2)
env2 = ToyDiscrete(config=cfg2)
b02, store2 = env2.initialize(ego=ROBOTS[1], s=s)

# Route INFO-level log output to each planner's save directory.
setup_logger(cfg1["save_dir"], logging.INFO)
setup_logger(cfg2["save_dir"], logging.INFO)

# One policy per robot, each planning over its own ego-centric problem spec.
planner1 = TampuraPolicy(config=cfg1, problem_spec=env1.problem_spec)
planner2 = TampuraPolicy(config=cfg2, problem_spec=env2.problem_spec)

# Give each environment its own private copy of the initial state.
env1.state = copy.deepcopy(s)
env2.state = copy.deepcopy(s)

## Run Planner
Make sure symk is installed (see README) before running the Tampura planner.
With the default settings, the two planners should reach the goal defined by `GOAL`: one of the robots is holding the mug and the door is closed.

In [12]:
# Alternating rollout of the two decentralized planners.
#
# On every step robot 1 plans and acts on the state robot 2 left behind, then
# robot 2 plans and acts on the state robot 1 left behind. Each robot keeps its
# own belief, store, environment and rollout history.
b1 = b01
b2 = b02

# Sanity-check each problem spec against its store before planning.
assert env1.problem_spec.verify(store1)
assert env2.problem_spec.verify(store2)

save_config(planner1.config, planner1.config["save_dir"])
save_config(planner2.config, planner2.config["save_dir"])

history1 = RolloutHistory(planner1.config)
history2 = RolloutHistory(planner2.config)

st = time.time()
for step in range(100):

    # ------------------------------------------------------------------
    # robot 1 acts
    # ------------------------------------------------------------------
    env1.state = copy.deepcopy(env2.state)  # important: continue from robot 2's resulting state
    s1 = copy.deepcopy(env1.state)
    b1.next_actions = s1.next_actions  # important: expose the latest announced actions to the belief
    a_b1 = b1.abstract(store1)
    reward1 = env1.problem_spec.get_reward(a_b1, store1)

    logging.info("\n robot 1 ")
    logging.info("\n" + ("=" * 10) + "t=" + str(step) + ("=" * 10))
    if "s" in planner1.print_options:
        logging.info("State: " + str(s1))
    if "b" in planner1.print_options:
        logging.info("Belief: " + str(b1))
    if "ab" in planner1.print_options:
        logging.info("Abstract Belief: " + str(a_b1))
    if "r" in planner1.print_options:
        logging.info("Reward: " + str(reward1))

    action1, info1, store1 = planner1.get_action(b1, store1)  # should only call effects functions!!??

    if "a" in planner1.print_options:
        logging.info("Action: " + str(action1))

    if action1.name == "no-op":
        # Nothing is executed: the belief carries over unchanged.
        bp1 = copy.deepcopy(b1)
        observation1 = None
    else:
        observation1 = env1.step(action1, b1, store1)  # should call execute function
        bp1 = b1.update(action1, observation1, store1)

        if planner1.config["vis"]:
            env1.vis_updated_belief(bp1, store1)

    a_bp1 = bp1.abstract(store1)
    # The history entry records the pre-action belief and pre-action reward.
    history1.add(s1, b1, a_b1, action1, observation1, reward1, info1, store1, time.time() - st)

    # From here on reward1 is the reward of robot 1's post-action belief.
    reward1 = env1.problem_spec.get_reward(a_bp1, store1)

    if "o" in planner1.print_options:
        logging.info("Observation: " + str(observation1))
    if "sp" in planner1.print_options:
        logging.info("Next State: " + str(env1.state))
    if "bp" in planner1.print_options:
        logging.info("Next Belief: " + str(bp1))
    if "abp" in planner1.print_options:
        logging.info("Next Abstract Belief: " + str(a_bp1))
    if "rp" in planner1.print_options:
        logging.info("Next Reward: " + str(reward1))

    # update the belief
    b1 = bp1

    # ------------------------------------------------------------------
    # robot 2 acts
    # ------------------------------------------------------------------
    env2.state = copy.deepcopy(env1.state)  # important: continue from robot 1's resulting state
    s2 = copy.deepcopy(env2.state)
    b2.next_actions = s2.next_actions  # important: expose the latest announced actions to the belief
    a_b2 = b2.abstract(store2)
    reward2 = env2.problem_spec.get_reward(a_b2, store2)

    # Stop once both robots' beliefs are rewarded: reward1 comes from robot 1's
    # post-action belief, reward2 from robot 2's pre-action belief.
    if reward1 and reward2:
        print("goal achieved")
        break

    logging.info("\n robot 2 ")
    logging.info("\n" + ("=" * 10) + "t=" + str(step) + ("=" * 10))
    # Robot 2's logging is gated by planner2's own print options (the original
    # consulted planner1's options for the next three checks).
    if "s" in planner2.print_options:
        logging.info("State: " + str(s2))
    if "b" in planner2.print_options:
        logging.info("Belief: " + str(b2))
    if "ab" in planner2.print_options:
        logging.info("Abstract Belief: " + str(a_b2))
    if "r" in planner2.print_options:
        logging.info("Reward: " + str(reward2))

    action2, info2, store2 = planner2.get_action(b2, store2)  # should only call effects functions!!??

    if "a" in planner2.print_options:
        logging.info("Action: " + str(action2))

    if action2.name == "no-op":
        # Nothing is executed: the belief carries over unchanged.
        bp2 = copy.deepcopy(b2)
        observation2 = None
    else:
        observation2 = env2.step(action2, b2, store2)  # should call execute function
        bp2 = b2.update(action2, observation2, store2)

        if planner2.config["vis"]:
            env2.vis_updated_belief(bp2, store2)

    a_bp2 = bp2.abstract(store2)
    # The history entry records the pre-action belief and pre-action reward.
    history2.add(s2, b2, a_b2, action2, observation2, reward2, info2, store2, time.time() - st)

    # From here on reward2 is the reward of robot 2's post-action belief.
    reward2 = env2.problem_spec.get_reward(a_bp2, store2)

    if "o" in planner2.print_options:
        logging.info("Observation: " + str(observation2))
    if "sp" in planner2.print_options:
        logging.info("Next State: " + str(env2.state))
    if "bp" in planner2.print_options:
        logging.info("Next Belief: " + str(bp2))
    if "abp" in planner2.print_options:
        logging.info("Next Abstract Belief: " + str(a_bp2))
    if "rp" in planner2.print_options:
        logging.info("Next Reward: " + str(reward2))

    # update the belief
    b2 = bp2

# Append one final entry per robot with no action/observation.
# NOTE(review): if the goal check above breaks out during the very first step,
# bp2 / a_bp2 / info2 have not been assigned yet and the second call raises
# NameError — confirm whether an already-satisfied initial goal must be handled.
history1.add(env1.state, bp1, a_bp1, None, None, reward1, info1, store1, time.time() - st)
history2.add(env2.state, bp2, a_bp2, None, None, reward2, info2, store2, time.time() - st)

logging.info("=" * 20)

env1.wrapup()
env2.wrapup()

# Rollout data is only saved when the planner is not configured for real execution.
if not planner1.config["real_execute"]:
    save_run_data(history1, planner1.config["save_dir"])

if not planner2.config["real_execute"]:
    save_run_data(history2, planner2.config["save_dir"])


 robot 1 

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_stable_mug']), Atom(pred_name='nothing_action', args=['robot_2']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='in_rob', args=['robot_2', 'region_stable_mug'])])
Reward: 0.0
[TampuraPolicy] Flat Action Parameter Sampling
[TampuraPolicy] Outcome Sampling


  0%|          | 0/500 [00:00<?, ?it/s]

100%|██████████| 500/500 [00:09<00:00, 51.84it/s]

[TampuraPolicy] MDP Solving





Action: nothing_other*transit_ego(robot_2, robot_1, region_stable_mug, region_door)
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': []}, open_door=False, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_stable_mug'}, obj_regions={'mug': 'region_mug'}, next_actions=['transit_action-robot_1%region_door', 'nothing_action-robot_2'])

 robot 2 

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_stable_mug']), Atom(pred_name='transit_action', args=['robot_1', 'region_door']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='in_rob', args=['robot_2', 'region_stable_mug'])])
Reward: 0.0
[TampuraPolicy] Flat Action Parameter Sampling
[TampuraPolicy] Outcome Sampling


100%|██████████| 500/500 [00:10<00:00, 45.91it/s]

[TampuraPolicy] MDP Solving





Action: transit_other*transit_ego(robot_1, region_stable_mug, region_door, robot_2, region_stable_mug, region_door)
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': []}, open_door=False, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_door'}, obj_regions={'mug': 'region_mug'}, next_actions=['transit_action-robot_2%region_door', 'nothing_action-robot_1'])

 robot 1 

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='transit_action', args=['robot_2', 'region_door']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='in_rob', args=['robot_2', 'region_stable_mug'])])
Reward: 0.0
[TampuraPolicy] MDP Solving
Action: transit_other*open_ego(robot_2, region_stable_mug, region_door, robot_1)
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': []}, open_door=True, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_door'}, obj_regions={'mug': 'region_mug'}, next_actions=['o

100%|██████████| 500/500 [00:11<00:00, 44.15it/s]

[TampuraPolicy] MDP Solving





Action: transit_other*nothing_ego(robot_2, region_door, region_mug, robot_1)
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': []}, open_door=True, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_mug'}, obj_regions={'mug': 'region_mug'}, next_actions=['nothing_action-robot_1', 'nothing_action-robot_2'])

 robot 2 

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='nothing_action', args=['robot_1']), Atom(pred_name='in_rob', args=['robot_2', 'region_mug']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='open', args=['door'])])
Reward: 0.0
[TampuraPolicy] Flat Action Parameter Sampling
[TampuraPolicy] Outcome Sampling


100%|██████████| 500/500 [00:15<00:00, 32.30it/s]

[TampuraPolicy] MDP Solving





Action: nothing_other*pick_ego(robot_1, robot_2, mug, region_mug)
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': ['mug']}, open_door=True, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_mug'}, obj_regions={'mug': ''}, next_actions=['pick_action-robot_2%mug', 'nothing_action-robot_1'])

 robot 1 

Abstract Belief: AbstractBelief(items=[Atom(pred_name='in_rob', args=['robot_1', 'region_door']), Atom(pred_name='in_rob', args=['robot_2', 'region_mug']), Atom(pred_name='in_obj', args=['mug', 'region_mug']), Atom(pred_name='open', args=['door']), Atom(pred_name='pick_action', args=['robot_2', 'mug'])])
Reward: 0.0
[TampuraPolicy] MDP Solving
Action: pick_other*close_ego(robot_2, mug, region_mug, robot_1)
picked
Observation: EnvObservation(holding={'robot_1': [], 'robot_2': ['mug']}, open_door=False, rob_regions={'robot_1': 'region_door', 'robot_2': 'region_mug'}, obj_regions={'mug': ''}, next_actions=['close_action-robot_1', 'nothing_action-robot_2'])

 robot 2 

A

In [None]:
# Display the base planner configuration as this cell's output.
# NOTE(review): the saved output below shows envelope_threshold=0.05 and
# from_scratch=True, whereas the setup cell assigns 0.1 and False; this cell
# has no execution count (In [None]), so the output looks stale — re-run to confirm.
cfg

{'task': 'class_uncertain',
 'planner': 'tampura_policy',
 'global_seed': 0,
 'vis': False,
 'vis_graph': True,
 'print_options': 'ab,a,o,r',
 'max_steps': 15,
 'batch_size': 500,
 'num_skeletons': 100,
 'flat_sample': False,
 'flat_width': 1,
 'pwa': 0.2,
 'pwk': 3.0,
 'envelope_threshold': 0.05,
 'num_samples': 500,
 'gamma': 0.95,
 'decision_strategy': 'prob',
 'learning_strategy': 'bayes_optimistic',
 'load': None,
 'real_camera': False,
 'real_execute': False,
 'symk_selection': 'unordered',
 'symk_direction': 'fw',
 'symk_simple': True,
 'from_scratch': True,
 'save_dir': '/home/am/tamp_multi_agent/notebooks/runs/run1748338845.8320177'}