In [1]:
from meta_planning import dataset
from meta_planning import LearningTask
from meta_planning.evaluation import SynEvaluator

# Model Learning

A model learning task is defined as $\Lambda = \langle m,\mathcal{O} \rangle$, where

* $m$ is the initial empty domain model
* $\mathcal{O}$ is a set of observations

A solution to $\Lambda$ consists of:
* a model $m'$ consistent with $m$ and $\mathcal{O}$
* a set of explanations $\mathcal{E} = \{\langle t,o \rangle \mid o \in \mathcal{O}\}$, where $t$ is a trajectory generated by $m'$ and consistent with $o$

### Pick a domain

In [2]:
# Show the names of the learning domains offered by the dataset module.
dataset.list_learning_domains()

Learning domains:
	 blocks
	 driverlog
	 ferry
	 floortile
	 grid
	 gripper
	 hanoi
	 miconic
	 npuzzle
	 parking
	 rovers
	 satellite
	 transport
	 visitall
	 zenotravel


In [3]:
# Planning domain used by the following cells; "blocks" is one of the names
# listed by dataset.list_learning_domains().
domain = "blocks"

### Define the initial model

In [4]:
# Reference model of the chosen domain; it is reused later as the model the
# learned result is evaluated against.
m_ref = dataset.load_model(domain)

# Initial model for the learning task: the reference model observed with both
# observabilities set to 0. The printed result keeps the predicates and the
# action headers but has empty preconditions and effects.
m = m_ref.observe(
    precondition_observability=0,
    effect_observability=0,
)

print(m)

(define (domain blocks)
(:requirements :strips)
(:types object)
(:predicates
	(on ?o1 - object ?o2 - object)
	(ontable ?o1 - object)
	(clear ?o1 - object)
	(handempty )
	(holding ?o1 - object)
)

(:action pick-up
	:parameters (?o1 - object)
	:precondition (and )
	:effect (and 
	)
)

(:action put-down
	:parameters (?o1 - object)
	:precondition (and )
	:effect (and 
	)
)

(:action stack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and )
	:effect (and 
	)
)

(:action unstack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and )
	:effect (and 
	)
))


### Define the observations

An observation is a partially observed trajectory $\tau = \langle s_0, a_1, s_1, \ldots, a_n, s_n \rangle$.

**Setting 1 - Bounded solution:** The length of the observed trajectory is known when:
* every action is observed, or
* every state is observed (even partially).

**Setting 2 - Unbounded solution:** The observation is not indicative of the length of the trajectory when:
* there are missing actions, and
* there are missing states.



In [5]:
# Number of trajectories to load for this domain (1-10).
num_observations = 2
T = dataset.load_trajectories(domain, select=range(num_observations))

# Turn every loaded trajectory into an observation.
# NOTE(review): 0.4 is passed positionally; presumably the state
# observability -- TODO confirm against Trajectory.observe.
O = [
    trajectory.observe(
        0.4,
        action_observability=0,
        goal_observability=1,
        keep_every_state=False,
    )
    for trajectory in T
]

# Only the first observation is printed.
print(O[0])

(observation

(:objects e - object b - object f - object d - object c - object g - object a - object)

(:state (clear a) (not (clear b)) (clear c) (not (clear d)) (not (clear e)) (not (clear f)) (not (clear g)) (handempty ) (not (holding a)) (not (holding b)) (not (holding c)) (not (holding d)) (not (holding e)) (not (holding f)) (not (holding g)) (not (on a a)) (not (on a b)) (not (on a c)) (not (on a d)) (not (on a e)) (not (on a f)) (on a g) (not (on b a)) (not (on b b)) (not (on b c)) (not (on b d)) (on b e) (not (on b f)) (not (on b g)) (not (on c a)) (not (on c b)) (not (on c c)) (on c d) (not (on c e)) (not (on c f)) (not (on c g)) (not (on d a)) (on d b) (not (on d c)) (not (on d d)) (not (on d e)) (not (on d f)) (not (on d g)) (not (on e a)) (not (on e b)) (not (on e c)) (not (on e d)) (not (on e e)) (on e f) (not (on e g)) (not (on f a)) (not (on f b)) (not (on f c)) (not (on f d)) (not (on f e)) (not (on f f)) (not (on f g)) (not (on g a)) (not (on g b)) (not (on g c)) (not 

### Solve the task

In [6]:
# Build the learning task from the initial model m and the observations O.
task = LearningTask(m,O)

# Solve the task. The returned object exposes learned_model and solution_plan,
# which the following cells print. The captured output of this cell shows an
# external Madagascar planner command line.
solution = task.learn()

ulimit -t 3000; /home/dieaigar/PhD/meta-planning/src/meta_planning/util/planners/madagascar/M compiled_domain compiled_problem -S 1 -Q -o solution_plan -F 41 > planner_out


In [7]:
# Print the domain model stored in the solution of the learning task.
print(solution.learned_model)

(define (domain blocks)
(:requirements :strips)
(:types object)
(:predicates
	(on ?o1 - object ?o2 - object)
	(ontable ?o1 - object)
	(clear ?o1 - object)
	(handempty )
	(holding ?o1 - object)
)

(:action pick-up
	:parameters (?o1 - object)
	:precondition (and (holding ?o1))
	:effect (and 
		(handempty )
		(clear ?o1)
		(ontable ?o1)
		(not (holding ?o1))
		0
	)
)

(:action put-down
	:parameters (?o1 - object)
	:precondition (and (handempty ) (clear ?o1) (ontable ?o1))
	:effect (and 
		(not (handempty ))
		(not (clear ?o1))
		(not (ontable ?o1))
		(holding ?o1)
		0
	)
)

(:action stack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and (handempty ) (clear ?o2) (on ?o2 ?o1))
	:effect (and 
		(clear ?o1)
		(not (on ?o2 ?o1))
		(holding ?o2)
		(not (clear ?o2))
		0
	)
)

(:action unstack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and (handempty ) (clear ?o1) (holding ?o2))
	:effect (and 
		(not (clear ?o1))
		(on ?o2 ?o1)
		(not (holding ?o2))
		(clear ?o2)
		0
	)


In [8]:
# Print the plan stored in the solution; in the captured output its steps are
# insert_pre_* / insert_eff_* actions naming an action schema and a predicate.
print(solution.solution_plan)

0 : (insert_eff_pick-up_clear_var1 )
1 : (insert_eff_pick-up_handempty )
2 : (insert_eff_pick-up_holding_var1 )
3 : (insert_eff_pick-up_ontable_var1 )
4 : (insert_eff_put-down_clear_var1 )
5 : (insert_eff_put-down_handempty )
6 : (insert_eff_put-down_holding_var1 )
7 : (insert_eff_put-down_ontable_var1 )
8 : (insert_eff_stack_clear_var1 )
9 : (insert_eff_stack_clear_var2 )
10 : (insert_eff_stack_holding_var2 )
11 : (insert_eff_stack_on_var2_var1 )
12 : (insert_eff_unstack_clear_var1 )
13 : (insert_eff_unstack_clear_var2 )
14 : (insert_eff_unstack_holding_var2 )
15 : (insert_eff_unstack_on_var2_var1 )
16 : (insert_pre_pick-up_holding_var1 )
17 : (insert_pre_put-down_clear_var1 )
18 : (insert_pre_put-down_handempty )
19 : (insert_pre_put-down_ontable_var1 )
20 : (insert_pre_stack_clear_var2 )
21 : (insert_pre_stack_handempty )
22 : (insert_pre_stack_on_var2_var1 )
23 : (insert_pre_unstack_clear_var1 )
24 : (insert_pre_unstack_handempty )
25 : (insert_pre_unstack_holding_var2 )
26 : (vali

### Evaluate the learned model

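The learned model $\mathcal{M}$ is compared against the ground-truth model $GTM$. $DEL(\mathcal{M},GTM)$ is the set of preconditions/effects that must be deleted from $\mathcal{M}$ (false positives), $INS(\mathcal{M},GTM)$ is the set that must be inserted into it (false negatives) to obtain $GTM$, and $size(\mathcal{M})$ is the total number of preconditions and effects in $\mathcal{M}$. The evaluation metrics are: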

$Precision=\frac{tp}{tp+fp}=\frac{size(\mathcal{M})- \left|DEL(\mathcal{M},GTM)\right|}{size(\mathcal{M})}$

$Recall= \frac{tp}{tp+fn}=\frac{size(\mathcal{M})- \left|DEL(\mathcal{M},GTM)\right|}{size(\mathcal{M}) - \left|DEL(\mathcal{M},GTM)\right| + \left|INS(\mathcal{M},GTM)\right|}$

In [9]:
# Evaluator built from the learned model and the reference model m_ref.
evaluator = SynEvaluator(solution.learned_model, m_ref)

# Run the evaluation. The captured output lists each action with some
# precondition/effect lines wrapped in ANSI colour codes 91 and 94.
# NOTE(review): presumably 91 (red) marks learned elements absent from the
# reference and 94 (blue) marks reference elements the learner missed -- TODO confirm.
evaluator.evaluate()

(:action put-down
	:parameters (?o1 - object)
	:precondition (and
		(handempty )
		(ontable ?o1)
		(clear ?o1)
	)
	:effects (and
		(not (ontable ?o1))
		(not (handempty ))
		(holding ?o1)
		(not (clear ?o1))
	)
)

(:action pick-up
	:parameters (?o1 - object)
	:precondition (and
		(holding ?o1)
	)
	:effects (and
		(ontable ?o1)
		(handempty )
		(not (holding ?o1))
		(clear ?o1)
	)
)

(:action stack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and
		(clear ?o2)
[91m		(handempty )[0m
[91m		(on ?o2 ?o1)[0m
[94m		(holding ?o1)[0m
	)
	:effects (and
		(not (clear ?o2))
		(clear ?o1)
[91m		(not (on ?o2 ?o1))[0m
[91m		(holding ?o2)[0m
[94m		(on ?o1 ?o2)[0m
[94m		(not (holding ?o1))[0m
[94m		(handempty )[0m
	)
)

(:action unstack
	:parameters (?o1 - object ?o2 - object)
	:precondition (and
		(handempty )
		(clear ?o1)
[91m		(holding ?o2)[0m
[94m		(on ?o1 ?o2)[0m
	)
	:effects (and
		(clear ?o2)
		(not (clear ?o1))
[91m		(on ?o2 ?o1)[0m
[91m		(not (holding ?o2))