In [1]:
from planning_inference.parsers import parse_model, parse_problem, parse_plan, parse_trajectory, parse_hypothesis, parse_observation_sequence
from planning_inference.generator import generate_trajectory
from planning_inference.functions import generate_all_literals, get_matching_literals

from planning_inference.pddl import Conjunction, Literal, Type, TypedObject, Effect, Truth, NumericConstant, PrimitiveNumericExpression, Increase
from planning_inference.pddl import SensorModel

from planning_inference.observations import Trajectory, Hypothesis, State

from planning_inference import DecodingTask

from sensor_models import load_sensor_model

import os
import copy
from collections import defaultdict
from itertools import combinations
from random import choice, choices, shuffle
from statistics import mean
import glob
import time

In [2]:
def launch_experiments(domain, task, timeout, observabilities=(30, 50, 70), failure_cost=1000):
    """Decode every hypothesis of every benchmark problem and record cost and time.

    For each observability level, every directory matching
    ``benchmark/<domain>/<task>/<observability>/*`` is treated as one problem.
    For each problem the planning model is parsed from ``<problem>/domain``,
    the index of the correct hypothesis is read from ``<problem>/sol`` (only
    printed, not used in the computation), and each ``<problem>/hyp*`` file is
    decoded in sorted order.

    Side effects per problem:
      * ``<problem>/plan.NN`` is written for each hypothesis whose solution
        contains at least one action (NN is the zero-padded hypothesis index).
      * ``<problem>/costs`` and ``<problem>/times`` are (over)written with the
        space-separated costs and durations, one entry per hypothesis.
      * Progress is printed to stdout.

    Args:
        domain: Benchmark domain name; also passed to ``load_sensor_model``.
        task: Benchmark task name (sub-directory of the domain).
        timeout: Forwarded to ``decode`` as ``t`` (presumably seconds -- confirm
            against the planner wrapper).
        observabilities: Observability levels (sub-directory names) to iterate.
        failure_cost: Cost recorded when the returned solution has no actions.

    Returns:
        None.
    """
    # NOTE(review): `parse_LTLHypothesis` and `DecodingTaskLTL` are not imported in
    # the visible import cell (it imports `parse_hypothesis` and `DecodingTask`).
    # They must already be in scope for this function to run on a fresh kernel --
    # confirm and add the imports to the import cell.
    Ms = load_sensor_model(domain)

    for observability in observabilities:
        base_path = "benchmark/%s/%s/%s/" % (domain, task, str(observability))

        for problem in sorted(glob.glob(base_path + "*")):
            print(problem)

            # Planning model
            Mp = parse_model(problem + "/domain")

            # Index of the real hypothesis (reported for reference only)
            with open(problem + "/sol", "r") as f:
                correct_h = int(f.read())

            print("Correct hypothesis: %d" % correct_h)

            costs = []
            times = []

            # Sorted so that index i is stable across runs and matches plan.NN
            for i, h_file in enumerate(sorted(glob.glob(problem + "/hyp*"))):
                h = parse_LTLHypothesis(h_file)

                # Build a decoding problem for each hypothesis
                T = DecodingTaskLTL(Mp, Ms, h)

                # perf_counter is monotonic, so the measured duration cannot be
                # distorted by system clock adjustments during a long run.
                tic = time.perf_counter()
                sol = T.decode(clean=True, planner="downward", t=timeout)
                duration = time.perf_counter() - tic

                if len(sol.actions) == 0:
                    # Empty solution: record the sentinel cost and write no plan file
                    cost = failure_cost
                else:
                    cost = len(sol.actions)
                    sol.to_file(problem + "/plan.%s" % str(i).zfill(2))

                print("Hypothesis %d: %d, %.2f" % (i, cost, duration))

                costs.append(cost)
                times.append(duration)

            with open(problem + "/costs", "w") as f:
                f.write(" ".join(map(str, costs)))

            with open(problem + "/times", "w") as f:
                f.write(" ".join(map(str, times)))
    

In [3]:
# Experiment configuration used by the task cells that follow.
# `timeout` is handed to launch_experiments, which forwards it to the decoder.
domain, timeout = "driverlog", 120

In [4]:
# MONITORING -- run the experiments over the "monitoring" benchmark problems.

task = "monitoring"
launch_experiments(domain=domain, task=task, timeout=timeout)

benchmark/driverlog/monitoring/30/P00
Correct hypothesis: 3
Hypothesis 0: 1000, 27.38
Hypothesis 1: 27, 27.63
Hypothesis 2: 28, 26.80
Hypothesis 3: 27, 27.10
Hypothesis 4: 1000, 27.81
benchmark/driverlog/monitoring/30/P01
Correct hypothesis: 2
Hypothesis 0: 35, 32.63
Hypothesis 1: 42, 32.90
Hypothesis 2: 32, 30.53
Hypothesis 3: 39, 32.77
Hypothesis 4: 36, 31.91
benchmark/driverlog/monitoring/30/P02
Correct hypothesis: 2
Hypothesis 0: 1000, 22.88
Hypothesis 1: 1000, 23.41
Hypothesis 2: 25, 22.76
Hypothesis 3: 1000, 23.24
Hypothesis 4: 1000, 23.02
benchmark/driverlog/monitoring/30/P03
Correct hypothesis: 1
Hypothesis 0: 39, 31.36
Hypothesis 1: 32, 30.25
Hypothesis 2: 42, 31.83
Hypothesis 3: 41, 32.26
Hypothesis 4: 36, 31.88
benchmark/driverlog/monitoring/30/P04
Correct hypothesis: 2
Hypothesis 0: 1000, 27.68
Hypothesis 1: 27, 28.01
Hypothesis 2: 27, 27.27
Hypothesis 3: 28, 26.74
Hypothesis 4: 1000, 28.12
Hypothesis 5: 1000, 27.37
benchmark/driverlog/monitoring/30/P05
Correct hypothesis: 

In [5]:
# PREDICTION -- run the experiments over the "prediction" benchmark problems.

task = "prediction"
launch_experiments(domain=domain, task=task, timeout=timeout)

benchmark/driverlog/prediction/30/P00
Correct hypothesis: 4
Hypothesis 0: 46, 22.32
Hypothesis 1: 48, 22.92
Hypothesis 2: 50, 23.27
Hypothesis 3: 47, 22.75
Hypothesis 4: 41, 23.09
Hypothesis 5: 45, 23.12
benchmark/driverlog/prediction/30/P01
Correct hypothesis: 2
Hypothesis 0: 48, 31.55
Hypothesis 1: 49, 31.73
Hypothesis 2: 43, 31.62
Hypothesis 3: 46, 32.74
Hypothesis 4: 46, 32.83
benchmark/driverlog/prediction/30/P02
Correct hypothesis: 3
Hypothesis 0: 47, 23.36
Hypothesis 1: 47, 22.81
Hypothesis 2: 48, 24.62
Hypothesis 3: 41, 23.47
Hypothesis 4: 45, 23.03
benchmark/driverlog/prediction/30/P03
Correct hypothesis: 3
Hypothesis 0: 49, 32.30
Hypothesis 1: 47, 31.87
Hypothesis 2: 49, 32.19
Hypothesis 3: 44, 32.55
Hypothesis 4: 47, 31.82
benchmark/driverlog/prediction/30/P04
Correct hypothesis: 1
Hypothesis 0: 50, 22.77
Hypothesis 1: 41, 21.93
Hypothesis 2: 46, 23.37
Hypothesis 3: 45, 23.20
Hypothesis 4: 47, 23.95
Hypothesis 5: 48, 23.05
benchmark/driverlog/prediction/30/P05
Correct hypoth

In [6]:
# HINDSIGHT -- run the experiments over the "hindsight" benchmark problems.

task = "hindsight"
launch_experiments(domain=domain, task=task, timeout=timeout)

benchmark/driverlog/hindsight/30/P00
Correct hypothesis: 2
Hypothesis 0: 1000, 121.07
Hypothesis 1: 47, 41.25
Hypothesis 2: 41, 40.64
Hypothesis 3: 1000, 40.99
Hypothesis 4: 45, 41.20
benchmark/driverlog/hindsight/30/P01
Correct hypothesis: 0
Hypothesis 0: 42, 41.57
Hypothesis 1: 1000, 40.98
Hypothesis 2: 48, 40.72
Hypothesis 3: 44, 40.66
Hypothesis 4: 1000, 41.12
benchmark/driverlog/hindsight/30/P02
Correct hypothesis: 4
Hypothesis 0: 44, 42.05
Hypothesis 1: 1000, 40.37
Hypothesis 2: 1000, 40.20
Hypothesis 3: 45, 40.88
Hypothesis 4: 39, 38.58
benchmark/driverlog/hindsight/30/P03
Correct hypothesis: 4
Hypothesis 0: 1000, 42.05
Hypothesis 1: 1000, 40.34
Hypothesis 2: 42, 40.73
Hypothesis 3: 47, 40.94
Hypothesis 4: 40, 41.18
Hypothesis 5: 43, 41.13
benchmark/driverlog/hindsight/30/P04
Correct hypothesis: 0
Hypothesis 0: 40, 39.11
Hypothesis 1: 46, 41.63
Hypothesis 2: 47, 40.97
Hypothesis 3: 46, 40.74
Hypothesis 4: 45, 41.12
Hypothesis 5: 46, 40.61
benchmark/driverlog/hindsight/30/P05
Cor