In [1]:
from planning_inference.parsers import parse_model, parse_problem, parse_plan, parse_trajectory, parse_hypothesis, parse_observation_sequence
from planning_inference.generator import generate_trajectory
from planning_inference.functions import generate_all_literals, get_matching_literals

from planning_inference.pddl import Conjunction, Literal, Type, TypedObject, Effect, Truth, NumericConstant, PrimitiveNumericExpression, Increase
from planning_inference.pddl import SensorModel

from planning_inference.observations import Trajectory, Hypothesis, State

from planning_inference import DecodingTask

from sensor_models import load_sensor_model

import os
import copy
from collections import defaultdict
from itertools import combinations
from random import choice, choices, shuffle
from statistics import mean
import glob
import time

In [2]:
def _read_values(path, cast):
    """Read whitespace-separated tokens from ``path`` and convert each one with ``cast``."""
    with open(path, "r") as f:
        # split() without an argument tolerates repeated spaces, tabs and
        # newlines, none of which produce empty tokens.
        return [cast(token) for token in f.read().split()]


def evaluate(domain, task, table_format=False, observabilities=(30, 50, 70)):
    """Print aggregate benchmark statistics for one domain/task pair.

    For every observability level, each entry found under
    ``benchmark/<domain>/<task>/<observability>/`` is treated as a problem
    directory and must contain:

    * ``costs`` -- whitespace-separated integers, one per hypothesis;
    * ``times`` -- whitespace-separated floats;
    * ``sol``   -- the integer index of the correct hypothesis;
    * ``hyp*``  -- one file per hypothesis (only counted, never read).

    One summary is printed per observability level:

    * ``|H|``  -- mean number of ``hyp*`` files per problem;
    * ``|H*|`` -- mean number of hypotheses sharing the minimum cost;
    * ``Q``    -- fraction of problems whose correct hypothesis has minimum cost;
    * ``T``    -- mean, over problems, of the summed values in ``times``.

    Args:
        domain: Benchmark domain directory name.
        task: Task directory name inside the domain.
        table_format: If true, print a single ``&``-separated row per level;
            otherwise print a labelled, human-readable summary.
        observabilities: Iterable of observability levels to evaluate. Each
            one is converted with ``str`` to obtain the directory name.

    Raises:
        OSError: If a problem directory lacks ``costs``, ``times`` or ``sol``.
        ValueError: If one of those files is empty or holds a malformed number.
    """
    for observability in observabilities:
        base_path = os.path.join("benchmark", domain, task, str(observability))
        problems = sorted(glob.glob(os.path.join(base_path, "*")))

        if not problems:
            # Nothing to average: report it instead of dividing by zero below.
            print("%s: %s at %s%% -- no problems found in %s, skipped"
                  % (domain, task, str(observability), base_path))
            continue

        H_sizes = []
        H_star_sizes = []
        found_correct = []
        times = []

        for problem in problems:
            h_costs = _read_values(os.path.join(problem, "costs"), int)
            h_times = _read_values(os.path.join(problem, "times"), float)

            # Index of the real hypothesis
            with open(os.path.join(problem, "sol"), "r") as f:
                correct_h = int(f.read())

            # |H| is taken from the hypothesis files present on disk.
            h_files = glob.glob(os.path.join(problem, "hyp*"))
            H_sizes.append(len(h_files))

            # H* holds the indices of every hypothesis tied for the lowest cost.
            min_cost = min(h_costs)
            H_star = [i for i, cost in enumerate(h_costs) if cost == min_cost]
            H_star_sizes.append(len(H_star))

            found_correct.append(correct_h in H_star)
            times.append(sum(h_times))

        # Booleans sum as 0/1, so this is the fraction of solved problems.
        quality = sum(found_correct) / len(found_correct)

        if table_format:
            print("%.2f & %.2f & %.2f & %.2f" % (mean(H_sizes), mean(H_star_sizes), quality, mean(times)))
        else:
            print("%s: %s at %s%%" % (domain, task, str(observability)))
            print("|H| = %.2f, |H*| = %.2f, Q = %.2f, T = %.2f" % (mean(H_sizes), mean(H_star_sizes), quality, mean(times)))


In [7]:
# Benchmark domain whose results are summarised by the evaluation cells below.
domain = "openstacks"
# False prints labelled, human-readable summaries; True prints one
# "&"-separated row per observability level.
table_format = False

In [8]:
# Monitoring task: summarise the stored benchmark results for `domain`.

task = "monitoring"
evaluate(domain, task, table_format=table_format)

openstacks: monitoring at 30%
|H| = 5.60, |H*| = 2.30, Q = 0.80, T = 5.93
openstacks: monitoring at 50%
|H| = 5.60, |H*| = 2.20, Q = 0.70, T = 6.81
openstacks: monitoring at 70%
|H| = 5.60, |H*| = 2.30, Q = 0.80, T = 9.60


In [9]:
# Hindsight task: summarise the stored benchmark results for `domain`.

task = "hindsight"
evaluate(domain, task, table_format=table_format)

openstacks: hindsight at 30%
|H| = 5.80, |H*| = 2.00, Q = 1.00, T = 11.19
openstacks: hindsight at 50%
|H| = 5.80, |H*| = 1.70, Q = 1.00, T = 11.32
openstacks: hindsight at 70%
|H| = 5.80, |H*| = 1.60, Q = 1.00, T = 14.28


In [10]:
# Prediction task: summarise the stored benchmark results for `domain`.

task = "prediction"
evaluate(domain, task, table_format=table_format)

openstacks: prediction at 30%
|H| = 5.90, |H*| = 2.50, Q = 1.00, T = 15.70
openstacks: prediction at 50%
|H| = 5.90, |H*| = 2.50, Q = 1.00, T = 20.30
openstacks: prediction at 70%
|H| = 5.90, |H*| = 2.50, Q = 1.00, T = 18.77
