In [1]:
# Experiment configuration shared by the cells below.
# Passed as the second positional argument to HyperParams in the training cell.
num_iters = 300
# Index into GRID_PARAMS_LIST / START_ROOM / FINAL_ROOM. Only 2, 3, 4 and 5 have
# `topleft` / `bottomright` room coordinates defined in the training cell.
env_num = 5
# Index into the `specs` list built in the training cell (spec0 .. spec3).
spec_num = 0
# Forwarded to abstract_reach.learn_all_paths(use_gpu=...).
use_gpu = True

In [2]:
from conformal.all_paths_conformal_pred import all_paths_conformal_pred
from conformal.bucketed_conformal_pred import bucketed_conformal_pred
from conformal.nonconformity_score_graph import DIRLCumRewardScoreGraph, DIRLTimeTakenScoreGraph
from spectrl.hierarchy.construction import adj_list_from_task_graph, automaton_graph_from_spec
from spectrl.hierarchy.reachability import HierarchicalPolicy, ConstrainedEnv
from spectrl.main.spec_compiler import ev, seq, choose, alw
from spectrl.rl.ddpg.ddpg import DDPGParams
from spectrl.util.io import parse_command_line_options, save_log_info, save_object
from spectrl.util.rl import print_performance, get_rollout
from spectrl.rl.ars import HyperParams

from spectrl.examples.rooms_envs import (
    GRID_PARAMS_LIST,
    MAX_TIMESTEPS,
    START_ROOM,
    FINAL_ROOM,
)
from spectrl.envs.rooms import RoomsEnv

import os

In [None]:
# Training cell: build the task specification for the selected rooms
# environment, learn policies on its abstract reachability graph, and pickle the
# results so the conformal-prediction cells can be run without retraining.
import dill as pickle  # dill is used under the name `pickle` throughout this notebook

render = False
folder = ''
itno = -1

log_info = []

# Directory that receives the pickled artifacts written at the end of this cell.
# It is created up front so that a bad path fails before the training run
# rather than after it.
policy_dir = "conformal_experiments_data/9rooms-counterexample-policies"
os.makedirs(policy_dir, exist_ok=True)

grid_params = GRID_PARAMS_LIST[env_num]

# Positional hyperparameters for spectrl.rl.ars.HyperParams; only `num_iters`
# is configurable from the config cell.
hyperparams = HyperParams(30, num_iters, 30, 15, 0.05, 0.3, 0.15)

print(
    "\n**** Learning Policy for Spec #{} in Env #{} ****".format(
        spec_num, env_num
    )
)

# Step 1: initialize system environment
system = RoomsEnv(grid_params, START_ROOM[env_num], FINAL_ROOM[env_num])

# Alternative DDPG hyperparameters, currently disabled
# (NOTE(review): presumably meant to be used together with algo="ddpg" below).
# state_dim = system.observation_space.shape[0]
# action_dim = system.action_space.shape[0]
# action_bound = system.action_space.high
# hyperparams = DDPGParams(state_dim, action_dim, action_bound,
#                             minibatch_size=256, num_episodes=num_iters,
#                             discount=0.95, actor_hidden_dim=256,
#                             critic_hidden_dim=256, epsilon_decay=3e-6,
#                             decay_function='linear', steps_per_update=100,
#                             gradients_per_update=100, buffer_size=200000,
#                             sigma=0.15, epsilon_min=0.3, target_noise=0.0003,
#                             target_clip=0.003, warmup=1000)

# Step 2: List of specs.
# Room coordinates are only known for environments 2-5. Fail loudly for any
# other value instead of hitting a NameError below or, worse, silently reusing
# `topleft` / `bottomright` left over from an earlier run of this cell.
if env_num in (2, 5):
    bottomright = (0, 2)
    topleft = (2, 0)
elif env_num in (3, 4):
    bottomright = (0, 3)
    topleft = (3, 0)
else:
    raise ValueError(
        "No room coordinates defined for env_num={}; expected one of 2, 3, 4, 5".format(
            env_num
        )
    )

spec0 = seq(
    choose(
        ev(grid_params.in_room(topleft)),
        alw(grid_params.avoid_center_without_scaling((1, 0)), ev(grid_params.in_room(bottomright)))
    ),
    ev(grid_params.in_room(FINAL_ROOM[env_num]))
)
spec1 = seq(
    alw(grid_params.avoid_center_without_scaling((1, 0)), ev(grid_params.in_room(bottomright))),
    ev(grid_params.in_room(FINAL_ROOM[env_num]))
)
# NOTE(review): spec2 is the only spec that passes a second argument (5) to
# avoid_center_without_scaling -- confirm this is intentional.
spec2 = alw(grid_params.avoid_center_without_scaling((1, 0), 5), ev(grid_params.in_room(bottomright)))
spec3 = seq(
    ev(grid_params.in_room(bottomright)),
    ev(grid_params.in_room(FINAL_ROOM[env_num]))
)

specs = [spec0, spec1, spec2, spec3]

# Step 3: construct abstract reachability graph
_, abstract_reach = automaton_graph_from_spec(specs[spec_num])
print("\n**** Abstract Graph ****")
abstract_reach.pretty_print()

# Step 4: Learn policy
path_policies = abstract_reach.learn_all_paths(
    system,
    hyperparams,
    res_model=None,
    max_steps=40,
    render=render,
    neg_inf=-100,
    safety_penalty=-1,
    num_samples=500,
    use_gpu=use_gpu,
    # algo="ddpg",
    alpha=0,
)

adj_list = adj_list_from_task_graph(abstract_reach.abstract_graph)
# A vertex is treated as terminal when its own index appears in its adjacency
# list, i.e. it has a self-loop.
terminal_vertices = [i for i in range(len(adj_list)) if i in adj_list[i]]

# Step 5: persist the learned policies and graph structure for the
# conformal-prediction cells.
artifacts = {
    "path_policies.pkl": path_policies,
    "adj_list.pkl": adj_list,
    "terminal_vertices.pkl": terminal_vertices,
}
for filename, artifact in artifacts.items():
    with open(os.path.join(policy_dir, filename), "wb") as f:
        pickle.dump(artifact, f)


In [3]:
from conformal.all_paths_conformal_pred import all_paths_conformal_pred
from conformal.bucketed_conformal_pred import bucketed_conformal_pred
from conformal.calculate_coverage import calculate_coverage
from conformal.nonconformity_score_graph import DIRLCumRewardScoreGraph

import dill as pickle
import json

def _unpickle(path):
    """Load and return the single object stored in the pickle file at `path`.

    Only use this on files written by this notebook's training cell:
    unpickling untrusted data can execute arbitrary code.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Reload the artifacts written by the training cell so that this cell does not
# depend on the training having happened in the current kernel session.
path_policies = _unpickle("conformal_experiments_data/9rooms-counterexample-policies/path_policies.pkl")
adj_list = _unpickle("conformal_experiments_data/9rooms-counterexample-policies/adj_list.pkl")
terminal_vertices = _unpickle("conformal_experiments_data/9rooms-counterexample-policies/terminal_vertices.pkl")

cum_reward_score_graph = DIRLCumRewardScoreGraph(
    adj_list,
    path_policies,
    cum_reward_type="cum_safety_reach_reward",
)

# Experiment grid: the values passed as `e` and as the bucket count, plus the
# sample counts used for calibration and for the coverage check.
n_samples = 1000
n_samples_coverage = 1000
es = [0.1]
total_buckets = [100]

# Results are keyed first by str(e), then by bucket count. `spec_num` comes
# from the config cell at the top of the notebook.
data_cum_reward = {
    "metadata": {
        "es": es,
        "total_buckets": total_buckets,
        "scores": "cum-reward",
        "env": "9-rooms",
        "spec": spec_num,
        "n_samples": n_samples,
    },
}

for e in es:
    e_data = {}
    for buckets in total_buckets:
        # Run both procedures with the same `e` and sample count.
        vbs = bucketed_conformal_pred(cum_reward_score_graph, e, buckets, n_samples)
        min_path, min_path_scores = all_paths_conformal_pred(cum_reward_score_graph, e, n_samples)

        # Bucketed result for the first terminal vertex at the full bucket count.
        vb = vbs.buckets[(terminal_vertices[0], buckets)]

        # The all-paths coverage check uses the single largest score as the
        # threshold on every edge of the path (len(min_path) - 1 edges).
        largest_min_path_score = max(min_path_scores)
        all_paths_thresholds = [largest_min_path_score] * (len(min_path) - 1)

        bucket_data = {
            "bucketed": {
                "path": vb.path,
                "path_buckets": vb.path_buckets,
                "path_score_quantiles": vb.path_score_quantiles,
                "max_path_score_quantile": max(vb.path_score_quantiles),
            },
            "all-paths": {
                "path": min_path,
                "min_path_scores": min_path_scores,
                "max_min_path_scores": largest_min_path_score,
            },
            "bucketed-coverage": calculate_coverage(
                cum_reward_score_graph,
                vb.path,
                vb.path_score_quantiles,
                n_samples_coverage,
            ),
            "all-paths-coverage": calculate_coverage(
                cum_reward_score_graph,
                min_path,
                all_paths_thresholds,
                n_samples_coverage,
            ),
        }
        e_data[buckets] = bucket_data
    data_cum_reward[str(e)] = e_data

# Serialise the collected results and write them next to the other experiment data.
json_data = json.dumps(data_cum_reward, indent=2)

with open("conformal_experiments_data/9rooms-counterexample-cum-reward.json", "w") as json_file:
    json_file.write(json_data)


In [4]:
def _count_successful(trajectory_scores):
    """Count trajectories in which every per-edge score is <= 0.

    Args:
        trajectory_scores: iterable with one entry per sampled trajectory; each
            entry is an iterable of numeric scores, one per edge of the path.

    Returns:
        The number of trajectories whose scores are all non-positive.

    NOTE(review): a score <= 0 is counted as a success here, matching the
    original check; confirm the sign convention against DIRLCumRewardScoreGraph.
    """
    return sum(
        1
        for edge_scores in trajectory_scores
        if all(score <= 0 for score in edge_scores)
    )


# Sample full trajectories along the two paths from vertex 0 to vertex 3.
trajectories_scores1 = cum_reward_score_graph.sample_full_path_cached([0, 1, 3], n_samples)
trajectories_scores2 = cum_reward_score_graph.sample_full_path_cached([0, 2, 3], n_samples)

# Every per-edge score is checked, rather than a hard-coded first two, so the
# count stays correct if the paths above are changed to a different length.
successful_trajs1 = _count_successful(trajectories_scores1)
successful_trajs2 = _count_successful(trajectories_scores2)

# Divide by the number of trajectories actually returned instead of assuming it
# equals n_samples.
print("path 1 success prob " + str(successful_trajs1/len(trajectories_scores1)))
print("path 2 success prob " + str(successful_trajs2/len(trajectories_scores2)))

path 1 success prob 0.973
path 2 success prob 0.993


In [6]:
# Dump the raw per-edge scores sampled for path [0, 2, 3] for manual inspection.
# NOTE(review): this prints every sampled trajectory (n_samples entries);
# consider printing a slice such as trajectories_scores2[:5] instead.
print(trajectories_scores2)

[[-23.279196897313724, -4.780568619686328], [-24.78266369012938, -4.896583034884593], [-24.630405816333912, -4.591562855589146], [-23.75115543628509, -4.704660475459537], [-24.33218874890852, -4.605590368757497], [-23.164933394275508, -4.8075590531543355], [-23.921523226529466, -4.913000561381011], [-24.98717075372457, -4.247788583582796], [-24.661279466318852, -4.263461059086882], [-24.724561091815644, -4.251335059421667], [-23.846049535097787, -4.7022293882108315], [-24.7948312015058, -4.735838545907239], [-23.16744183704866, -4.5197500700002955], [-24.53805664776075, -4.2978213491044315], [-23.052945707609837, -4.735910327189263], [-24.345708006624598, -4.2961220196084575], [-23.859609029576074, -4.6551340539929384], [-23.631014642917833, -4.769009724961535], [-23.449418950516375, -4.194721272927499], [-23.035731853465762, -4.261009029830657], [-23.061353931810192, -4.349923646041589], [-24.13162222059345, -4.366370309560356], [-23.83866739083498, -4.347836011036222], [-23.007416570