In [1]:
# Run parameters read by the cells below.
num_iters = 500  # passed as the second positional argument to HyperParams
env_num = 2  # index into GRID_PARAMS_LIST, START_ROOM and FINAL_ROOM
spec_num = 7  # index into the `specs` list; selects the spec that is compiled and learned

In [2]:
from conformal.all_paths_conformal_pred import all_paths_conformal_pred
from conformal.bucketed_conformal_pred import bucketed_conformal_pred
from conformal.nonconformity_score_graph import DIRLCumRewardScoreGraph, DIRLTimeTakenScoreGraph
from spectrl.hierarchy.construction import adj_list_from_task_graph, automaton_graph_from_spec
from spectrl.hierarchy.reachability import HierarchicalPolicy, ConstrainedEnv
from spectrl.main.spec_compiler import ev, seq, choose, alw
from spectrl.util.io import parse_command_line_options, save_log_info, save_object
from spectrl.util.rl import print_performance, get_rollout
from spectrl.rl.ars import HyperParams

from spectrl.examples.rooms_envs import (
    GRID_PARAMS_LIST,
    MAX_TIMESTEPS,
    START_ROOM,
    FINAL_ROOM,
)
from spectrl.envs.rooms import RoomsEnv

import os


# Run flags. `render` is forwarded to learn_all_paths further down; `folder`,
# `itno` and `log_info` are not referenced again in the visible cells.
render, folder, itno = False, '', -1
log_info = list()

# Grid layout for the selected environment.
grid_params = GRID_PARAMS_LIST[env_num]

# Hyperparameters for learning; only the second argument (num_iters) comes from
# the parameters cell, the remaining values are fixed here.
hyperparams = HyperParams(30, num_iters, 30, 15, 0.05, 0.3, 0.15)

print("\n**** Learning Policy for Spec #{} in Env #{} ****".format(spec_num, env_num))

# Step 1: initialize system environment
system = RoomsEnv(
    grid_params,
    START_ROOM[env_num],
    FINAL_ROOM[env_num],
)

# Step 4: List of specs.
# Room coordinates used by the specs below. Coordinates are only defined for
# env_num 2, 3 and 4; fail fast with a clear message for any other value
# instead of leaving `bottomright` / `topleft` undefined and hitting a
# NameError when the specs are built.
if env_num == 2:
    bottomright = (0, 2)
    topleft = (2, 0)
elif env_num in (3, 4):
    bottomright = (0, 3)
    topleft = (3, 0)
else:
    raise ValueError(
        "No room coordinates defined for env_num={}; expected 2, 3 or 4".format(
            env_num
        )
    )

# Test specs: reach the final room (spec0); reach the final room and then the
# start room (spec1); reach the `topleft` room (spec2).
spec0 = ev(grid_params.in_room(FINAL_ROOM[env_num]))
spec1 = seq(
    ev(grid_params.in_room(FINAL_ROOM[env_num])),
    ev(grid_params.in_room(START_ROOM[env_num])),
)
spec2 = ev(grid_params.in_room(topleft))

# Go to the `topleft` room, then return to the start room.
spec3 = seq(
    ev(grid_params.in_room(topleft)),
    ev(grid_params.in_room(START_ROOM[env_num])),
)
# Choose between the `bottomright` and `topleft` rooms
# (same difficulty - learns 3/4 edges).
# NOTE(review): the original comment called these "top-right and bottom-left";
# confirm which naming matches the grid orientation.
spec4 = choose(
    ev(grid_params.in_room(bottomright)), ev(grid_params.in_room(topleft))
)
# Same choice as spec4, then go to the final room.
# Only one path is possible (learns 5/5 edges. Should have a bad edge)
spec5 = seq(
    choose(
        ev(grid_params.in_room(bottomright)), ev(grid_params.in_room(topleft))
    ),
    ev(grid_params.in_room(FINAL_ROOM[env_num])),
)
# Add an obstacle on the way to `topleft`: reach `topleft` under the
# avoid_center((1, 0)) constraint (presumably the center of room (1, 0) must
# be avoided - confirm against grid_params.avoid_center).
spec6 = alw(grid_params.avoid_center((1, 0)), ev(grid_params.in_room(topleft)))
# Either go to `topleft` (with the obstacle constraint from spec6) or to
# `bottomright`, then go to the final room. Only one route is possible.
spec7 = seq(
    choose(
        alw(grid_params.avoid_center((1, 0)), ev(grid_params.in_room(topleft))),
        ev(grid_params.in_room(bottomright)),
    ),
    ev(grid_params.in_room(FINAL_ROOM[env_num])),
)

# Indexed by spec_num.
specs = [spec0, spec1, spec2, spec3, spec4, spec5, spec6, spec7]

# Step 3: construct abstract reachability graph for the selected spec
# (the first value returned by automaton_graph_from_spec is discarded).
_, abstract_reach = automaton_graph_from_spec(specs[spec_num])
print("\n**** Abstract Graph ****")
abstract_reach.pretty_print()

# Step 5: Learn policy
path_policies = abstract_reach.learn_all_paths(
    system, hyperparams,
    res_model=None, max_steps=20, render=render,
    neg_inf=-100, safety_penalty=-1,
    num_samples=500, use_gpu=True,
)

# A vertex is treated as terminal when it is contained in its own adjacency
# entry (e.g. `3 -> 3` in the printed graph).
adj_list = adj_list_from_task_graph(abstract_reach.abstract_graph)
terminal_vertices = [
    vertex for vertex in range(len(adj_list)) if vertex in adj_list[vertex]
]



**** Learning Policy for Spec #7 in Env #2 ****

**** Abstract Graph ****
0 -> 1 2
1 -> 3
2 -> 3
3 -> 3

Learning policy for edge 0 -> 1



  logger.warn(



Steps taken at iteration 0: 1260
Time taken at iteration 0: 0.009764838218688964 mins
Expected reward at iteration 0: -358.2134984186313

Steps taken at iteration 1: 2520
Time taken at iteration 1: 0.019490158557891844 mins
Expected reward at iteration 1: -346.93042711132637

Steps taken at iteration 2: 3780
Time taken at iteration 2: 0.029172372817993165 mins
Expected reward at iteration 2: -339.1622479223143

Steps taken at iteration 3: 5040
Time taken at iteration 3: 0.03887265125910441 mins
Expected reward at iteration 3: -337.21610281404423

Steps taken at iteration 4: 6300
Time taken at iteration 4: 0.048585915565490724 mins
Expected reward at iteration 4: -332.4443357651369

Steps taken at iteration 5: 7560
Time taken at iteration 5: 0.05837599436442057 mins
Expected reward at iteration 5: -325.552448724378

Steps taken at iteration 6: 8820
Time taken at iteration 6: 0.06818817456563314 mins
Expected reward at iteration 6: -310.2438867124019

Steps taken at iteration 7: 10080
T

KeyboardInterrupt: 

In [None]:
#### Time-taken scores: e = 0.1, 100 total buckets

e, n_samples, total_buckets = 0.1, 500, 100
time_taken_score_graph = DIRLTimeTakenScoreGraph(adj_list, path_policies)
vbs = bucketed_conformal_pred(time_taken_score_graph, e, total_buckets, n_samples)
min_path, min_path_scores = all_paths_conformal_pred(time_taken_score_graph, e, n_samples)

# Bucketed result stored under (first terminal vertex, total_buckets).
vb = vbs.buckets[(terminal_vertices[0], total_buckets)]
print(
    "Bucketed:",
    vb.path,
    vb.path_buckets,
    vb.path_score_quantiles,
    max(vb.path_score_quantiles),
    sep="\n",
)

# The leading empty string reproduces the blank separator line.
print(
    "",
    "All paths:",
    min_path,
    min_path_scores,
    max(min_path_scores),
    sep="\n",
)

Bucketed:
[0, 1, 3]
[82, 18]
[21, 20]
21

All paths:
(0, 1, 3)
[21, 19]
21


In [4]:
#### Time-taken scores: e = 0.1, 200 total buckets

e, n_samples, total_buckets = 0.1, 500, 200
time_taken_score_graph = DIRLTimeTakenScoreGraph(adj_list, path_policies)
vbs = bucketed_conformal_pred(time_taken_score_graph, e, total_buckets, n_samples)
min_path, min_path_scores = all_paths_conformal_pred(time_taken_score_graph, e, n_samples)

# Bucketed result stored under (first terminal vertex, total_buckets).
vb = vbs.buckets[(terminal_vertices[0], total_buckets)]
print(
    "Bucketed:",
    vb.path,
    vb.path_buckets,
    vb.path_score_quantiles,
    max(vb.path_score_quantiles),
    sep="\n",
)

# The leading empty string reproduces the blank separator line.
print(
    "",
    "All paths:",
    min_path,
    min_path_scores,
    max(min_path_scores),
    sep="\n",
)

Bucketed:
[0, 1, 3]
[172, 28]
[21, 20]
21

All paths:
(0, 1, 3)
[21, 19]
21


In [None]:
#### Time-taken scores: e = 0.05, 2 total buckets

e, n_samples, total_buckets = 0.05, 500, 2
time_taken_score_graph = DIRLTimeTakenScoreGraph(adj_list, path_policies)
vbs = bucketed_conformal_pred(time_taken_score_graph, e, total_buckets, n_samples)
min_path, min_path_scores = all_paths_conformal_pred(time_taken_score_graph, e, n_samples)

# Bucketed result stored under (first terminal vertex, total_buckets).
vb = vbs.buckets[(terminal_vertices[0], total_buckets)]
print(
    "Bucketed:",
    vb.path,
    vb.path_buckets,
    vb.path_score_quantiles,
    max(vb.path_score_quantiles),
    sep="\n",
)

# The leading empty string reproduces the blank separator line.
print(
    "",
    "All paths:",
    min_path,
    min_path_scores,
    max(min_path_scores),
    sep="\n",
)

Bucketed:
[0, 1, 3]
[1, 1]
[21, 20]
21

All paths:
(0, 1, 3)
[21, 20]
21


In [40]:
# Sweep time-taken scores over every (e, bucket count) pair and collect the
# bucketed and all-paths results in a nested dict: str(e) -> bucket count -> results.
time_taken_score_graph = DIRLTimeTakenScoreGraph(adj_list, path_policies)
n_samples = 500
es = [0.2, 0.1, 0.05]
total_buckets = [5, 10, 20, 25]

data_time_taken = {
    "metadata": {
        "es": es,
        "total_buckets": total_buckets,
        "scores": "time-taken",
        "env": "9-rooms",
        "spec": spec_num,
        "n_samples": n_samples,
    },
}

for e in es:
    e_data = {}
    for buckets in total_buckets:
        vbs = bucketed_conformal_pred(time_taken_score_graph, e, buckets, n_samples)
        min_path, min_path_scores = all_paths_conformal_pred(time_taken_score_graph, e, n_samples)
        # Bucketed result stored under (first terminal vertex, bucket count).
        vb = vbs.buckets[(terminal_vertices[0], buckets)]

        bucket_data = {
            "bucketed": {
                "path": vb.path,
                "path_buckets": vb.path_buckets,
                "path_score_quantiles": vb.path_score_quantiles,
                "max_path_score_quantile": max(vb.path_score_quantiles),
            },
            "all-paths": {
                "path": min_path,
                "min_path_scores": min_path_scores,
                "max_min_path_scores": max(min_path_scores),
            },
        }
        e_data[buckets] = bucket_data
    data_time_taken[str(e)] = e_data

changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again


In [41]:
import json
import os

# Serialize the time-taken sweep results (`data_time_taken`) to JSON.
json_data = json.dumps(data_time_taken, indent=2)

# Create the output directory first: open(..., "w") raises FileNotFoundError
# when the parent directory is missing, e.g. on a fresh checkout.
os.makedirs("conformal_experiments_data", exist_ok=True)

# Store the JSON string in a file
with open("conformal_experiments_data/9rooms-spec7-time-taken.json", "w") as json_file:
    json_file.write(json_data)

In [39]:
import json
import os
from importlib import reload

import conformal.all_paths_conformal_pred as ap
import conformal.bucketed_conformal_pred as bcp

# Pick up local edits to the bucketed implementation without restarting the
# kernel. reload() updates the module object in place, so `bcp` stays valid;
# the from-imports below rebind the notebook-level function names.
reload(bcp)
from conformal.all_paths_conformal_pred import all_paths_conformal_pred
from conformal.bucketed_conformal_pred import bucketed_conformal_pred

# Sweep cumulative-reward scores over every (e, bucket count) pair.
# This cell uses its own names (`cum_reward_score_graph`, `data_cum_reward`)
# so it no longer overwrites the time-taken graph and results from the
# time-taken cells.
cum_reward_score_graph = DIRLCumRewardScoreGraph(adj_list, path_policies)
n_samples = 500
es = [0.2, 0.1, 0.05]
total_buckets = [5, 10, 20, 25, 50]

data_cum_reward = dict()
# "scores" records which score graph produced the data; it previously said
# "time-taken" although this sweep uses the cumulative-reward graph.
data_cum_reward["metadata"] = {
    "es": es,
    "total_buckets": total_buckets,
    "scores": "cum-reward",
    "env": "9-rooms",
    "spec": spec_num,
    "n_samples": n_samples,
}

for e in es:
    e_data = dict()
    for buckets in total_buckets:
        bucket_data = dict()
        vbs = bcp.bucketed_conformal_pred(cum_reward_score_graph, e, buckets, n_samples)
        min_path, min_path_scores = ap.all_paths_conformal_pred(cum_reward_score_graph, e, n_samples)
        # Bucketed result stored under (first terminal vertex, bucket count).
        vb = vbs.buckets[(terminal_vertices[0], buckets)]

        bucket_data["bucketed"] = {"path": vb.path,
                                   "path_buckets": vb.path_buckets,
                                   "path_score_quantiles": vb.path_score_quantiles,
                                   "max_path_score_quantile": max(vb.path_score_quantiles)}
        bucket_data["all-paths"] = {"path": min_path, "min_path_scores": min_path_scores, "max_min_path_scores": max(min_path_scores)}
        e_data[buckets] = bucket_data
    data_cum_reward[str(e)] = e_data

# Convert the Python object to a JSON string
json_data = json.dumps(data_cum_reward, indent=2)

# Create the output directory first: open(..., "w") raises FileNotFoundError
# when the parent directory is missing.
os.makedirs("conformal_experiments_data", exist_ok=True)

# Store the JSON string in a file
with open("conformal_experiments_data/9rooms-spec7-cum-reward.json", "w") as json_file:
    json_file.write(json_data)

changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again
changed again


In [32]:
# Ad-hoc inspection: quantiles stored under key (3, 15) of whichever `vbs`
# the most recently executed cell left behind.
vbs.buckets[3, 15].path_score_quantiles

[np.float64(-0.002153621251416382), np.float64(-0.0075586513796928045)]