In [1]:
from traffic_env import TrafficEnvironment
import numpy as np
import itertools
import docker, os
from sklearn.tree import DecisionTreeClassifier, export_text

Last used: Sep 12

## Example: extracting a Q-learning strategy as a scikit-learn decision tree

In [2]:
def choose_action(state, q_table, action_space):
    """Return the greedy action index for ``state``.

    Args:
        state: Index selecting one entry (row) of ``q_table``.
        q_table: Indexable table of Q-values; ``q_table[state]`` holds the
            values of every action in that state.
        action_space: Accepted for signature compatibility; not used here.

    Returns:
        The position of the largest Q-value in ``q_table[state]``
        (first occurrence on ties, as given by ``np.argmax``).
    """
    action_values = q_table[state]
    best_action = np.argmax(action_values)
    return best_action

In [16]:
def evaluate_cost(tree, data, labels, costs):
    """Sum the per-sample costs of every sample the tree misclassifies.

    Args:
        tree: Fitted model exposing ``predict(data)``.
        data: Samples passed unchanged to ``tree.predict``.
        labels: Expected label for each sample.
        costs: Array of per-sample costs, indexable with a boolean mask.

    Returns:
        Total cost of the samples whose prediction differs from its label.
    """
    misclassified = tree.predict(data) != labels
    wrong_costs = costs[misclassified]
    return np.sum(wrong_costs)


def extract_rules(tree, feature_names, class_names, max_depth=None):
    """Turn a fitted decision tree into one list of rule strings per class.

    Every root-to-leaf path becomes a conjunction such as
    ``(car_v <= 3.5000) & (ped_y > 0.5000)`` and is filed under the class
    with the largest value at that leaf.

    Args:
        tree: Fitted estimator exposing the ``tree_`` structure
            (``children_left``, ``children_right``, ``feature``,
            ``threshold``, ``value``).
        feature_names: Feature names indexed by ``tree_.feature``.
        class_names: Class labels indexed by the position of the dominant
            entry in the leaf's value vector; they also become the keys of
            the returned dict.
        max_depth: Deepest node depth to visit (root is depth 0). Paths that
            reach a node deeper than this are dropped without emitting a
            rule. ``None`` (the default) means no limit.

    Returns:
        Dict mapping each entry of ``class_names`` to a list of rule strings.
        A tree consisting of a single leaf yields one empty-string rule.
    """
    tree_ = tree.tree_
    class_rules = {cls: [] for cls in class_names}

    def recurse(node, depth, conditions):
        # Only enforce the cut-off when a limit was given; comparing an int
        # with None would raise TypeError.
        if max_depth is not None and depth > max_depth:
            return

        left = tree_.children_left[node]
        right = tree_.children_right[node]

        if left == right:  # Leaf node: both child pointers hold the same sentinel
            # Find the dominant class at this leaf
            outputs = tree_.value[node][0]
            class_name = class_names[outputs.argmax()]
            # Create rule as a conjunction of the conditions along the path
            rule = " & ".join(f"({cond})" for cond in conditions)
            class_rules[class_name].append(rule)
            return

        feature = feature_names[tree_.feature[node]]
        threshold = tree_.threshold[node]

        # Left child: samples with feature <= threshold
        if left != -1:
            recurse(left, depth + 1, conditions + [f"{feature} <= {threshold:.4f}"])

        # Right child: samples with feature > threshold
        if right != -1:
            recurse(right, depth + 1, conditions + [f"{feature} > {threshold:.4f}"])

    # Start recursion from the root with an empty condition list
    recurse(0, 0, [])
    return class_rules

In [18]:
def flatten_space(space, values):
    """Map a multi-dimensional point to a single flat integer index.

    Each dimension ``i`` is treated as a digit with
    ``space.high[i] - space.low[i] + 1`` possible values, offset by
    ``space.low[i]``. The first dimension is the most significant digit.

    Args:
        space: Object exposing per-dimension ``low`` and ``high`` bounds
            (inclusive). No other attribute is required.
        values: One value per dimension, in the same order as the bounds.

    Returns:
        The flat index as a Python ``int`` (the accumulated value is passed
        through ``int()`` once at the end).
    """
    flat_value = 0
    for val, lo, hi in zip(values, space.low, space.high):
        # Shift what has been accumulated so far by this dimension's size,
        # then add this dimension's zero-based offset.
        flat_value *= (hi - lo + 1)
        flat_value += (val - lo)
    return int(flat_value)

In [19]:
# Build the traffic environment from the example parameter file and turn
# rendering off.
env = TrafficEnvironment(paramsfile="params_files/params_example.json")
env.do_render = False

# Keep handles to the environment's observation and action spaces; their
# low/high bounds are used below to enumerate and flatten states.
observation_space = env.observation_space
action_space = env.action_space

# A fresh Q-table is not initialised here (call kept for reference); a
# previously saved table is loaded from disk instead.
# q_table = init_q_table(observation_space, action_space)

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects from
# the file, which can execute code. Only load trusted files; if the table is a
# plain numeric array, allow_pickle=False should suffice -- confirm.
q_table = np.load('qtable_saved3.npy', allow_pickle=True)

# Reset the environment and convert the initial observation to its flat index.
observation, info = env.reset()
state = flatten_space(observation_space, observation)  # Flatten observation into discrete state

[0.  0.4 0.4 0.2] [0.44503384 0.39451191 0.13210815 0.0283461 ]


In [20]:
# low = observation_space.low
# high = observation_space.high

# acts = {-1: [], 0: [], 1: []}
# reversed_acts = {}
# overall_min_q_value = np.min(q_table)

# # Dynamically create ranges for each dimension based on the shape
# ranges = [range(int(low[i]), int(high[i]) + 1) for i in range(observation_space.shape[0])]

# # Iterate over all combinations of values in the space
# for combination in itertools.product(*ranges):
#     flatstate = flatten_space(observation_space, combination)
#     action = np.argmax(q_table[flatstate])
#     cost = q_table[flatstate,action] - overall_min_q_value
# #     action, cost = choose_action(flatstate, q_table, action_space)
#         # Unflatten action index to get corresponding action values
#     action_unflattened = np.unravel_index(action, [int(high - low + 1) for low, high in zip(action_space.low, action_space.high)])
#     action_unflattened = [int(low + val) for val, low in zip(action_unflattened, action_space.low)]  # Convert back to original scale
#     car_action = action_unflattened[0]
#     acts[car_action].append(combination)
#     reversed_acts[combination] = (car_action, cost)
#     #     print(combination)
    

In [21]:
## EFFICIENT_VERSION

# Enumerate every integer point of the observation space as one row of a
# NumPy array (Cartesian product of the per-dimension ranges).
ranges = [
    range(int(observation_space.low[dim]), int(observation_space.high[dim]) + 1)
    for dim in range(observation_space.shape[0])
]
obs_data = np.array(list(itertools.product(*ranges)))

# Same enumeration for the action space (`ranges` is rebound here).
ranges = [
    range(int(action_space.low[dim]), int(action_space.high[dim]) + 1)
    for dim in range(action_space.shape[0])
]
act_data = np.array(list(itertools.product(*ranges)))

# Smallest Q-value anywhere in the table; used to shift the costs so that
# they are non-negative.
overall_min_q_value = q_table.min()

# Greedy action per table row, reduced to its first action component.
# Assumes q_table rows follow the same order as the rows of obs_data.
action_chosen = act_data[q_table.argmax(axis=1), 0]
# Cost of a state = its best Q-value measured from the global minimum.
costs = q_table.max(axis=1) - overall_min_q_value

In [22]:
feature_names = ['car_x', 'car_v', 'ped_x', 'ped_y']
# NOTE: extract_rules indexes this list by the position of the dominant class
# in each leaf, so it must line up with tree.classes_ (sorted unique labels).
# That holds only if all three actions occur in action_chosen.
class_names = [-1, 0, 1]
max_tree_depth = 8


# Train the decision tree, weighting every state by its cost.
# random_state is fixed so that the fitted tree, the reported cost and the
# extracted rules are reproducible across kernel restarts.
tree = DecisionTreeClassifier(max_depth=max_tree_depth, random_state=0)
tree.fit(obs_data, action_chosen, sample_weight=costs)
# Cost of the misclassified states, reported as a percentage of the total cost
cost_value = evaluate_cost(tree, obs_data, action_chosen, costs)
print(f"Cost function value: {100*cost_value/np.sum(costs)}%")

# Extract rules
rules = extract_rules(tree, feature_names, class_names, max_tree_depth)

# Print rules for each class as one disjunction, without a dangling
# separator after the last rule.
for class_name, rule_list in rules.items():
    print(f"Rules for class {class_name}:")
    rulestr = "  - " + " | ".join(rule_list)
    print(rulestr, "\n")

# export_text(tree, feature_names=feature_names, show_weights=False, max_depth=max_tree_depth)

Cost function value: 19.52947044606372%
Rules for class -1:
  - (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y <= 2.5000) & (car_x <= 27.5000) & (car_x <= 11.5000) & (ped_y > 0.5000) & (car_v > 0.5000) | (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y <= 2.5000) & (car_x > 27.5000) & (car_x > 59.5000) | (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y > 2.5000) & (car_v <= 2.5000) & (car_x <= 17.5000) & (car_v > 0.5000) & (car_x <= 14.5000) | (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y > 2.5000) & (car_v <= 2.5000) & (car_x <= 17.5000) & (car_v > 0.5000) & (car_x > 14.5000) | (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y > 2.5000) & (car_v <= 2.5000) & (car_x > 17.5000) & (car_x > 59.5000) & (ped_y <= 3.5000) | (ped_y <= 4.5000) & (car_v <= 3.5000) & (ped_x <= 55.5000) & (ped_y > 2.5000) & (car_v <= 2.5000) & (car_x > 17.5000) & (car_x > 59.5000) & (ped_y > 3.5000) | (ped_y <= 4.50

### Full strategy version -- far too large for Storm to handle

In [13]:
# accstr = ""
# firstone=True
# for obs in acts[1]:
#     if firstone:
#         firstone=False
#     else:
#         accstr += " | \n"
#     accstr += f"(car_x={obs[0]} & car_v={obs[1]} & ped_x={obs[2]} & ped_y={obs[3]})"
# accstr = f"({accstr})"

# with open('accstr.txt', 'w') as fp:
#     fp.write(accstr)

## Docker Example

In [67]:
# Run Storm inside the tempest container with the current working directory
# mounted at /mnt/vol1. This call blocks until the container exits and raises
# docker.errors.ContainerError on a non-zero exit status.
client = docker.from_env()
aux = client.containers.run(
    "lposch/tempest-devel-traces:latest",
    "storm --prism tmp/mdp.pm --prop prism_files/mdp_props.pm --trace-input tmp/tracepm.txt --buildstateval",
    volumes={os.getcwd(): {"bind": "/mnt/vol1", "mode": "rw"}},
    working_dir="/mnt/vol1",
    stderr=True,
)

ContainerError: Command 'storm --prism tmp/mdp.pm --prop prism_files/mdp_props.pm --trace-input tmp/tracepm.txt --buildstateval' in image 'lposch/tempest-devel-traces:latest' returned non-zero exit status 1: b''

In [18]:
# Run Storm in a detached container so that its exit status and console
# output can be collected even when Storm fails.
client = docker.from_env()
stormcommand = "storm --prism tmp/mdp.pm --prop prism_files/mdp_props.pm --trace-input tmp/tracepm.txt --exportresult mdppropos.txt --buildstateval"
aux = client.containers.run(
    "lposch/tempest-devel-traces:latest",
    stormcommand,
    volumes={os.getcwd(): {'bind': '/mnt/vol1', 'mode': 'rw'}},
    working_dir="/mnt/vol1",
    stderr=True,
    stdout=True,
    detach=True,  # Return a Container handle immediately; logs are read below
)
try:
    result = aux.wait()  # Wait for the container to finish; holds the exit status
    logs = aux.logs().decode("utf-8")  # Capture the logs
finally:
    # A detached container is not removed automatically; delete it once its
    # output has been captured so that repeated runs do not pile up stopped
    # containers.
    aux.remove(force=True)
# Presumably the marker that precedes Storm's result inside `logs`; it is not
# used in this cell.
restr = "Result (for initial states): "

In [19]:
# Show the console output captured from the Storm container.
print(logs)

Storm 1.6.3

Date: Wed Sep 11 12:25:09 2024
Command line arguments: --prism tmp/mdp.pm --prop prism_files/mdp_props.pm --trace-input tmp/tracepm.txt --exportresult mdppropos.txt --buildstateval
Current working directory: /mnt/vol1

Time for model input parsing: 0.011s.

 WARN (TraceInputFileParser.h:30): State "turn=1 & crashed=0 & car_x=42 & car_v=0 & finished=0 & ped_x=46 & ped_y=8" is invalid, this state is removed!
 WARN (TraceInputFileParser.h:30): State "turn=1 & crashed=0 & car_x=42 & car_v=0 & finished=0 & ped_x=49 & ped_y=8" is invalid, this state is removed!
 WARN (TraceInputFileParser.h:30): State "turn=1 & crashed=0 & car_x=44 & car_v=2 & finished=0 & ped_x=46 & ped_y=10" is invalid, this state is removed!
 WARN (TraceInputFileParser.h:30): State "turn=1 & crashed=0 & car_x=49 & car_v=5 & finished=0 & ped_x=49 & ped_y=12" is invalid, this state is removed!
 WARN (TraceInputFileParser.h:30): State "turn=1 & crashed=0 & car_x=54 & car_v=4 & finished=0 & ped_x=46 & ped_y=13" i

In [79]:
# Display the exit-status dict returned by aux.wait().
# NOTE(review): a StatusCode of 139 conventionally means the process was
# killed by SIGSEGV (128 + 11) -- confirm against the Storm logs.
result

{'StatusCode': 139}