In [1]:
"""Probability extractor"""

import argparse
from datetime import datetime
import gym
import os
import sys
import time
import matplotlib.pyplot as plt
import numpy as np
import PyQt5

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune.registry import register_env

from flow.core.util import emission_to_csv
from flow.utils.registry import make_create_env
from flow.utils.rllib import get_flow_params
from flow.utils.rllib import get_rllib_config
from flow.utils.rllib import get_rllib_pkl

from examples.rllib.multiagent_exps.test_predictor.pedestrian_policy_1 import create_env, create_agent
from examples.rllib.multiagent_exps.bayesian_0_training_script import make_flow_params as bayesian_1_flow_params

EXAMPLE_USAGE = """
example usage:
    python ./visualizer_rllib.py /ray_results/experiment_dir/result_dir 1
Here the arguments are:
1 - the path to the simulation results
2 - the number of the checkpoint
"""

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
%matplotlib inline

In [3]:
def run_env(env, agent, config, flow_params, num_rollouts=None, horizon=300, ped_idx=4):
    """Roll out a trained policy and infer pedestrian presence from its actions.

    For every multi-agent, non-LSTM step the policy is queried twice: once on
    the real observation and once on a copy whose pedestrian-visibility flag
    (index ``ped_idx``) is flipped. The two Gaussian action distributions are
    passed to ``pr_ped_given_action`` to obtain Pr(action | ped),
    Pr(action | no_ped) and the posteriors Pr(ped | action) /
    Pr(no_ped | action). The posteriors are tracked twice: with a fixed prior
    and with a prior that is replaced by the previous posterior. Three plots
    are drawn with ``plot_2_lines`` at the end of every rollout; the series
    keep accumulating across rollouts.

    Parameters
    ----------
    env : object
        Environment exposing ``restart_simulation``, ``reset`` and ``step``.
    agent : object
        Trained agent exposing ``compute_action``.
    config : dict
        Agent configuration; ``config['multiagent']`` (optional) and
        ``config['model']`` are read.
    flow_params : dict
        Only ``flow_params['sim']`` (and its ``render`` attribute) is read.
    num_rollouts : int, optional
        Number of rollouts. When omitted, falls back to the notebook-level
        ``args.num_rollouts`` so existing four-argument calls keep working.
    horizon : int
        Maximum number of environment steps per rollout.
    ped_idx : int
        Index of the "is a pedestrian visible" flag in each agent's
        observation vector. The flag is treated as binary (0 / non-zero).

    Returns
    -------
    None
    """
    if num_rollouts is None:
        # Backward-compatible fallback to the global CLI namespace.
        num_rollouts = args.num_rollouts

    multiagent = bool(config.get('multiagent', {}).get('policies', None))
    use_lstm = bool(config['model']['use_lstm'])

    policy_map_fn = None
    if multiagent:
        # maps an agent id to the id of the policy that controls it
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func

    def zero_lstm_state():
        size = config['model']['lstm_cell_size']
        return [np.zeros(size, np.float32), np.zeros(size, np.float32)]

    # Recurrent state is kept per *agent* and created lazily. It used to be
    # pre-populated per policy id but looked up by agent id, which raises
    # KeyError as soon as the two id spaces differ.
    lstm_state = {}

    env.restart_simulation(
        sim_params=flow_params['sim'], render=flow_params['sim'].render)

    # The running prior is kept away from both 0 and 1: once it reaches either
    # extreme, Bayes' rule can never move it again.
    prior_floor, prior_ceiling = 0.01, 0.99

    probs_action_given_ped = []
    probs_action_given_no_ped = []

    # posteriors computed with the running (updated) prior
    probs_ped_given_action_updated_priors = []
    probs_no_ped_given_action_updated_priors = []

    # posteriors computed with the fixed prior
    probs_ped_given_action_fixed_priors = []
    probs_no_ped_given_action_fixed_priors = []

    updated_prior_prob_ped = 0.5
    fixed_prior_prob_ped = 0.5

    visible_pedestrian = []

    for _rollout in range(num_rollouts):
        state = env.reset()
        for _step in range(horizon):
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    policy_id = policy_map_fn(agent_id)

                    if use_lstm:
                        if agent_id not in lstm_state:
                            lstm_state[agent_id] = zero_lstm_state()
                        action[agent_id], lstm_state[agent_id], _ = \
                            agent.compute_action(state[agent_id],
                                                 state=lstm_state[agent_id],
                                                 policy_id=policy_id)
                        continue

                    curr_ped = state[agent_id][ped_idx]
                    visible_pedestrian.append(curr_ped)

                    # counterfactual observation with the visibility flag flipped
                    ped_flipped_state = np.copy(state[agent_id])
                    ped_flipped_state[ped_idx] = 1 if curr_ped == 0 else 0

                    action[agent_id], _, fetch_actual = agent.compute_action(
                        state[agent_id], policy_id=policy_id, full_fetch=True)
                    _, _, fetch_flipped = agent.compute_action(
                        ped_flipped_state, policy_id=policy_id, full_fetch=True)

                    # Label the two distributions by the flag value they were
                    # computed with. The real observation is the "ped" case
                    # only when a pedestrian is actually visible.
                    if curr_ped == 0:
                        fetch_ped, fetch_no_ped = fetch_flipped, fetch_actual
                    else:
                        fetch_ped, fetch_no_ped = fetch_actual, fetch_flipped

                    # behaviour_logits is unpacked as (mean, log std-dev)
                    mu_ped, ln_sigma_ped = fetch_ped['behaviour_logits']
                    mu_no_ped, ln_sigma_no_ped = fetch_no_ped['behaviour_logits']
                    sigma_ped = np.exp(ln_sigma_ped)
                    sigma_no_ped = np.exp(ln_sigma_no_ped)

                    action_ = action[agent_id][0]

                    probs_fixed = pr_ped_given_action(
                        action_, mu_ped, sigma_ped, mu_no_ped, sigma_no_ped,
                        fixed_prior_prob_ped, fixed_prior=True)
                    probs_updated = pr_ped_given_action(
                        action_, mu_ped, sigma_ped, mu_no_ped, sigma_no_ped,
                        updated_prior_prob_ped, fixed_prior=False)

                    probs_action_given_ped.append(probs_fixed["pr_a_given_ped"])
                    probs_action_given_no_ped.append(probs_fixed["pr_a_given_no_ped"])

                    # Fixed priors: Pr(ped | action), Pr(no_ped | action)
                    probs_ped_given_action_fixed_priors.append(probs_fixed["pr_ped_given_action"])
                    probs_no_ped_given_action_fixed_priors.append(probs_fixed["pr_no_ped_given_action"])
                    fixed_prior_prob_ped = probs_fixed["prior"]

                    # Updated priors: Pr(ped | action), Pr(no_ped | action)
                    probs_ped_given_action_updated_priors.append(probs_updated["pr_ped_given_action"])
                    probs_no_ped_given_action_updated_priors.append(probs_updated["pr_no_ped_given_action"])

                    # Clamp the next prior into [prior_floor, prior_ceiling].
                    # Written as comparisons so a NaN posterior still lands on
                    # the ceiling, exactly as the previous upper-only cap did.
                    next_prior = probs_updated["prior"]
                    if next_prior < prior_floor:
                        updated_prior_prob_ped = prior_floor
                    elif next_prior < prior_ceiling:
                        updated_prior_prob_ped = next_prior
                    else:
                        updated_prior_prob_ped = prior_ceiling
            else:
                action = agent.compute_action(state)

            # Step exactly once per iteration. A second, unchecked step used to
            # follow the termination test, applying every action twice and
            # discarding an observation.
            state, _, done, _ = env.step(action)

            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        # No IPython magic here: the matplotlib backend is selected once at
        # notebook level, and a magic is not valid inside a plain function.
        plot_2_lines(probs_ped_given_action_updated_priors, probs_no_ped_given_action_updated_priors, ['Pr(ped | action) using updated priors', 'Pr(no_ped | action) using updated priors'], viewable_ped=visible_pedestrian)
        plot_2_lines(probs_ped_given_action_fixed_priors, probs_no_ped_given_action_fixed_priors, ['Pr(ped | action) using fixed priors of Pr(ped) = 0.5', 'Pr(no_ped | action) using fixed priors of Pr(ped) = 0.5'], viewable_ped=visible_pedestrian)
        plot_2_lines(probs_action_given_ped, probs_action_given_no_ped, ['Pr(action | ped)', 'Pr(action | no_ped)'], viewable_ped=visible_pedestrian)


In [4]:
def _safe_ratio(numerator, denominator, fallback):
    """Return numerator / denominator, or ``fallback`` when a scalar denominator is exactly 0."""
    # Array denominators are passed straight through so element-wise use is unchanged.
    if np.ndim(denominator) == 0 and denominator == 0:
        return fallback
    return numerator / denominator


def pr_ped_given_action(action, mu_ped, s_ped, mu_no_ped, s_no_ped, prior, fixed_prior=True):
    """Apply Bayes' rule to infer pedestrian presence from an observed action.

    @Params
    action: the vehicle's acceleration as dictated by the policy
    mu_ped, s_ped: mean, sd pair from the policy receiving an input state where there is a visible pedestrian
    mu_no_ped, s_no_ped: mean, sd pair from the policy receiving an input state where there is no visible pedestrian
    prior: Pr(ped)
    fixed_prior: Boolean telling us whether to keep the prior Pr(ped) as is (True)
        or to replace it with Pr(ped | action) for the next call (False)

    @Returns
    probs, a dict containing:
    1. "pr_a_given_ped": Pr(action | ped)
    2. "pr_a_given_no_ped": Pr(action | no_ped)
    3. "pr_ped_given_action": Pr(ped | action)
    4. "pr_no_ped_given_action": Pr(no_ped | action)
    5. "prior": Pr(ped) for the next computation of Pr(ped | action)

    1 and 2 are the two Gaussian densities rescaled so that they sum to one;
    the common scale factor cancels in Bayes' rule. Only 5 depends on
    ``fixed_prior``.

    Degenerate scalar inputs no longer produce NaN: when both densities
    underflow to 0 the action is treated as uninformative (0.5 / 0.5), and
    when the Bayes evidence is 0 the posterior falls back to the prior.
    """
    probs = {}

    # Compute 1, 2: Pr(action | ped), Pr(action | no_ped)
    density_ped = accel_pdf(mu_ped, s_ped, action)
    density_no_ped = accel_pdf(mu_no_ped, s_no_ped, action)

    pr_a_given_ped = _safe_ratio(density_ped, density_ped + density_no_ped, 0.5)
    pr_a_given_no_ped = 1 - pr_a_given_ped

    probs["pr_a_given_ped"] = pr_a_given_ped
    probs["pr_a_given_no_ped"] = pr_a_given_no_ped

    # Compute 3, 4: Pr(ped | action), Pr(no_ped | action) via Bayes' rule
    joint_ped = pr_a_given_ped * prior
    joint_no_ped = pr_a_given_no_ped * (1 - prior)
    evidence = joint_ped + joint_no_ped

    probs["pr_ped_given_action"] = _safe_ratio(joint_ped, evidence, prior)
    probs["pr_no_ped_given_action"] = _safe_ratio(joint_no_ped, evidence, 1 - prior)

    # Compute 5: the prior to use on the next call
    if fixed_prior:
        probs["prior"] = prior
    else:
        probs["prior"] = probs["pr_ped_given_action"]
    return probs
    

def accel_pdf(mu, sigma, actual):
    """Evaluate the Normal(mu, sigma) density at the acceleration ``actual``."""
    standardised = (actual - mu) / sigma
    normaliser = 1 / np.sqrt(2 * np.pi * (sigma**2))
    return normaliser * np.exp(-0.5 * standardised**2)

def run_transfer(args):
    """Build the env and agent for the pedestrian scenario and roll the policy out.

    Flow parameters are created with pedestrians and rendering enabled, then
    the environment and the agent are created from ``args`` (in that order)
    and handed to ``run_env``. Returns None.
    """
    scenario_params = bayesian_1_flow_params(pedestrians=True, render=True)

    # create_env returns a pair; only the environment itself is needed here
    environment, _ = create_env(args, scenario_params)
    trained_agent, agent_config = create_agent(args, flow_params=scenario_params)

    run_env(environment, trained_agent, agent_config, scenario_params)

def plot_2_lines(y1, y2, legend, viewable_ped=False):
    """Plot two equally long series on a fresh figure, with an optional third overlay.

    Parameters
    ----------
    y1, y2 : sequence
        The two series to draw; the x axis is ``0 .. len(y1) - 1``.
    legend : list of str
        Labels passed to the legend, in plotting order.
    viewable_ped : sequence or False, optional
        Extra series drawn over the same x axis. Skipped when it is
        ``False``, ``None`` or empty.

    Each call opens its own figure, so consecutive calls never draw into one
    another's axes whatever matplotlib backend is active.
    """
    x = np.arange(len(y1))

    _, ax = plt.subplots()
    ax.plot(x, y1)
    ax.plot(x, y2)

    # Explicit emptiness test: `if viewable_ped:` raises for multi-element
    # numpy arrays because their truth value is ambiguous.
    has_overlay = (viewable_ped is not None
                   and viewable_ped is not False
                   and np.size(viewable_ped) > 0)
    if has_overlay:
        ax.plot(x, viewable_ped)

    ax.legend(legend, bbox_to_anchor=(0.5, 1.05), loc=3, borderaxespad=0.)

    plt.draw()
    # a very short pause lets the backend process the draw request
    plt.pause(0.001)

In [5]:
def create_parser():
    """Build the argparse parser for evaluating a checkpointed agent.

    Two positional arguments (``result_dir``, ``checkpoint_num``) are required;
    all ``--`` options are optional. The module-level ``EXAMPLE_USAGE`` text is
    shown verbatim as the help epilog.
    """
    cli = argparse.ArgumentParser(
        description="[Flow] Evaluates a reinforcement learning agent given a checkpoint.",
        epilog=EXAMPLE_USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # required input parameters: (name, help text)
    required = (
        ("result_dir", "Directory containing results"),
        ("checkpoint_num", "Checkpoint number."),
    )
    for name, text in required:
        cli.add_argument(name, type=str, help=text)

    # optional input parameters: (flag, add_argument keyword arguments)
    optional = (
        ("--run", dict(
            type=str,
            help="The algorithm or model to train. This may refer to the name "
                 "of a built-on algorithm (e.g. RLLib's DQN or PPO), or a "
                 "user-defined trainable function or class registered in the "
                 "tune registry. Required for results trained with flow-0.2.0 "
                 "and before.")),
        ("--num_rollouts", dict(
            type=int,
            default=1,
            help="The number of rollouts to visualize.")),
        ("--gen_emission", dict(
            action="store_true",
            help="Specifies whether to generate an emission file from the "
                 "simulation")),
        ("--evaluate", dict(
            action="store_true",
            help="Specifies whether to use the 'evaluate' reward for the "
                 "environment.")),
        ("--render_mode", dict(
            type=str,
            default="sumo_gui",
            help="Pick the render mode. Options include sumo_web3d, rgbd and "
                 "sumo_gui")),
        ("--save_render", dict(
            action="store_true",
            help="Saves a rendered video to a file. NOTE: Overrides "
                 "render_mode with pyglet rendering.")),
        ("--horizon", dict(
            type=int,
            help="Specifies the horizon.")),
    )
    for flag, spec in optional:
        cli.add_argument(flag, **spec)

    return cli

In [6]:
# Build the CLI parser and feed it an explicit argument list (results
# directory, checkpoint number) instead of sys.argv, since this runs as a
# notebook cell rather than from a shell.
parser = create_parser()
args = parser.parse_args(["./Bayesian1Env_with_pedestrians/", "100"])
# Shut down any Ray instance before starting a fresh single-CPU one --
# presumably so the cell can be re-run in the same kernel; TODO confirm.
ray.shutdown()
ray.init(num_cpus=1)
# NOTE: besides being passed here, `args` is read as a module-level name inside
# run_env (args.num_rollouts), so it must stay defined under this name.
run_transfer(args)

2020-03-27 23:17:56,507	INFO node.py:498 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-27_23-17-56_507156_29827/logs.
2020-03-27 23:17:56,623	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:25211 to respond...
2020-03-27 23:17:56,737	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:48626 to respond...
2020-03-27 23:17:56,743	INFO services.py:809 -- Starting Redis shard with 3.33 GB max memory.
2020-03-27 23:17:56,794	INFO node.py:512 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-03-27_23-17-56_507156_29827/logs.
2020-03-27 23:17:56,797	INFO services.py:1475 -- Starting the Plasma object store with 4.99 GB memory using /dev/shm.


NOTE: With render mode sumo_gui, an extra instance of the SUMO GUI will display before the GUI for visualizing the result. Click the green Play arrow to continue.
(2.1)--(1.1) (1.1)--(1.2) 1 1
True
NOTE: With render mode sumo_gui, an extra instance of the SUMO GUI will display before the GUI for visualizing the result. Click the green Play arrow to continue.
(2.1)--(1.1) (1.1)--(1.2) 1 1
Error making env  Cannot re-register id: Bayesian1Env-v0
True


2020-03-27 23:17:59,125	ERROR log_sync.py:34 -- Log sync requires cluster to be setup with `ray up`.


(2.1)--(1.1) (1.1)--(1.2) 1 1
Error making env  Cannot re-register id: Bayesian1Env-v0
True


2020-03-27 23:18:00,253	INFO rollout_worker.py:319 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2020-03-27 23:18:00,372	INFO dynamic_tf_policy.py:324 -- Initializing loss function with dummy input:

{ 'action_prob': <tf.Tensor 'av/action_prob:0' shape=(?,) dtype=float32>,
  'actions': <tf.Tensor 'av/actions:0' shape=(?, 1) dtype=float32>,
  'advantages': <tf.Tensor 'av/advantages:0' shape=(?,) dtype=float32>,
  'behaviour_logits': <tf.Tensor 'av/behaviour_logits:0' shape=(?, 2) dtype=float32>,
  'dones': <tf.Tensor 'av/dones:0' shape=(?,) dtype=bool>,
  'new_obs': <tf.Tensor 'av/new_obs:0' shape=(?, 17) dtype=float32>,
  'obs': <tf.Tensor 'av/observation:0' shape=(?, 17) dtype=float32>,
  'prev_actions': <tf.Tensor 'av/action:0' shape=(?, 1) dtype=float32>,
  'prev_rewards': <tf.Tensor 'av/prev_reward:0' shape=(?,) dtype=float32>,
  'rewards': <tf.Tensor 'av/rewards:0' shape=(?,) dtype=float32>,
  'value_targets': <tf.Tensor 'av/value_targets:0' sh

ValueError: ('Observation outside expected value range', Box(17,), array([ 0.,  0.,  1., 20.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]))

In [None]:
# Serialise the experiment entry points, their helpers and the parsed CLI
# arguments into a single file. Each dill.dump call appends one object to the
# stream, so the file holds them in exactly the order written below.
# NOTE(review): this import sits in the last cell rather than with the other
# imports at the top of the notebook.
import dill
with open("policy_0.pkl", 'wb') as f:
    dill.dump(run_transfer, f)
    dill.dump(args, f)
    dill.dump(bayesian_1_flow_params, f)
    dill.dump(create_env, f)
    dill.dump(create_agent, f)
    dill.dump(run_env, f)
    dill.dump(pr_ped_given_action, f)
    dill.dump(accel_pdf, f)
    dill.dump(plot_2_lines, f)
    # The lines of requirements.txt are stored as the final object in the file.
    # requirements.txt is only opened after everything above has been written.
    with open('./requirements.txt', 'r') as req:
        x = req.readlines()
        dill.dump(x, f)
