In [1]:
import tensorflow as tf
import numpy as np

from garage import wrap_experiment
from garage.envs import GymEnv, normalize
from garage.experiment import Snapshotter
from garage.experiment.deterministic import set_seed
from garage.sampler import RaySampler, MultiprocessingSampler, Sampler, LocalSampler
from garage.tf.algos import PPO
from garage.tf.baselines import GaussianMLPBaseline
from garage.tf.policies import GaussianMLPPolicy
from garage.trainer import TFTrainer
import garage

from dowel import logger, tabular
import akro
import pandas as pd
from multiprocessing import Pool


In [2]:
def kmh_to_ms(val):
    """Convert a speed from kilometres per hour to metres per second.

    Args:
        val: Speed in km/h; anything that supports division by a float.

    Returns:
        ``val`` divided by 3.6, i.e. the same speed expressed in m/s.
    """
    kmh_per_ms = 3.6  # 1 m/s equals 3.6 km/h
    metres_per_second = val / kmh_per_ms
    return metres_per_second

# Parameter set for the car experiment. Only the two converted speeds have
# units that are evident here (km/h turned into m/s by kmh_to_ms); the meaning
# and units of every other entry are presumed from the key names alone --
# TODO confirm against the environment that consumes this dict.
specs = dict(
    # presumably acceleration / braking limits
    aMax=2,
    bMax=9,
    bComf=2,
    # presumably jerk limits
    jComf=2,
    jMax=20,
    jMin=20,
    # presumably following-distance targets
    tTarget=1,
    gapMin=2,
    # speeds: target and maximum are given in km/h and stored in m/s
    vTarget=kmh_to_ms(50),
    vMax=kmh_to_ms(150),
    vMin=0,
    # presumably simulation settings
    timestep=1,
    clipdist=500,
    # presumably weights of the individual reward terms
    gamma_gap=1,
    gamma_follow=1,
    gamma_accel=1,
    gamma_jerk=1,
    gamma_crit=1,
)
        

In [3]:
def run_test(envs, policy, n_steps=100):
    """Step a batch of environments with ``policy`` and record each transition.

    All environments are stepped in lock-step for ``n_steps`` iterations. An
    environment whose step reports ``StepType.TERMINAL`` or
    ``StepType.TIMEOUT`` is reset and continues from the fresh observation on
    the next iteration.

    Args:
        envs: Sequence of environments. ``reset()`` must return a tuple whose
            first element is the initial observation; ``step(action)`` must
            return an object exposing ``step_type``, ``reward``,
            ``observation`` and ``env_info``.
        policy: Object with ``get_actions(observations)`` returning
            ``(actions, info)`` with one action per environment.
        n_steps: Number of lock-step iterations to run (default 100, the
            previously hard-coded value).

    Returns:
        tuple: ``(debug_memories, total_reward)`` where ``debug_memories[k]``
        is the list of ``(state, action, next_state, reward, terminal, info)``
        tuples of environment ``k`` and ``total_reward`` is the sum of all
        rewards over all environments and steps.
    """
    debug_memories = [[] for _ in envs]
    state = np.array([e.reset()[0] for e in envs])
    total_reward = 0

    for _ in range(n_steps):
        action, _policy_info = policy.get_actions(state)

        statewrappers = [e.step(a) for a, e in zip(action, envs)]

        terminals = np.array([
            (s.step_type == garage.StepType.TERMINAL
             or s.step_type == garage.StepType.TIMEOUT)
            for s in statewrappers
        ])
        rewards = np.array([s.reward for s in statewrappers])
        total_reward += np.sum(rewards)

        next_state = np.array([s.observation for s in statewrappers])
        infos = [s.env_info for s in statewrappers]

        for idx, memory in enumerate(debug_memories):
            memory.append((
                state[idx],
                action[idx],
                next_state[idx],
                rewards[idx],
                terminals[idx],
                infos[idx]
            ))

        state = next_state
        if np.any(terminals):
            # The recorded next_state[idx] entries can be views into
            # `next_state`; write the reset observations into a copy so the
            # stored terminal observations are not overwritten.
            state = next_state.copy()
            state[terminals] = [
                e.reset()[0] for e, t in zip(envs, terminals) if t
            ]

    return debug_memories, total_reward

In [33]:
# Restore the snapshot written at iteration 10 of the `ppo_car` experiment and
# roll the restored policy out in the restored environment.
snapshotter = Snapshotter()
tf.keras.backend.clear_session()
with tf.compat.v1.Session():
    data = snapshotter.load('data/local/experiment/ppo_car', itr=10)
    policy = data['algo'].policy
    env = data['env']
    # Run the rollouts sequentially in this process: multiprocessing.Pool has
    # to pickle the mapped callable, and a lambda cannot be pickled, so the
    # former `Pool(4).map(lambda ...)` call raised instead of returning paths.
    paths = [
        garage.rollout(env, policy, deterministic=True, max_episode_length=1000)
        for _ in range(4)
    ]
    # Later cells index `path` by key, so expose one rollout dict under that name.
    path = paths[0]




In [37]:
# List the keys stored under path["env_infos"]. Requires `path` to be a mapping
# (presumably a single rollout dict -- confirm against the cell that builds it).
path["env_infos"].keys()

dict_keys(['rGap', 'rFollow', 'rAccel', 'rJerk', 'rKrit', 'vOpt', 'bKin'])

In [32]:
# Show the instance attributes of the unwrapped environment's `owncar` object.
vars(env.unwrapped.owncar)

{'pos': 13.710673332214355,
 'speed': 7.199314117431641,
 'accel': 1.9717025756835938,
 'jerk': -0.02829742431640625}