# PPO - 2-way Single Intersection

## Imports

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from sumo_rl import SumoEnvironment

from tyre_pm_reward import tyre_pm_reward

## Create Environment and Instantiate Agent

In [None]:
# Build the headless (no GUI), single-agent SUMO environment for the 2-way
# single intersection. Rewards are computed by the project-local
# `tyre_pm_reward` function and per-episode metrics go to `out_csv_name`.
env = SumoEnvironment(
    net_file="nets/2way-single-intersection/single-intersection.net.xml",
    route_file="nets/2way-single-intersection/single-intersection-vhvh.rou.xml",
    out_csv_name="outputs/2way-single-intersection/ppo",
    single_agent=True,
    use_gui=False,
    # `num_seconds` is documented as an int; use an integer literal rather
    # than the float 1e5.
    num_seconds=100_000,
    reward_fn=tyre_pm_reward,
)

# PPO agent with an MLP policy acting on the environment created above.
model = PPO(
    policy="MlpPolicy",
    env=env,
    # Rollout collection and optimisation schedule.
    n_steps=256,
    batch_size=256,
    n_epochs=5,
    learning_rate=5e-3,
    # Return / advantage estimation.
    gamma=0.95,
    gae_lambda=0.99,
    # Loss shaping and gradient clipping.
    clip_range=0.3,
    ent_coef=0.09,
    vf_coef=0.05,
    max_grad_norm=0.9,
    # Diagnostics.
    verbose=1,
    tensorboard_log="./logs/2way-single-intersection/ppo_test",
)

In [None]:
# Baseline: score the untrained (randomly initialised) agent over a single
# episode before any learning has taken place.
# NOTE(review): with only one evaluation episode the reported standard
# deviation is presumably always 0.0 - confirm whether more episodes are wanted.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1)

# One value per line: mean first, then standard deviation.
print(mean_reward, std_reward, sep="\n")

## Train and Evaluate Agent

In [None]:
# Train the agent. `total_timesteps` is documented as an int, so pass an
# integer literal instead of the float 1e5.
model.learn(total_timesteps=100_000)

# Score the trained agent over a single episode and print the mean and
# standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1)
print(mean_reward)
print(std_reward)

In [None]:
# Persist the trained agent to disk under the given base name.
model.save("ppo_single_intersection")

# Shut down the SUMO simulation backing the environment so the simulator
# connection is not left open once the notebook is finished.
env.close()