In [1]:
import gymnasium
import highway_env

from matplotlib import pyplot as plt





  from pkg_resources import resource_stream, resource_exists


In [2]:
# No overrides are currently active, so an empty config is handed to the
# environment. Settings tried earlier and left disabled:
#     "render_agent": False
#     "high_speed_reward": 1.2
#     "vehicles_count": 30
config = dict()

# NOTE(review): recent Gymnasium docs state that max_episode_steps=-1 skips the
# TimeLimit wrapper -- confirm this holds for the installed version.
env = gymnasium.make(
    "highway-fast-v0",
    max_episode_steps=-1,
    config=config,
)


In [3]:
from algorithms.Agent import Agent
from algorithms.RDQN import RDQN

# Alternative agent kept for reference (disabled): agent = TestAgent(env)

# The learning agent used by the training and evaluation cells below.
agent: Agent = RDQN(
    env,
    total_training_steps=1500,
)

In [4]:
# Number of training episodes passed to train_agent() below.
NUM_EPISODES = 1_500

def train_agent(num_episodes):
    """Train the module-level `agent` on the module-level `env`.

    Each episode resets the environment and steps it until the environment
    reports either termination or truncation. After every step the agent's
    `learn` method is called with the flattened observation, the action, the
    reward and a `(done, flattened next observation)` tuple.

    Args:
        num_episodes: How many episodes to run.

    Returns:
        A tuple `(total_rewards, discounted_rewards)` of two lists, each with
        one entry per episode: the plain sum of rewards, and the sum of
        rewards weighted by successive powers of `agent.gamma`.
    """
    episode_returns = []
    episode_discounted_returns = []

    for episode in range(num_episodes):
        print(f"Episode {episode}")

        undiscounted_sum = 0
        discounted_sum = 0
        discount = 1.0  # gamma ** t for the current step t

        obs, info = env.reset()
        episode_over = False

        while not episode_over:
            action = agent.predict(obs.flatten())
            next_obs, reward, done, truncated, info = env.step(action)

            undiscounted_sum += reward
            discounted_sum += reward * discount
            discount *= agent.gamma  # advance to the next power of gamma

            # Only `done` is forwarded to the agent; `truncated` just ends the loop.
            transition_tail = (done, next_obs.flatten())
            agent.learn(obs.flatten(), action, reward, transition_tail)

            obs = next_obs
            episode_over = bool(done or truncated)

        episode_returns.append(undiscounted_sum)
        episode_discounted_returns.append(discounted_sum)

        print(f"Total Reward: {round(undiscounted_sum, 2)} | Discounted: {round(discounted_sum, 2)}")

    return episode_returns, episode_discounted_returns

# Run the full training schedule and report the summed undiscounted reward.
(total_rewards, discounted_rewards) = train_agent(num_episodes=NUM_EPISODES)
print(sum(total_rewards))
    


Episode 0
Total Reward: 18.25 | Discounted: 5.16
Episode 1
Total Reward: 6.27 | Discounted: 3.99
Episode 2
Total Reward: 22.02 | Discounted: 4.87
Episode 3
Total Reward: 22.25 | Discounted: 5.08
Episode 4
Total Reward: 21.02 | Discounted: 4.65
Episode 5
Total Reward: 21.02 | Discounted: 4.65
Episode 6
Total Reward: 21.02 | Discounted: 4.65
Episode 7
Total Reward: 20.02 | Discounted: 4.43
Episode 8
Total Reward: 22.02 | Discounted: 4.87
Episode 9
Total Reward: 22.15 | Discounted: 5.0
Episode 10
Total Reward: 20.02 | Discounted: 4.43
Episode 11
Total Reward: 21.65 | Discounted: 4.61
Episode 12
Total Reward: 20.02 | Discounted: 4.43
Episode 13
Total Reward: 22.29 | Discounted: 5.11
Episode 14
Total Reward: 20.02 | Discounted: 4.43
Episode 15
Total Reward: 22.55 | Discounted: 5.29
Episode 16
Total Reward: 21.85 | Discounted: 4.75
Episode 17
Total Reward: 22.42 | Discounted: 5.21
Episode 18
Total Reward: 22.15 | Discounted: 5.0
Episode 19
Total Reward: 22.69 | Discounted: 5.35
Episode 20
To

In [5]:
import numpy as np
import os
from datetime import datetime

def plot_and_save_rewards(rewards, name, base_dir="runs", window=20):
    """Plot a per-episode reward series and persist it together with the model.

    Creates (if needed) a directory `<base_dir>/<YYYY-mm-dd_HH-MM>` and writes:
      * `<name>.png` -- raw series plus a moving-average curve,
      * `<name>.csv` -- two columns, `episode` and `reward`,
      * `model.tar`  -- written by calling `save_model` on the module-level
        `agent` (so every call re-saves the model).

    Args:
        rewards: 1-D sequence of per-episode rewards. May be shorter than
            `window`, or empty.
        name: Base file name (without extension) and plot title prefix.
        base_dir: Parent directory under which the timestamped run directory
            is created.
        window: Moving-average width in episodes. It is clamped to the length
            of `rewards` so that short series still plot; the original
            fixed width of 20 produced mismatched x/y lengths (and a crash)
            for series shorter than 20 episodes.

    Raises:
        ValueError: If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = np.asarray(rewards, dtype=float)
    count = len(values)

    # Time-based run directory; minute resolution, so calls within the same
    # minute share a directory.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    run_dir = os.path.join(base_dir, timestamp)
    os.makedirs(run_dir, exist_ok=True)

    # Never smooth with a kernel longer than the data: in "valid" mode
    # np.convolve would otherwise return window - count + 1 points, which no
    # longer lines up with the episode axis.
    effective_window = min(int(window), count)

    fig, ax = plt.subplots()
    try:
        ax.plot(values, alpha=0.3, label="Raw")
        if effective_window > 0:
            kernel = np.ones(effective_window) / effective_window
            smoothed = np.convolve(values, kernel, mode="valid")
            # The first smoothed point corresponds to episode window - 1.
            ax.plot(range(effective_window - 1, count), smoothed, label="Smoothed")
        ax.set_xlabel("Episode")
        ax.set_ylabel("Reward")
        ax.set_title(f"{name} (Smoothed)")
        ax.legend()

        fig_path = os.path.join(run_dir, f"{name}.png")
        fig.savefig(fig_path, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

    # Save the raw rewards; comments="" keeps the header line free of a
    # leading comment marker.
    rewards_path = os.path.join(run_dir, f"{name}.csv")
    np.savetxt(
        rewards_path,
        np.column_stack((np.arange(count), values)),
        delimiter=",",
        header="episode,reward",
        comments=""
    )

    # Persist the current agent next to the curves.
    model_path = os.path.join(run_dir, "model.tar")
    agent.save_model(model_path)
    

# Persist both reward curves; each call also re-saves the agent's model.
plot_and_save_rewards(rewards=total_rewards, name="total_rewards")
plot_and_save_rewards(rewards=discounted_rewards, name="discounted_rewards")



In [6]:
from gymnasium.wrappers import RecordVideo
import cv2


# Evaluation: roll out the trained agent for a few episodes while recording video.
# NOTE: this rebinds NUM_EPISODES and env, which the training cell also uses.
NUM_EPISODES = 5
test_rewards = []  # per-step rewards collected across all evaluation episodes

env = gymnasium.make('highway-v0', render_mode='rgb_array',config={"duration":60})
# The trigger returns True for every episode index, so each episode is recorded.
env = RecordVideo(env, video_folder="videos",
              episode_trigger=lambda e: True)
# NOTE(review): presumably lets highway-env hand intermediate frames to the
# recorder -- confirm against the highway-env documentation.
env.unwrapped.set_record_video_wrapper(env)

try:
    for episode in range(NUM_EPISODES):
        print(f"Episode {episode}")
        done = truncated = False

        obs, info = env.reset()

        while not (done or truncated):
            action = agent.predict(obs.flatten())

            next_obs, reward, done, truncated, info = env.step(action)
            test_rewards.append(reward)
            obs = next_obs
            env.render()
finally:
    # Always close the environment, even if a rollout fails or the cell is
    # interrupted, so the recorder and environment are not left open.
    env.close()


print(sum(test_rewards))


  logger.warn(


Episode 0
Episode 1
Episode 2
Episode 3
Episode 4
191.4207778483232
