<h2>Install Packages and Import Libraries</h2>

In [None]:
#Reference for this code came from https://docs.ray.io/en/latest/rllib/rllib-algorithms.html#dqn
!pip install ray
!pip install gymnasium
!pip install "gymnasium[atari]"
!pip install "gymnasium[accept-rom-license]"
!pip install -U tensorboardx

import ray
import gymnasium 
import torch
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
                 
from ray.rllib.algorithms.dqn.dqn import DQNConfig
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.logger import pretty_print
from ray import air, tune


<h3>Select device</h3>

In [None]:
# Use the CUDA device when PyTorch reports one as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Selected device:", device)

<h3>Train</h3>

In [None]:
# Reference for this code came from https://docs.ray.io/en/latest/rllib/rllib-algorithms.html#dqn

# Environment id handed to RLlib, and the number of `algo.train()` calls made
# per experiment (stored under the "episodes" key of each experiment).
environ = "Riverraid-ramDeterministic-v4"
episodes = 200

# One dict per experiment. "display" lists the keys whose values are used to
# build the experiment's legend label in the plotting cell.
parameters = [
    {
        "experiment_id": 1,
        "capacity": 5000,
        "prioritized_replay_alpha": 0.6,
        "prioritized_replay": False,
        "episodes": episodes,
        "display": ["prioritized_replay"],
    },
    {
        "experiment_id": 2,
        "capacity": 5000,
        "prioritized_replay_alpha": 0.6,
        "prioritized_replay": True,
        "episodes": episodes,
        "display": ["prioritized_replay"],
    },
]

# Flat list of per-iteration records for every experiment, in run order.
final_results = []
for param in parameters:
    config = DQNConfig()
    config = config.environment(environ)
    config = config.training(double_q=True)
    config.replay_buffer_config["capacity"] = param["capacity"]
    config.replay_buffer_config["prioritized_replay_alpha"] = param["prioritized_replay_alpha"]
    # RLlib's replay-buffer API selects prioritization through the buffer
    # `type`; the legacy `prioritized_replay` flag is deprecated there, so the
    # experiment switch is mapped onto the buffer class instead.
    # NOTE(review): confirm both buffer names against the installed Ray version.
    config.replay_buffer_config["type"] = (
        "MultiAgentPrioritizedReplayBuffer"
        if param["prioritized_replay"]
        else "MultiAgentReplayBuffer"
    )
    if torch.cuda.is_available():
        config = config.resources(num_gpus=1)
    algo = config.build()

    episode_results = []
    try:
        start_time = datetime.now()
        for episode in range(param["episodes"]):
            results = algo.train()
            episode_results.append(
                {
                    "experiment_id": param["experiment_id"],
                    "parameters": param,
                    "episode": episode,
                    "episode_mean_reward": results["episode_reward_mean"],
                }
            )
            if episode % 10 == 0:
                # Duration covers the iterations since the previous report
                # (a single iteration for the very first report).
                end_time = datetime.now()
                time_diff = end_time - start_time
                print("Epoch", episode, "Episode Mean Reward", results["episode_reward_mean"], "Duration(s)", time_diff.total_seconds())
                start_time = datetime.now()
    finally:
        # Release this experiment's workers/resources before the next
        # experiment builds its own algorithm.
        algo.stop()
    final_results += episode_results





In [None]:
# Plot the per-iteration mean reward of every experiment on one shared figure.
for params in parameters:
    experiment_id = params["experiment_id"]

    # All recorded results that belong to this experiment.
    exp_res = [res for res in final_results if res["experiment_id"] == experiment_id]
    if not exp_res:
        # Nothing was recorded for this experiment; an empty array could not
        # be indexed by column below.
        continue

    # Legend label: "<key>: <value> " for every key listed under "display".
    display_label = "".join(f"{item}: {params[item]} " for item in params["display"])

    # Columns: 0 = episode index, 1 = episode mean reward, 2 = running mean of column 1.
    rewards = np.array(
        [[res["episode"], res["episode_mean_reward"], 0] for res in exp_res],
        dtype=float,
    )
    # Running mean in one vectorized pass (cumulative sum / number of entries so far).
    rewards[:, 2] = np.cumsum(rewards[:, 1]) / np.arange(1, len(rewards) + 1)

    plt.figure(1)
    plt.plot(rewards[:, 0], rewards[:, 1], label=display_label)

plt.figure(1)
plt.title("Episode Mean Reward")
plt.legend()

plt.show()