In [None]:
import utils as Utils
import numpy as np
import json
import gym

from dueling_dqn_agent import DuelingDQN_Agent
from double_dqn_agent import Double_DQN_Agent
from agent_handler import Agent_handler
from dqn_agent import DQN_Agent

Initialize the environment and get the number of observations and actions to be given to the agents

In [None]:
# Create the Ms. Pac-Man Gym environment, requesting the 'rgb_array' render mode.
ENV_ID = 'MsPacman-v4'
env = gym.make(ENV_ID, render_mode='rgb_array')

# describe_env yields two values, unpacked here as the observation and action
# counts that are handed to the agents.
env_description = Utils.describe_env(env)
num_obs, num_actions = env_description

The following parameters will be shared across the agents

In [None]:
# Hyperparameters handed unchanged to every agent constructor.
# NOTE(review): "num_obs" is hard-coded instead of using the `num_obs` value
# returned by Utils.describe_env -- presumably it is the shape of the stacked,
# cropped frames the networks consume; confirm against the handler/agents.
params = dict(
    num_obs=(4, 86, 80),
    num_actions=num_actions,  # taken from the environment description
    update_rate=20,
    learning_rate=0.00005,
    discount_factor=0.95,
    exploration_factor=1,
    min_exploration_rate=0.05,
    exploration_decay=0.995,
    batch_size=16,
)

In [None]:
# Build one agent per algorithm, in a fixed order; every agent receives the
# same `params` dictionary object.
agents = [
    agent_class(params)
    for agent_class in (DQN_Agent, Double_DQN_Agent, DuelingDQN_Agent)
]


Initialize the handler

In [None]:
# Frame-cropping bounds passed to the handler under the "crop" key.
# NOTE(review): presumably these are slice bounds applied to each frame
# (negative values counting from the end) -- confirm in Agent_handler.
crop_bounds = {
    "top": 0,
    "bottom": -39,
    "left": 0,
    "right": -1,
}

# Settings for the training run; the handler receives them as a single dict.
handler_settings = {
    "num_episodes": 100,
    "max_steps": 5000,
    "notify_percent": 10,
    "skip": 85,
    "checkpoint_interval": 100,
    "crop": crop_bounds,
}

handler = Agent_handler(handler_settings)

Train and save the results

In [None]:
# Run training for all agents on the shared environment and keep what the
# handler returns.
results = handler.train(agents, env)

# Serialize the returned results to a JSON file in the working directory.
output_file_path = "results.json"
with open(output_file_path, mode="w") as results_sink:
    json.dump(results, fp=results_sink)

print(f"Results saved to {output_file_path}")

Load and plot the results

In [None]:
# Reload the results from the file written by the training cell.
# The original read 'results2.json', a file this notebook never creates, so a
# fresh "Restart & Run All" either failed or plotted results from another run.
# To plot an archived run instead, change the path below deliberately.
with open('results.json', 'r') as file:
    results = json.load(file)

In [None]:
# Number of consecutive episodes averaged together when smoothing the rewards.
window_size = 5

for agent, result in results.items():
    rewards = np.asarray(result["rewards"], dtype=float)

    # Never use a window longer than the series: with mode='valid' np.convolve
    # swaps its operands in that case and returns a result of the wrong length.
    window = max(1, min(window_size, rewards.size))

    if rewards.size:
        moving_average = np.convolve(rewards, np.ones(window) / window, mode='valid')
    else:
        # np.convolve rejects empty input; an empty series has an empty average.
        moving_average = np.empty(0)

    # The first `window - 1` episodes have no complete window. Pad them with
    # zeros so the smoothed series has the same length as the rewards.
    # The padding length follows the window instead of a hard-coded `5 - 1`.
    padding = np.zeros(window - 1)
    result["rewards averages"] = np.concatenate([padding, moving_average])

    Utils.plot_results(result, agent)