In [None]:
import sys
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

# Q-Learning

## basic Q-learning performance

In [None]:
# Plot the vanilla DQN learning curve: the mean episode reward and the best
# mean episode reward, both against the logged "timestamp" series.
# NOTE: pickle.load can execute arbitrary code; only open trusted result files.
with open("data/vanilla_atari.pkl", "rb") as f:
    data = pickle.load(f)

# Narrow the formatter's power limits so large x tick values are rendered in
# scientific notation instead of long plain integers.
ax = plt.gca()
ax.xaxis.get_major_formatter().set_powerlimits((0, 1))

# The dictionary keys double as the legend entries, in plotting order.
reward_keys = ["mean_episode_reward", "best_mean_episode_reward"]
for reward_key in reward_keys:
    ax.plot(data["timestamp"], data[reward_key])

ax.set_title("learning curve for basic Q-learning")
ax.legend(reward_keys)
ax.set_xlabel("timestep")
ax.set_ylabel("reward")

plt.savefig("Figure_1.png", dpi=150)
plt.show()

## double DQN vs vanilla DQN

In [None]:
# Compare double DQN against vanilla DQN on the same axes.
# NOTE: pickle.load can execute arbitrary code; only open trusted result files.
with open("data/double_atari.pkl", "rb") as f:
    double_data = pickle.load(f)
with open("data/vanilla_atari.pkl", "rb") as f:
    vanilla_data = pickle.load(f)

# Narrow the formatter's power limits so large x tick values are rendered in
# scientific notation instead of long plain integers.
ax = plt.gca()
ax.xaxis.get_major_formatter().set_powerlimits((0, 1))

# One row per curve: (run data, reward key, line colour, legend entry).
# Row order determines both the drawing order and the legend order.
curve_specs = [
    (vanilla_data, "mean_episode_reward", 'r', "vanilla_mean"),
    (vanilla_data, "best_mean_episode_reward", 'orange', "vanilla_best"),
    (double_data, "mean_episode_reward", 'b', "double_mean"),
    (double_data, "best_mean_episode_reward", 'green', "double_best"),
]
for run, reward_key, line_color, _ in curve_specs:
    ax.plot(run["timestamp"], run[reward_key], color=line_color)

ax.set_title("learning curve for basic Q-learning and double Q-learning")
ax.legend([entry for *_, entry in curve_specs])
ax.set_xlabel("timestep")
ax.set_ylabel("reward")

plt.savefig("Figure_2.png", dpi=150)
plt.show()

## experimenting with hyperparameters: learning rate

In [None]:
# Learning-rate sweep: for every learning rate, draw the mean episode reward
# (dash-dot line) and the best mean episode reward (solid line) in one colour.
lr_list = ["1e-2", "5e-3", "1e-3", "5e-4", "1e-4"]
color_list = ["r", "g", "b", "pink", "orange"]

# zip() would silently drop curves if the two lists ever got out of sync.
if len(lr_list) != len(color_list):
    raise ValueError("lr_list and color_list must have the same length")

fig, ax = plt.subplots(figsize=(9, 6))
# Narrow the formatter's power limits so large x tick values are rendered in
# scientific notation instead of long plain integers.
ax.xaxis.get_major_formatter().set_powerlimits((0, 1))

for lr, color in zip(lr_list, color_list):
    # NOTE: pickle.load can execute arbitrary code; only open trusted result files.
    with open("data/lunar_lander_" + lr + ".pkl", "rb") as f:
        data = pickle.load(f)
    # Label each line where it is drawn so the legend cannot drift out of
    # step with the plotting order.
    ax.plot(data["timestamp"], data["mean_episode_reward"], '-.', c=color,
            label="mean: " + lr)
    ax.plot(data["timestamp"], data["best_mean_episode_reward"], c=color,
            label="best: " + lr)

ax.set_title("learning curve for basic Q-learning with different learning rate")
ax.legend()  # collects the per-line labels in drawing order
ax.set_xlabel("timestep")
ax.set_ylabel("reward")

fig.savefig("Figure_3.png", dpi=150)
plt.show()