In [1]:
import numpy as np
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import src.constants as constants

In [2]:
# Shared styling for the figures in this notebook: plot background colour,
# common axis appearance, trace colours, and the plotly layout template.
grey_light = "rgb(224, 224, 224)"

# Appearance settings applied identically to the x and y axes.
axis_common_dict = {
    "linecolor": "black",
    "linewidth": 2,
    "ticks": "outside",
    "title": {"standoff": 15},
}

colorway = px.colors.qualitative.G10
# One "rgba(r, g, b, 0.15)" string per colourway entry, used to fill the
# bands around the curves with a translucent version of the line colour.
fillcolors = [
    f"rgba{px.colors.hex_to_rgb(hex_color) + (0.15,)}"
    for hex_color in colorway
]

template = go.layout.Template()
template.layout = {
    "paper_bgcolor": "white",
    "plot_bgcolor": grey_light,
    "font": {"color": "black"},
    "title": {
        "font": {"size": 18},
        "x": 0.1,
        "xanchor": "left",
        "y": 0.9,
        "yanchor": "top",
    },
    # Each axis gets its own shallow copy of the common settings.
    "xaxis": {**axis_common_dict},
    "yaxis": {**axis_common_dict},
}

In [19]:
# Figure title and the file stem used when the figure is exported.
title = "DQN - Weighted Mean Comparison"
save_title = "dqn_weighted_mean"

# (model directory name, legend label) pairs, in plotting order.
model_specs = [
    ("DQN-n-m-w99", "DQN - Alpha 0.99"),
    ("DQN-n-m-xxlh-pmp30-g65-ftu5k", "DQN - Alpha 0.999"),
    ("DQN-n-m-noweight", "DQN - No Weighting"),
]
model_names = [directory for directory, label in model_specs]
model_legend_names = [label for directory, label in model_specs]

In [20]:
# One directory per compared model, located under the project's models path.
model_dirs = [os.path.join(constants.MODELS_PATH, name) for name in model_names]

# Training-step checkpoints used as x positions, with their tick labels
# (the label for 100K is intentionally left blank).
indices = [
    10_000, 100_000, 200_000,
    1_000_000, 2_000_000, 3_000_000,
    4_000_000, 5_000_000, 6_000_000,
]
ticktext = ["10K", "", "200K", "1M", "2M", "3M", "4M", "5M", "6M"]

# Seeds whose evaluation results are looked up for every model.
seeds = [7, 42]

# For each model, read the tab-separated evaluation results of every seed
# whose results file exists; seeds without a file are skipped, so the number
# of runs may differ between models.
model_data = []
for model_dir in model_dirs:
    candidate_paths = [
        os.path.join(model_dir, f"predictions_{seed}", "eval_results.txt")
        for seed in seeds
    ]
    existing_paths = [p for p in candidate_paths if os.path.exists(p)]
    model_data.append(
        [pd.read_csv(results_path, sep="\t") for results_path in existing_paths]
    )

In [21]:
# Collect the evaluated mean returns into one (n_runs, n_checkpoints) array
# per model. The number of columns follows `indices` instead of a hard-coded
# 9, so changing the list of checkpoints cannot silently desynchronise it.
n_models = len(model_data)
n_checkpoints = len(indices)
model_results = []
for runs in model_data:
    returns = np.zeros((len(runs), n_checkpoints))
    for run_idx, run in enumerate(runs):
        # Assumes every results table has exactly one row per checkpoint, in
        # the same order as `indices` -- TODO confirm against the eval script.
        # A length mismatch raises a ValueError from numpy here.
        returns[run_idx] = run["mean_return"]
    model_results.append(returns)

In [22]:
# Inspect the collected returns: one array per model, one row per run.
model_results

[array([[0.64328213, 0.71479741, 0.71180467, 0.76073956, 0.76676553,
         0.77380826, 0.78043713, 0.78407329, 0.78433035],
        [0.65737859, 0.71204117, 0.73988804, 0.75118033, 0.76982743,
         0.77204346, 0.77803751, 0.77865622, 0.77991314]]),
 array([[0.64315095, 0.71510084, 0.71198282, 0.75892853, 0.7658037 ,
         0.77188957, 0.77955978, 0.78420535, 0.78282872],
        [0.65743038, 0.7137873 , 0.74169531, 0.75352631, 0.77246987,
         0.77495616, 0.77898313, 0.78005803, 0.78113751]]),
 array([[0.64321319, 0.71531888, 0.71217753, 0.75688284, 0.76479244,
         0.76929255, 0.77484539, 0.78022357, 0.78027279]])]

In [23]:
# Per-checkpoint mean and standard deviation across the runs of each model.
# np.std is the population standard deviation (numpy's default, ddof=0), so a
# model with a single run gets a standard deviation of exactly zero.
models_returns_mean = []
models_returns_std = []
for run_returns in model_results:
    models_returns_mean.append(np.mean(run_returns, axis=0))
    models_returns_std.append(np.std(run_returns, axis=0))

In [24]:
# Inspect the per-checkpoint mean return of each model.
models_returns_mean

[array([0.65033036, 0.71341929, 0.72584636, 0.75595994, 0.76829648,
        0.77292586, 0.77923732, 0.78136476, 0.78212174]),
 array([0.65029066, 0.71444407, 0.72683907, 0.75622742, 0.76913678,
        0.77342286, 0.77927146, 0.78213169, 0.78198311]),
 array([0.64321319, 0.71531888, 0.71217753, 0.75688284, 0.76479244,
        0.76929255, 0.77484539, 0.78022357, 0.78027279])]

In [25]:
# Inspect the per-checkpoint standard deviation of each model
# (all zeros for a model that has only one run).
models_returns_std

[array([0.00704823, 0.00137812, 0.01404168, 0.00477962, 0.00153095,
        0.0008824 , 0.00119981, 0.00270854, 0.0022086 ]),
 array([0.00713972, 0.00065677, 0.01485625, 0.00270111, 0.00333309,
        0.00153329, 0.00028833, 0.00207366, 0.00084561]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0.])]

In [26]:
# Plot, for every model, the mean return per checkpoint with a shaded band of
# +/- one standard deviation, and annotate the best mean return overall.
fig = go.Figure()

for i in range(n_models):
    returns_mean = models_returns_mean[i]
    returns_std = models_returns_std[i]

    # Mean return curve.
    fig.add_trace(
        go.Scatter(
            x=indices,
            y=returns_mean,
            mode="markers+lines",
            name=model_legend_names[i],
            marker=dict(
                size=4,
                color=colorway[i]
            ),
        )
    )

    # Standard-deviation band, drawn as a closed polygon: the upper edge from
    # left to right followed by the lower edge from right to left.
    fig.add_trace(
        go.Scatter(
            x=indices + indices[::-1],
            y=(
                list(returns_mean + returns_std)
                + list(returns_mean - returns_std)[::-1]
            ),
            # Explicit "lines": plotly's default mode for traces with fewer
            # than 20 points also draws markers, which would show up on the
            # band outline.
            mode="lines",
            # "toself" fills only the polygon; "tozerox" would additionally
            # extend the fill from the polygon's end points toward x = 0.
            fill="toself",
            fillcolor=fillcolors[i],
            line_color="rgba(255,255,255,0)",
            showlegend=False
        )
    )

# Locate the highest mean return over all models and checkpoints. The result
# is stored as `best_return` so the builtin `max` is not shadowed for the rest
# of the kernel session.
mean_grid = np.asarray(models_returns_mean)
best_model, best_checkpoint = np.unravel_index(
    np.argmax(mean_grid), mean_grid.shape
)
best_return = mean_grid[best_model, best_checkpoint]
fig.add_annotation(
    x=indices[best_checkpoint],
    y=best_return,
    text=f"{model_legend_names[best_model]}: {best_return:.4f}",
    showarrow=True,
    arrowhead=6,
    arrowsize=0.75
)

fig.update_layout(
    title=title,
    xaxis=dict(
        tickangle=60,
        tickvals=indices,
        ticktext=ticktext,
        range=[0, 6_250_000],
        title="Training Steps"
    ),
    yaxis=dict(
        range=[0.62, 0.79],
        dtick=0.02,
        title="Discounted Return"
    ),
    width=850, height=500,
    template=template
)
# Re-apply the template's axis styling directly onto the figure's axes.
fig.update_yaxes(template.layout.yaxis)
fig.update_xaxes(template.layout.xaxis)
fig.show()

In [27]:
# Export the figure as a PDF named after `save_title`. The output directory is
# created first so the export does not fail on a fresh checkout where it does
# not exist yet. Static image export relies on plotly's image export backend
# (kaleido) being installed.
output_dir = "./visualizations/results"
os.makedirs(output_dir, exist_ok=True)
fig.write_image(f"{output_dir}/{save_title}.pdf")