In [1]:
import numpy as np
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import src.constants as constants

In [21]:
# --- Colour palette (CSS colour strings) -------------------------------------
green = "rgb(114, 224, 175)"
green_alpha = "rgba(114, 224, 175, 0.2)"
green_dark = "rgb(75, 184, 135)"

grey = "rgb(158, 158, 158)"
grey_light = "rgb(224, 224, 224)"

red = "rgb(250, 33, 0)"
red_alpha = "rgba(250, 33, 0, 0.1)"
red_dark = "rgb(194, 38, 14)"
red_light = "rgb(255, 71, 71)"

# --- Axis styling shared by the x and y axes ---------------------------------
axis_common_dict = {
    "linecolor": "black",
    "linewidth": 2,
    "ticks": "outside",
    "title": {"standoff": 15},
}

# --- Trace colours ------------------------------------------------------------
# Line colours come from the qualitative G10 palette; each fill colour is the
# same colour rendered as an "rgba(r, g, b, a)" string with a low alpha.
colorway = px.colors.qualitative.G10
fill_alpha = 0.15
fillcolors = [
    "rgba" + str(px.colors.hex_to_rgb(hex_color) + (fill_alpha,))
    for hex_color in colorway
]

# --- Figure template ----------------------------------------------------------
template = go.layout.Template()
template.layout = {
    "paper_bgcolor": "white",
    "plot_bgcolor": grey_light,
    "font": {"color": "black"},
    "title": {
        "font": {"size": 18},
        "x": 0.1,
        "xanchor": "left",
        "y": 0.9,
        "yanchor": "top",
    },
    # Each axis receives its own shallow copy of the shared settings.
    "xaxis": {**axis_common_dict},
    "yaxis": {**axis_common_dict},
}
# Default look for histogram traces: a 2px green outline around the bars.
template.data.histogram = [
    go.Histogram(marker={"line": {"width": 2, "color": green}})
]

In [159]:
# Figure title and the base name of the exported file.
title = "DQN - User Encoder Comparison"
save_title = "dqn_user_encoding"

# Model directory names and, position by position, the labels used for them
# in the legend.
model_names = [
    "DQNGRU-n-bi-att",
    "DQNAtt-n-att",
    "DQN-n-m-xxlh-pmp30-g65-ftu5k",
    "DQN-n-l",
    "DQN-n-d",
]
model_legend_names = [
    "DQN GRU - Attention Pooling",
    "DQN Attention - Attention Pooling",
    "DQN Weighted Mean - 0.999",
    "DQN LTSTL",
    "DQN Distribution",
]
model_dirs = [os.path.join(constants.MODELS_PATH, name) for name in model_names]

# x positions of the evaluation points and the tick label shown at each one
# (the empty string leaves the 100K tick unlabelled).
indices = [
    10_000, 100_000, 200_000, 1_000_000, 2_000_000,
    3_000_000, 4_000_000, 5_000_000, 6_000_000,
]
ticktext = ["10K", "", "200K", "1M", "2M", "3M", "4M", "5M", "6M"]

# Seeds whose prediction folders are looked up under every model directory.
seeds = [7, 42, 1998]

# model_data[m] is the list of result tables (one DataFrame per seed whose
# eval_results.txt exists) for the m-th model.
model_data = []
for model_dir in model_dirs:
    candidate_paths = [
        os.path.join(model_dir, f"predictions_{seed}", "eval_results.txt")
        for seed in seeds
    ]
    # Seeds without a result file are skipped rather than treated as an error.
    data_paths = [p for p in candidate_paths if os.path.exists(p)]
    data_runs = [pd.read_csv(p, sep="\t") for p in data_paths]
    model_data.append(data_runs)

In [160]:
# Stack every model's per-run "mean_return" column into one 2-D array of shape
# (number of loaded runs, number of evaluation points).
n_models = len(model_data)

# The row width follows `indices` (the x positions plotted later) instead of a
# hard-coded 9, so changing the evaluation points only needs one edit.
n_eval_points = len(indices)

model_results = []
for runs in model_data:
    # A model with no loaded runs yields an empty (0, n_eval_points) array.
    returns = np.zeros((len(runs), n_eval_points))
    for j, run in enumerate(runs):
        values = run["mean_return"].to_numpy()
        # Check the length explicitly: plain row assignment would silently
        # broadcast a length-1 column across the whole row.
        if values.shape != (n_eval_points,):
            raise ValueError(
                f"expected {n_eval_points} mean_return values per run, "
                f"got shape {values.shape}"
            )
        returns[j] = values
    model_results.append(returns)

In [161]:
# Inspect the stacked returns: one array per model, one row per loaded run.
model_results

[array([[0.64251791, 0.71796598, 0.73635058, 0.75997916, 0.77495008,
         0.7747355 , 0.78354669, 0.78574093, 0.78351501]]),
 array([[0.63719196, 0.69061012, 0.70615525, 0.76035325, 0.76373463,
         0.76878421, 0.77303177, 0.77640959, 0.77666995]]),
 array([[0.64315095, 0.71510084, 0.71198282, 0.75892853, 0.7658037 ,
         0.77188957, 0.77955978, 0.78420535, 0.78282872],
        [0.65743038, 0.7137873 , 0.74169531, 0.75352631, 0.77246987,
         0.77495616, 0.77898313, 0.78005803, 0.78113751]]),
 array([[0.63769263, 0.68523306, 0.71061937, 0.74504234, 0.75781783,
         0.76331387, 0.76626955, 0.7665512 , 0.76613069]]),
 array([[0.63145688, 0.65948463, 0.69075509, 0.7452    , 0.76032254,
         0.76748158, 0.77166478, 0.77464287, 0.77284161]])]

In [162]:
# Collapse the run axis (axis 0) of every model's return matrix, giving the
# mean and the standard deviation at each evaluation point.
models_returns_mean = []
models_returns_std = []
for run_returns in model_results:
    models_returns_mean.append(np.mean(run_returns, axis=0))
    models_returns_std.append(np.std(run_returns, axis=0))

In [163]:
# Inspect the per-model mean return at each evaluation point (mean over runs).
models_returns_mean

[array([0.64251791, 0.71796598, 0.73635058, 0.75997916, 0.77495008,
        0.7747355 , 0.78354669, 0.78574093, 0.78351501]),
 array([0.63719196, 0.69061012, 0.70615525, 0.76035325, 0.76373463,
        0.76878421, 0.77303177, 0.77640959, 0.77666995]),
 array([0.65029066, 0.71444407, 0.72683907, 0.75622742, 0.76913678,
        0.77342286, 0.77927146, 0.78213169, 0.78198311]),
 array([0.63769263, 0.68523306, 0.71061937, 0.74504234, 0.75781783,
        0.76331387, 0.76626955, 0.7665512 , 0.76613069]),
 array([0.63145688, 0.65948463, 0.69075509, 0.7452    , 0.76032254,
        0.76748158, 0.77166478, 0.77464287, 0.77284161])]

In [164]:
# Draw, for every model, the mean-return curve and a shaded band of +/- one
# standard deviation around it; annotate the highest mean return; then show
# the figure and export it as a PDF.
fig = go.Figure()

for i, (returns_mean, returns_std) in enumerate(
    zip(models_returns_mean, models_returns_std)
):
    # Mean curve (this is the trace that appears in the legend).
    fig.add_trace(
        go.Scatter(
            x=indices,
            y=returns_mean,
            mode="markers+lines",
            name=model_legend_names[i],
            marker=dict(
                size=4,
                color=colorway[i]
            ),
        )
    )

    # Standard-deviation band: a closed polygon that walks the upper bound
    # left-to-right and the lower bound right-to-left. "toself" fills exactly
    # that polygon; "tozerox" would additionally fill back to x=0.
    upper_bound = list(returns_mean + returns_std)
    lower_bound = list(returns_mean - returns_std)
    fig.add_trace(
        go.Scatter(
            x=indices + indices[::-1],
            y=upper_bound + lower_bound[::-1],
            fill="toself",
            fillcolor=fillcolors[i],
            line_color="rgba(255,255,255,0)",
            showlegend=False
        )
    )

# Locate the single best mean return over all models and evaluation points.
# The value is stored as `best_return` so the builtin `max` is not shadowed
# for the rest of the kernel session.
mean_matrix = np.asarray(models_returns_mean)
best_model, argmax = np.unravel_index(np.argmax(mean_matrix), mean_matrix.shape)
best_return = mean_matrix[best_model, argmax]
fig.add_annotation(
    x=indices[argmax],
    y=best_return,
    text=f"{model_legend_names[best_model]}: {best_return:.4f}",
    showarrow=True,
    arrowhead=6,
    arrowsize=0.75
)

fig.update_layout(
    title=title,
    xaxis=dict(
        tickangle=60,
        tickvals=indices,
        ticktext=ticktext,
        range=[0, 6_250_000],
        title="Training Steps"
    ),
    yaxis=dict(
        range=[0.62, 0.79],
        dtick=0.02,
        title="Discounted Return"
    ),
    width=850, height=500,
    template=template
)
fig.update_yaxes(template.layout.yaxis)
fig.update_xaxes(template.layout.xaxis)
fig.show()

# Create the export directory on demand so a fresh checkout does not fail here.
output_dir = "./visualizations/results"
os.makedirs(output_dir, exist_ok=True)
fig.write_image(f"{output_dir}/{save_title}.pdf")