In [68]:
import pandas as pd
import pandas as pd
import datetime
import numpy as np
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd


In [69]:
def save_fig(fig, name):
    fig.update_layout(legend_title_text='', showlegend=True)
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ))

    fig.update_layout(width =500, height=200, 
                    font_family="Serif", font_size=12, title_font_size=13, 
                    margin_l=5, margin_t=1, margin_b=1, margin_r=5)

    fig.update_yaxes(nticks=10)
    fig.update_layout(title="")
    import plotly.io as pio
    #save a figure of 300dpi, width 1.5 inches, height 0.75inches
    pio.write_image(fig, f"../experiments/img/{name}.pdf", width=1.5*300, height=0.75*300)
    fig.show()

In [70]:
def prepare_graphs(df, name, ys):
    colours = ['royalblue', 'coral', 'mediumseagreen', 'slategray']
    fig = px.line(df, x="Step", y=ys, render_mode="svg", color_discrete_sequence=colours)
    fig.update_traces(opacity=.8)
    save_fig(fig, f'{name}_original')

    fig = px.scatter(df, x="Step",  y=ys, trendline="ewm", trendline_options=dict(halflife=10),
                title="", color_discrete_sequence=colours)
    fig.update_yaxes(title='Reward')
    fig.update_xaxes(title='Training step')
    fig.data = [t for t in fig.data if t.mode == "lines"]
    fig.update_traces(opacity=.8)
    fig.update_traces(showlegend=True) #trendlines have showlegend=False by default
    save_fig(fig, f'{name}_ewn')

In [71]:
df_heuristic = pd.read_csv('./milestone_heuristic.csv')
df_heuristic['Baseline Heuristic'] = 84.282
ys_heuristic = ['RAINBOW caller & callee heuristic', 'DDQN caller & callee heuristic', 'SAC caller & callee heuristic', 'Baseline Heuristic']
prepare_graphs(df_heuristic, 'heuristic', ys_heuristic)

In [72]:
df_caller = pd.read_csv('./milestone_caller.csv')
df_caller['Baseline Random'] = 75.887
ys_caller = ['RAINBOW caller & callee random', 'SAC caller & callee random',  'DDQN caller & callee random', 'Baseline Random']
prepare_graphs(df_caller, 'caller', ys_caller)

In [73]:
df_rainbow = pd.read_csv('./milestone_rainbow.csv')
df_rainbow['Baseline Random'] = 75.887
ys_rainbow = ['RAINBOW caller/callee jointly',  'RAINBOW caller/callee cyclic epoch=10',  'RAINBOW caller/callee cyclic epoch=5', 'Baseline Random']
prepare_graphs(df_rainbow, 'rainbow', ys_rainbow)

In [74]:
df_SAC = pd.read_csv('./milestone_SAC.csv')
df_SAC['Baseline Random'] = 75.887
ys_SAC = ['SAC caller/callee jointly',  'SAC caller/callee cyclic epoch=10',  'SAC caller/callee cyclic epoch=5', 'Baseline Random']
prepare_graphs(df_SAC, 'sac', ys_SAC)

In [75]:
max_reward = pd.concat([df_SAC.max()[ys_SAC],
df_rainbow.max()[ys_rainbow],
df_heuristic.max()[ys_heuristic],
df_caller.max()[ys_caller]]).sort_values(ascending=False)

In [76]:
print(max_reward.to_latex())

\begin{tabular}{lr}
\toprule
{} &          0 \\
\midrule
SAC caller \& callee heuristic         &  86.967003 \\
DDQN caller \& callee heuristic        &  86.769997 \\
RAINBOW caller \& callee heuristic     &  86.042000 \\
Baseline Heuristic                    &  84.282000 \\
SAC caller/callee cyclic epoch=10     &  83.259003 \\
SAC caller/callee jointly             &  83.185997 \\
SAC caller/callee cyclic epoch=5      &  82.903999 \\
RAINBOW caller/callee cyclic epoch=5  &  82.134003 \\
RAINBOW caller/callee cyclic epoch=10 &  81.399002 \\
RAINBOW caller/callee jointly         &  80.973000 \\
RAINBOW caller \& callee random        &  79.930000 \\
SAC caller \& callee random            &  79.861000 \\
DDQN caller \& callee random           &  79.654999 \\
Baseline Random                       &  75.887000 \\
Baseline Random                       &  75.887000 \\
Baseline Random                       &  75.887000 \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [77]:
df_heuristic

Unnamed: 0,Step,RAINBOW caller & callee heuristic,RAINBOW caller & callee heuristic__MIN,RAINBOW caller & callee heuristic__MAX,DDQN caller & callee heuristic,DDQN caller & callee heuristic__MIN,DDQN caller & callee heuristic__MAX,SAC caller & callee heuristic,SAC caller & callee heuristic__MIN,SAC caller & callee heuristic__MAX,Baseline Heuristic
0,1,83.399002,83.399002,83.399002,83.857002,83.857002,83.857002,84.011002,84.011002,84.011002,84.282
1,8000,83.412003,83.412003,83.412003,83.696999,83.696999,83.696999,82.827003,82.827003,82.827003,84.282
2,15999,84.855003,84.855003,84.855003,83.920998,83.920998,83.920998,83.720001,83.720001,83.720001,84.282
3,23998,83.191002,83.191002,83.191002,83.916000,83.916000,83.916000,84.211998,84.211998,84.211998,84.282
4,31997,84.405998,84.405998,84.405998,84.735001,84.735001,84.735001,85.871002,85.871002,85.871002,84.282
...,...,...,...,...,...,...,...,...,...,...,...
195,1559806,84.707001,84.707001,84.707001,84.827003,84.827003,84.827003,84.436996,84.436996,84.436996,84.282
196,1567805,83.702003,83.702003,83.702003,83.135002,83.135002,83.135002,84.027000,84.027000,84.027000,84.282
197,1575804,83.716003,83.716003,83.716003,83.681000,83.681000,83.681000,84.558998,84.558998,84.558998,84.282
198,1583803,83.544998,83.544998,83.544998,85.318001,85.318001,85.318001,84.164001,84.164001,84.164001,84.282
