In [15]:
import pandas as pd

df = pd.read_csv('./nocom_scores.csv', dtype={'model_bad_id': 'object', 'model_good_id': 'object'})

In [16]:
df2 = pd.merge(df, df, left_on=['model_bad_id', 'model_good_id'], right_on=['model_good_id', 'model_bad_id'], suffixes=('_A', '_B'))
df2.drop(['model_bad_name_A', 'model_bad_step_A', 'model_good_name_A', 'model_good_step_A',  'model_bad_name_B', 'model_bad_step_B', 'model_good_name_B',
       'model_good_step_B'], axis=1, inplace=True)

df2.rename(columns={"model_bad_id_A": "model_id", "model_good_id_A": "adversary_id", "reward_bad_A": "bad_reward", "reward_good_B": "good_reward"}, inplace=True)
df2.drop(['reward_good_A', 'model_bad_id_B', 'model_good_id_B', 'reward_bad_B'], axis=1, inplace=True)
df2 = df2[df2['model_id'] != df2['adversary_id']]

In [17]:
def compute_offset(elos, a, b, scoreBad, scoreGood):
    eloA = elos[elos['model_id'] == a]['elo'].values[0]
    eloB = elos[elos['model_id'] == b]['elo'].values[0]
    S = (scoreBad + scoreGood) / 2
    E = 120 / (1 + 10 ** ((eloB - eloA) / 3500))
    return 25 * (S - E)

In [18]:
elos = pd.DataFrame({'model_id': df['model_bad_id'].unique(), 'elo': 1000})
for _ in range(20):
    elos_tmp = elos.copy()
    for i in range(len(elos)):
        name = elos['model_id'].iloc[i]
        offsets = df2[df2['model_id'] == name].apply(lambda x: compute_offset(elos, x['model_id'], x['adversary_id'], x['bad_reward'], x['good_reward']), axis=1)
        eloA = elos.iloc[i]['elo']
        elos_tmp.loc[i, 'elo'] = eloA + (offsets.sum() / len(offsets))
    elos = elos_tmp

In [19]:
elos_named = pd.merge(elos, df.drop(['model_good_id', 'model_good_name', 'model_good_step', 'reward_bad', 'reward_good'], axis=1).drop_duplicates(), left_on='model_id', right_on='model_bad_id', suffixes=('_elo', '_df'), how='inner')
elos_named.drop(['model_bad_id'], axis=1, inplace=True)
elos_named.rename(columns={"model_bad_id": "name", "model_bad_step": "step"}, inplace=True)
elos_named

Unnamed: 0,model_id,elo,model_bad_name,step
0,0,1253.322161,RNN h=128 m=128 P2,64000128
1,1,1254.903216,RNN h=128 m=128 P2,76800128
2,2,1269.406378,RNN h=128 m=128 P2,80000128
3,3,879.148383,NN h=32 sample_batch=False P2,12800000
4,4,1055.627694,NN h=32 sample_batch=False P2,25600000
...,...,...,...,...
130,130,967.283577,NN h=32 sample_batch=True P1,64000000
131,131,1020.622024,NN h=32 sample_batch=True P1,76800000
132,132,1039.201764,NN h=32 sample_batch=True P1,80000000
133,random,707.035640,random,0


In [20]:
elos_named.sort_values(by=['elo'], ascending=False)


Unnamed: 0,model_id,elo,model_bad_name,step
16,16,1368.755622,NN h=32 sample_batch=True P2,80000000
14,14,1368.637921,NN h=32 sample_batch=True P2,64000000
15,15,1356.491019,NN h=32 sample_batch=True P2,76800000
46,46,1344.888441,RNN h=64 m=128 P2,64000000
9,9,1337.222077,NN h=32 sample_batch=False P2,80000000
...,...,...,...,...
56,56,620.300710,RNN h=32 m=128 P2,12800000
91,91,614.422533,RNN h=64 m=128 P1,12800000
77,77,569.986381,RNN h=32 m=128 P1,12800000
98,98,550.914826,RNN h=64 m=64 P1,12800000


In [21]:
table = elos_named.groupby('model_bad_name').max('elo')
table.drop('step', axis=1, inplace=True)
table

Unnamed: 0_level_0,elo
model_bad_name,Unnamed: 1_level_1
NN h=32 sample_batch=False P1,1086.559685
NN h=32 sample_batch=False P2,1337.222077
NN h=32 sample_batch=True P1,1039.201764
NN h=32 sample_batch=True P2,1368.755622
NN h=64 sample_batch=False P1,1018.28111
NN h=64 sample_batch=False P2,1254.496923
NN h=64 sample_batch=True P1,1045.016148
NN h=64 sample_batch=True P2,1298.031336
RNN h=128 m=128 P2,1269.406378
RNN h=128 m=64 P1,900.956213


In [22]:
table_P2 = table.filter(like='P2', axis=0).sort_values(by=['elo'], ascending=False)
table_P2.reset_index(inplace=True)
table_P2['model_bad_name'] = table_P2['model_bad_name'].apply(lambda x: x.replace('P2', ''))
table_P2['elo'] = table_P2['elo'].round(0).astype(int)
table_P2

Unnamed: 0,model_bad_name,elo
0,NN h=32 sample_batch=True,1369
1,RNN h=64 m=128,1345
2,NN h=32 sample_batch=False,1337
3,RNN h=64 m=64,1307
4,NN h=64 sample_batch=True,1298
5,RNN h=32 m=64,1282
6,RNN h=128 m=128,1269
7,RNN h=128 m=64,1264
8,NN h=64 sample_batch=False,1254
9,RNN h=32 m=128,1177


In [23]:
print(table_P2.to_latex(index=False))

\begin{tabular}{lr}
\toprule
model_bad_name & elo \\
\midrule
NN h=32 sample_batch=True  & 1369 \\
RNN h=64 m=128  & 1345 \\
NN h=32 sample_batch=False  & 1337 \\
RNN h=64 m=64  & 1307 \\
NN h=64 sample_batch=True  & 1298 \\
RNN h=32 m=64  & 1282 \\
RNN h=128 m=128  & 1269 \\
RNN h=128 m=64  & 1264 \\
NN h=64 sample_batch=False  & 1254 \\
RNN h=32 m=128  & 1177 \\
\bottomrule
\end{tabular}



In [24]:
import plotly.express as px
import plotly.graph_objects as go


In [25]:
def save_fig(fig, name):
    fig.update_layout(legend_title_text='', showlegend=True)
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ))

    fig.update_layout(width =500, height=220, 
                    font_family="Serif", font_size=11, title_font_size=13, 
                    margin_l=5, margin_t=1, margin_b=1, margin_r=5)
    fig.update_yaxes(nticks=12)
    fig.update_layout(title="")
    import plotly.io as pio
    #save a figure of 300dpi, width 1.5 inches, height 0.75inches
    pio.write_image(fig, f"./img/{name}.png")
    fig.show()

In [26]:
fig_nn_p2 = go.Figure()
fig_nn_p1 = go.Figure()
fig_rnn_p2 = go.Figure()
fig_rnn_p1 = go.Figure()
for name, group in elos_named.groupby('model_bad_name'):
    group.loc[-1] = [1000000, 707.035640, 'random', 0]
    group = group.sort_values(by=['step'])

    if 'RNN' in name and 'P1' in name:
        fig = fig_rnn_p1
    elif 'RNN' in name and 'P2' in name:
        fig = fig_rnn_p2
    elif 'NN' in name and 'P1' in name:
        fig = fig_nn_p1
    elif 'NN' in name and 'P2' in name:
        fig = fig_nn_p2
   
    if name == 'random' or name == 'heuristic':
        continue

    display_name = name
    if 'RNN' not in display_name:
        display_name = display_name.replace('NN', 'MLP')
        display_name = display_name.replace('sample_batch', 'sb')
  
    fig.add_trace(go.Scatter(x=group['step']/8, y=group['elo'], name=display_name))
   

fig_nn_p2.add_hline(y=707.035640, line_dash="dot", line_color="gray", annotation_text='random')
fig_nn_p2.add_hline(y=1324.405769	, line_dash="dot", line_color="green", annotation_text='heuristic', annotation_position="top left")
fig_nn_p2.update_layout(title=f'ELO score over time MLP P2', xaxis_title='Number of games', yaxis_title='ELO score')
fig_nn_p2.show()
save_fig(fig_nn_p2, 'nocom_elo_mlp_p2')

fig_rnn_p2.add_hline(y=707.035640, line_dash="dot", line_color="gray", annotation_text='random')
fig_rnn_p2.add_hline(y=1324.405769	, line_dash="dot", line_color="green", annotation_text='heuristic', annotation_position="top left")
fig_rnn_p2.update_layout(title=f'ELO score over time RNN P2', xaxis_title='Number of games', yaxis_title='ELO score')
fig_rnn_p2.show()
save_fig(fig_rnn_p2, 'nocom_elo_rnn_p2')

fig_nn_p1.add_hline(y=707.035640, line_dash="dot", line_color="gray", annotation_text='random')
fig_nn_p1.add_hline(y=1324.405769	, line_dash="dot", line_color="green", annotation_text='heuristic', annotation_position="top left")
fig_nn_p1.update_layout(title=f'ELO score over time MLP P1', xaxis_title='Number of games', yaxis_title='ELO score')
fig_nn_p1.show()
save_fig(fig_nn_p1, 'nocom_elo_mlp_p1')

fig_rnn_p1.add_hline(y=707.035640, line_dash="dot", line_color="gray", annotation_text='random')
fig_rnn_p1.add_hline(y=1324.405769	, line_dash="dot", line_color="green", annotation_text='heuristic', annotation_position="top left")
fig_rnn_p1.update_layout(title=f'ELO score over time MLP P1', xaxis_title='Number of games', yaxis_title='ELO score')
fig_rnn_p1.show()
save_fig(fig_rnn_p1, 'nocom_elo_rnn_p1')