In [62]:
import pandas as pd
import numpy as np

In [63]:
df_statistics_closest = pd.read_csv("./closest/statistics.csv")
df_statistics_hungarian = pd.read_csv("./hungarian/statistics.csv")

## Methods that consistently appear in top 10 

In [64]:
# Create a set of submission IDs of top 10 methods according to RMSE
# Statistics table was already ordered by RMSE
top_N=10

# Order by increasing RMSE
df_statistics_rmse_closest = df_statistics_closest.sort_values(by='RMSE', ascending=True).reset_index(drop=True)
df_statistics_rmse_hungarian = df_statistics_hungarian.sort_values(by='RMSE', ascending=True).reset_index(drop=True)

df_rmse_top_closest = df_statistics_rmse_closest.loc[0:top_N,:]
submission_ID_rmse_top_closest = set(df_rmse_top_closest["ID"])

df_rmse_top_hungarian = df_statistics_rmse_hungarian.loc[0:top_N,:]
submission_ID_rmse_top_hungarian = set(df_rmse_top_hungarian["ID"])

In [65]:
# Create a set of submission IDs of top 10 methods according to MAE

# Order by increasing MAE
df_statistics_mae_closest = df_statistics_closest.sort_values(by='MAE', ascending=True).reset_index(drop=True)
df_statistics_mae_hungarian = df_statistics_hungarian.sort_values(by='MAE', ascending=True).reset_index(drop=True)

# Determine submission ID of top 10
df_mae_top_closest = df_statistics_mae_closest.loc[0:top_N,:]
submission_ID_mae_top_closest = set(df_mae_top_closest["ID"])

df_mae_top_hungarian = df_statistics_mae_hungarian.loc[0:top_N,:]
submission_ID_mae_top_hungarian = set(df_mae_top_hungarian["ID"])

In [66]:
# Create a set of submission IDs of top 10 methods according to R-squareed

# Order by decreasing R-squared
df_statistics_r2_closest = df_statistics_closest.sort_values(by='R2', ascending=False).reset_index(drop=True)
df_statistics_r2_hungarian = df_statistics_hungarian.sort_values(by='R2', ascending=False).reset_index(drop=True)

# Determine submission ID of top 10
df_r2_top_closest = df_statistics_r2_closest.loc[0:top_N,:]
submission_ID_r2_top_closest = set(df_r2_top_closest["ID"])

df_r2_top_hungarian = df_statistics_r2_hungarian.loc[0:top_N,:]
submission_ID_r2_top_hungarian = set(df_r2_top_hungarian["ID"])

In [67]:
# Create a set of submission IDs of top 10 methods according to Kendall's Tau

# Order by decreasing Tau
df_statistics_tau_closest = df_statistics_closest.sort_values(by='kendall_tau', ascending=False).reset_index(drop=True)
df_statistics_tau_hungarian = df_statistics_hungarian.sort_values(by='kendall_tau', ascending=False).reset_index(drop=True)

# Determine submission ID of top 10
df_tau_top_closest = df_statistics_tau_closest.loc[0:top_N,:]
submission_ID_tau_top_closest = set(df_tau_top_closest["ID"])

df_tau_top_hungarian = df_statistics_tau_hungarian.loc[0:top_N,:]
submission_ID_tau_top_hungarian = set(df_tau_top_hungarian["ID"])

In [68]:
# Determine intersection of 4 sets: Methods that are in top N according to all 4 statistics
common_methods_closest_4 = (submission_ID_rmse_top_closest & 
                          submission_ID_mae_top_closest & 
                          submission_ID_r2_top_closest & 
                          submission_ID_tau_top_closest)
common_methods_closest_4

{'8xt50', 'gyuhx', 'xmyhm', 'xvxzd', 'yqkga'}

In [69]:
common_methods_closest_3 = (submission_ID_rmse_top_closest & 
                          submission_ID_mae_top_closest & 
                          submission_ID_r2_top_closest)
common_methods_closest_3

{'8xt50', 'gyuhx', 'nb017', 'xmyhm', 'xvxzd', 'yqkga'}

In [77]:
common_methods_closest_2 = (submission_ID_rmse_top_closest & 
                          submission_ID_mae_top_closest)
common_methods_closest_2

{'8xt50',
 'gyuhx',
 'nb007',
 'nb010',
 'nb013',
 'nb017',
 'xmyhm',
 'xvxzd',
 'yqkga'}

In [70]:
# Determine intersection of 4 sets: Methods that are in top N according to all 4 statistics
common_methods_hungarian_4 = (submission_ID_rmse_top_hungarian & 
                          submission_ID_mae_top_hungarian & 
                          submission_ID_r2_top_ & 
                          submission_ID_tau_top_closest)
common_methods_hungarian_4

{'8xt50', 'gyuhx', 'xmyhm', 'xvxzd'}

In [71]:
common_methods_hungarian_3 = (submission_ID_rmse_top_hungarian & 
                          submission_ID_mae_top_hungarian & 
                          submission_ID_r2_top_hungarian)
common_methods_hungarian_3

{'8xt50', 'gyuhx', 'nb017', 'xmyhm', 'xvxzd'}

In [78]:
common_methods_hungarian_2 = (submission_ID_rmse_top_hungarian & 
                          submission_ID_mae_top_hungarian)
common_methods_hungarian_2

{'8xt50',
 'gyuhx',
 'nb007',
 'nb010',
 'nb013',
 'nb015',
 'nb017',
 'xmyhm',
 'xvxzd',
 'yqkga'}

In [79]:
common_methods_4 = (common_methods_closest_4 & common_methods_hungarian_4)
common_methods_3 = (common_methods_closest_3 & common_methods_hungarian_3)
common_methods_2 = (common_methods_closest_2 & common_methods_hungarian_2)

In [74]:
df_best = df_statistics_closest[df_statistics_closest["ID"].isin(common_methods_3)].reset_index(drop=True)
#df_best.to_csv("statistics_8_top_methods.csv", index=False)
df_best

Unnamed: 0,ID,name,RMSE,RMSE_lower_bound,RMSE_upper_bound,MAE,MAE_lower_bound,MAE_upper_bound,ME,ME_lower_bound,ME_upper_bound,R2,R2_lower_bound,R2_upper_bound,m,m_lower_bound,m_upper_bound,kendall_tau,kendall_tau_lower_bound,kendall_tau_upper_bound
0,xvxzd,Full quantum chemical calculation of free ener...,0.680076,0.54216,0.812364,0.578621,0.447931,0.711379,0.235172,0.002069,0.45931,0.937302,0.87833,0.972659,0.923111,0.835591,1.016554,0.816277,0.681234,0.917949
1,gyuhx,S+pKa,0.729982,0.546547,0.914637,0.578667,0.426667,0.747333,0.009333,-0.263,0.267667,0.925127,0.868729,0.964024,0.996287,0.910033,1.108903,0.879172,0.799049,0.943529
2,xmyhm,ACD/pKa Classic,0.773914,0.49939,1.026762,0.545667,0.365667,0.753,0.101667,-0.177333,0.376,0.915851,0.831674,0.968137,0.980863,0.869364,1.104884,0.798155,0.668246,0.896311
3,nb017,MoKa,0.942726,0.724967,1.153809,0.77,0.585484,0.97,-0.162258,-0.493226,0.155806,0.884296,0.808502,0.937368,0.938772,0.820788,1.076271,0.72649,0.596611,0.835625
4,8xt50,ReSCoSS conformations // DSD-BLYP-D3 reranking...,1.07143,0.77869,1.354442,0.814194,0.575806,1.07,-0.474839,-0.81,-0.138387,0.905535,0.840955,0.95127,1.078111,0.935981,1.218259,0.800862,0.682929,0.89067


In [75]:
df_best = df_statistics_closest[df_statistics_closest["ID"].isin(common_methods_4)].reset_index(drop=True)
#df_best.to_csv("statistics_8_top_methods.csv", index=False)
df_best

Unnamed: 0,ID,name,RMSE,RMSE_lower_bound,RMSE_upper_bound,MAE,MAE_lower_bound,MAE_upper_bound,ME,ME_lower_bound,ME_upper_bound,R2,R2_lower_bound,R2_upper_bound,m,m_lower_bound,m_upper_bound,kendall_tau,kendall_tau_lower_bound,kendall_tau_upper_bound
0,xvxzd,Full quantum chemical calculation of free ener...,0.680076,0.54216,0.812364,0.578621,0.447931,0.711379,0.235172,0.002069,0.45931,0.937302,0.87833,0.972659,0.923111,0.835591,1.016554,0.816277,0.681234,0.917949
1,gyuhx,S+pKa,0.729982,0.546547,0.914637,0.578667,0.426667,0.747333,0.009333,-0.263,0.267667,0.925127,0.868729,0.964024,0.996287,0.910033,1.108903,0.879172,0.799049,0.943529
2,xmyhm,ACD/pKa Classic,0.773914,0.49939,1.026762,0.545667,0.365667,0.753,0.101667,-0.177333,0.376,0.915851,0.831674,0.968137,0.980863,0.869364,1.104884,0.798155,0.668246,0.896311
3,8xt50,ReSCoSS conformations // DSD-BLYP-D3 reranking...,1.07143,0.77869,1.354442,0.814194,0.575806,1.07,-0.474839,-0.81,-0.138387,0.905535,0.840955,0.95127,1.078111,0.935981,1.218259,0.800862,0.682929,0.89067


In [80]:
df_best = df_statistics_closest[df_statistics_closest["ID"].isin(common_methods_2)].reset_index(drop=True)
#df_best.to_csv("statistics_8_top_methods.csv", index=False)
df_best

Unnamed: 0,ID,name,RMSE,RMSE_lower_bound,RMSE_upper_bound,MAE,MAE_lower_bound,MAE_upper_bound,ME,ME_lower_bound,ME_upper_bound,R2,R2_lower_bound,R2_upper_bound,m,m_lower_bound,m_upper_bound,kendall_tau,kendall_tau_lower_bound,kendall_tau_upper_bound
0,xvxzd,Full quantum chemical calculation of free ener...,0.680076,0.54216,0.812364,0.578621,0.447931,0.711379,0.235172,0.002069,0.45931,0.937302,0.87833,0.972659,0.923111,0.835591,1.016554,0.816277,0.681234,0.917949
1,gyuhx,S+pKa,0.729982,0.546547,0.914637,0.578667,0.426667,0.747333,0.009333,-0.263,0.267667,0.925127,0.868729,0.964024,0.996287,0.910033,1.108903,0.879172,0.799049,0.943529
2,xmyhm,ACD/pKa Classic,0.773914,0.49939,1.026762,0.545667,0.365667,0.753,0.101667,-0.177333,0.376,0.915851,0.831674,0.968137,0.980863,0.869364,1.104884,0.798155,0.668246,0.896311
3,yqkga,ReSCoSS conformations // COSMOtherm pKa,0.902594,0.685167,1.12153,0.709667,0.515667,0.914667,-0.288333,-0.587667,0.032,0.900614,0.820011,0.952205,0.999846,0.867829,1.131561,0.837745,0.722751,0.922527
4,nb017,MoKa,0.942726,0.724967,1.153809,0.77,0.585484,0.97,-0.162258,-0.493226,0.155806,0.884296,0.808502,0.937368,0.938772,0.820788,1.076271,0.72649,0.596611,0.835625
5,nb007,Epik Scan,0.945751,0.732761,1.15589,0.776129,0.595161,0.976452,0.045161,-0.289677,0.36871,0.878516,0.763718,0.945147,0.839672,0.768161,0.922391,0.787031,0.649901,0.890877
6,nb010,Epik Microscopic,1.028061,0.76542,1.271502,0.814194,0.601935,1.046452,0.243226,-0.108065,0.587742,0.869029,0.769497,0.940035,0.945752,0.825158,1.074762,0.800004,0.667613,0.900337
7,8xt50,ReSCoSS conformations // DSD-BLYP-D3 reranking...,1.07143,0.77869,1.354442,0.814194,0.575806,1.07,-0.474839,-0.81,-0.138387,0.905535,0.840955,0.95127,1.078111,0.935981,1.218259,0.800862,0.682929,0.89067
8,nb013,Jaguar,1.103116,0.706545,1.466844,0.803226,0.557419,1.087419,-0.148387,-0.55129,0.216129,0.883942,0.787808,0.94626,1.091843,0.90747,1.253628,0.79225,0.642545,0.900677
