In [1]:
# Widen the notebook container to the full browser width so wide result tables
# are readable. `display` and `HTML` are imported from the public
# `IPython.display` module; importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [25]:
import glob
import os
import pickle
import sys

import pandas as pd

# Make the sibling `evaluation` directory importable. `sys.path` is used directly
# rather than `os.sys.path`, which only works because `os` happens to import `sys`.
# This must run before the `evaluate` import below.
sys.path.insert(0, '../evaluation')

from evaluate import evaluate_all_systems, preprocess_all_models

In [26]:
# Names of the model directories under ../data/models/dev.
# Sorted because glob returns entries in arbitrary, filesystem-dependent order;
# restricted to directories because every later cell treats each entry as a
# directory (it opens `<model>/params.pkl` and friends).
models = sorted(
    os.path.basename(p)
    for p in glob.glob('../data/models/dev/*')
    if os.path.isdir(p)
)

In [27]:
# Load each model's evaluation table and stack them into one frame keyed by model.
# Models without a system_evaluation.csv are skipped, so the concat keys must be
# the models that were actually loaded: passing the full `models` list would pair
# the frames with the wrong model names as soon as one file is missing.
dfs = []
evaluated_models = []
for model in models:
    evaluation_path = f'../data/models/dev/{model}/system_evaluation.csv'
    if os.path.isfile(evaluation_path):
        df_ = pd.read_csv(evaluation_path, index_col=['subset', 'references', 'metric'])
        dfs.append(df_)
        evaluated_models.append(model)

# Index after concat is (model, subset, references, metric); unstack() moves the
# innermost level ('metric') into the columns.
scores_df = pd.concat(dfs, keys=evaluated_models).unstack()
# Drop the outer column level left over from the original value column(s),
# keeping only the metric names.
scores_df.columns = scores_df.columns.droplevel()

In [28]:
# Build one row of parameters per model from its pickled params file.
params_dfs = []
for model in models:
    with open(f'../data/models/dev/{model}/params.pkl', 'rb') as f:
        # NOTE(review): pickle.load can execute arbitrary code; only run this on
        # params files produced by trusted runs.
        params_dfs.append(pd.DataFrame([pickle.load(f)], index=[model]))

params_df = pd.concat(params_dfs)
# Missing values in the two *_lm_n columns are replaced by 0. The result is assigned
# back to the column instead of calling fillna(inplace=True) on `params_df.<col>`:
# that chained in-place form warns in pandas 2.x and does not modify `params_df`
# once Copy-on-Write is enabled.
params_df['tems_lm_n'] = params_df['tems_lm_n'].fillna(0)
params_df['txs_lm_n'] = params_df['txs_lm_n'].fillna(0)

In [29]:
# Read each model's elapsed time (first line of elapsed_time.txt) into a
# one-column frame indexed by model name.
elapsed_time_dfs = []
for model in models:
    with open(f'../data/models/dev/{model}/elapsed_time.txt', 'r') as f:
        # strip() rather than slicing off the last character: `[:-1]` silently drops
        # the final digit when the file does not end with a newline.
        elapsed_seconds = float(f.readline().strip())
    elapsed_time_dfs.append(pd.DataFrame(data=[elapsed_seconds], index=[model], columns=['elapsed_time']))
elapsed_time_df = pd.concat(elapsed_time_dfs)

In [30]:
# Keep only the 'all-cat' rows of the score table and collapse the index to the model name.
all_cat_rows = pd.IndexSlice[:, 'all-cat', :]
scores_all = scores_df.loc[all_cat_rows, :].reset_index(level=[1, 2], drop=True)

# Attach parameters and elapsed time to the scores, matching rows on the model index.
df_all = (
    scores_all
    .merge(params_df, left_index=True, right_index=True)
    .merge(elapsed_time_df, left_index=True, right_index=True)
)

# Restrict to the metric, timing and parameter columns of interest.
cols = [
    'bleu', 'meteor', 'ter', 'elapsed_time',
    'dp_scorer_n', 'sa_scorer_n', 'tems_lm_n', 'referrer_lm_n', 'txs_lm_n',
    'max_dp', 'max_sa', 'max_refs', 'max_tems',
]
df_all = df_all[cols]
df_all.sort_values(by='bleu', ascending=False)

Unnamed: 0,bleu,meteor,ter,elapsed_time,dp_scorer_n,sa_scorer_n,tems_lm_n,referrer_lm_n,txs_lm_n,max_dp,max_sa,max_refs,max_tems
8427712258203685535,60.67,0.439775,0.350059,25.549334,2,3,4,4,3,5,5,1,2
-6120210850613917316,60.65,0.440085,0.349864,26.11855,2,2,4,4,3,5,5,1,2
8608132448910299145,60.53,0.43911,0.350254,25.506509,2,2,4,3,3,5,5,1,2
2528891278422703928,60.46,0.441876,0.356933,7.24884,2,2,4,4,4,5,5,1,1
4901813669408019549,60.43,0.439895,0.354691,7.174427,2,2,4,4,3,5,5,1,1
-2785595828672947740,60.42,0.442309,0.350449,25.454383,2,2,4,3,4,5,5,1,2
-1922451055525070619,60.41,0.441268,0.357128,7.072687,2,2,4,3,4,5,5,1,1
-468997385542151278,60.39,0.438812,0.35079,24.17422,2,3,4,3,3,5,5,1,2
6316591217736520224,60.33,0.439481,0.353569,7.214782,2,2,4,3,3,5,5,1,1
3106577197184455726,60.31,0.441738,0.35157,7.411245,2,3,4,3,4,5,5,1,1


In [15]:
# BLEU summary statistics per combination of the four *_n parameters,
# ordered by the upper quartile (best first).
df_all.groupby(['dp_scorer_n', 'sa_scorer_n', 'tems_lm_n', 'txs_lm_n'])['bleu'].describe().sort_values(by='75%', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,25%,50%,75%,max
dp_scorer_n,sa_scorer_n,tems_lm_n,txs_lm_n,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,2,4,4,3.0,60.153333,0.488501,59.59,60.0,60.41,60.435,60.46
2,3,4,4,2.0,60.27,0.056569,60.23,60.25,60.27,60.29,60.31
2,3,4,3,1.0,60.26,,60.26,60.26,60.26,60.26,60.26
2,2,3,3,3.0,59.906667,0.594755,59.22,59.73,60.24,60.25,60.26
2,2,4,3,3.0,60.03,0.310483,59.71,59.88,60.05,60.19,60.33
2,3,3,4,2.0,59.875,0.784889,59.32,59.5975,59.875,60.1525,60.43
2,3,3,3,3.0,59.653333,0.759232,59.21,59.215,59.22,59.875,60.53
2,2,3,4,3.0,59.69,0.480729,59.35,59.415,59.48,59.86,60.24


In [14]:
# Rows where txs_lm_n == 3, tems_lm_n == 3 and txs_lm_name, tems_lm_name and sa_scorer
# all equal 'lower', sorted by BLEU (best first).
# NOTE(review): the `cols` selection applied to df_all earlier in this notebook does not
# keep txs_lm_name, tems_lm_name or sa_scorer, so this cell looks like it depends on an
# older df_all and would fail on a fresh Restart & Run All - TODO confirm and update or remove.
df_all[(df_all.txs_lm_n == 3) & (df_all.txs_lm_name == 'lower') & (df_all.tems_lm_name == 'lower') & (df_all.tems_lm_n == 3) & (df_all.sa_scorer == 'lower')].sort_values('bleu', ascending=False)

Unnamed: 0,bleu,meteor,ter,dp_scorer,max_dp,sa_scorer,max_sa,tems_lm_name,tems_lm_n,max_tems,referrer,txs_lm_name,txs_lm_n


In [55]:
# Relabel the analysis columns with the short module labels.
# An explicit old -> new mapping replaces the positional `df_all.columns = [...]`
# assignment: a positional list silently attaches the wrong label whenever the
# column selection or order of df_all changes. Selecting by name first makes a
# missing column raise KeyError instead of producing a mislabelled table.
# The mapping follows the column order df_all had when the positional list was
# written (bleu, meteor, ter, dp_scorer, max_dp, sa_scorer, max_sa, tems_lm_name,
# tems_lm_n, max_tems, referrer, txs_lm_name, txs_lm_n) - TODO confirm.
column_labels = {
    'bleu': 'BLEU',
    'meteor': 'METEOR',
    'ter': 'TER',
    'dp_scorer': 'MPerm',
    'max_dp': 'n_MPerm',
    'sa_scorer': 'MPart',
    'max_sa': 'n_MPart',
    'tems_lm_name': 'MTemp',
    'tems_lm_n': 'MTemp_N',
    'max_tems': 'n_MTemp',
    'referrer': 'MREG',
    'txs_lm_name': 'MTRank',
    'txs_lm_n': 'MTRank_N',
}
df_all = df_all[list(column_labels)].rename(columns=column_labels)

In [58]:
# Discard the rows whose MPerm value is 'gold', then write the rest to CSV.
is_not_gold = df_all['MPerm'] != 'gold'
df_all = df_all[is_not_gold]
df_all.to_csv('../data/results/dev_all_results.csv')

In [49]:
# Keep only the '5size' rows of the score table and collapse the index to the model name.
five_size_rows = pd.IndexSlice[:, '5size', :]
scores_5 = scores_df.loc[five_size_rows, :].reset_index(level=[1, 2], drop=True)

# Attach the per-model parameters, matching rows on the model index.
df_5 = scores_5.merge(params_df, left_index=True, right_index=True)

# Preview the five best-BLEU rows, restricted to the metric and parameter columns.
cols = [
    'bleu', 'meteor', 'ter',
    'dp_scorer', 'max_dp',
    'sa_scorer', 'max_sa',
    'tems_lm_name', 'tems_lm_n', 'max_tems',
    'referrer',
    'txs_lm_name', 'txs_lm_n',
]
df_5.sort_values(by='bleu', ascending=False)[cols].head()

Unnamed: 0,bleu,meteor,ter,dp_scorer,max_dp,sa_scorer,max_sa,tems_lm_name,tems_lm_n,max_tems,referrer,txs_lm_name,txs_lm_n
37677081755143658,52.81,,,ltr_lasso,2,inv_ltr_lasso,4,lower,3,2,counter,lower,3
4692801175971517865,52.69,,,ltr_lasso,2,inv_ltr_lasso,4,lower,3,2,counter,lower,6
7697843703963551123,52.28,,,ltr_lasso,2,inv_ltr_lasso,4,lower,6,2,counter,lower,3
-5304774152169464302,51.63,,,gold,2,gold,4,lower,3,2,counter,lower,3
-9198725233750128576,51.63,,,ltr_lasso,2,inv_ltr_lasso,4,lower,6,2,counter,lower,6


In [50]:
# Discard the rows whose dp_scorer is 'gold', then write the rest to CSV.
is_not_gold = df_5['dp_scorer'] != 'gold'
df_5 = df_5[is_not_gold]
df_5.to_csv('../data/results/dev_5_results.csv')

# BLEU grouped by module

## all-cat

In [57]:
# BLEU summary statistics per dp_scorer value (all-cat subset).
df_all.groupby(by='dp_scorer')['bleu'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
dp_scorer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ltr_lasso,104.0,39.655577,10.435262,17.74,27.6975,40.785,43.9125,58.18


In [58]:
# BLEU summary statistics per sa_scorer value (all-cat subset).
df_all.groupby(by='sa_scorer')['bleu'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
sa_scorer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
inv_ltr_lasso,32.0,39.498125,8.770645,25.11,35.87,40.435,42.8575,57.81
ltr_lasso,41.0,40.020244,11.269731,17.8,27.66,41.56,49.68,57.77
random,31.0,39.335806,11.16813,17.74,27.45,40.75,43.74,58.18


In [59]:
# BLEU summary statistics per (tems_lm_name, tems_lm_n) pair (all-cat subset).
df_all.groupby(by=['tems_lm_name', 'tems_lm_n'])['bleu'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
tems_lm_name,tems_lm_n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
inv_lower,3,28.0,36.199286,11.64815,17.74,25.3575,40.41,43.5925,56.76
inv_lower,6,23.0,34.823043,8.843265,25.4,27.495,28.02,42.375,50.81
lower,3,28.0,43.064286,10.094556,25.11,39.3675,41.905,50.85,57.81
lower,6,25.0,44.1548,7.577394,25.11,40.08,41.56,49.92,58.18


In [60]:
# BLEU summary statistics per (txs_lm_name, txs_lm_n) pair (all-cat subset).
df_all.groupby(by=['txs_lm_name', 'txs_lm_n'])['bleu'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
txs_lm_name,txs_lm_n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
inv_lower,3,26.0,38.853077,7.72934,25.11,38.8225,40.285,42.37,57.77
inv_lower,6,26.0,33.186154,9.356853,17.8,25.445,27.82,41.9925,56.93
lower,3,27.0,42.97963,11.17113,17.74,38.755,43.38,50.765,58.18
lower,6,25.0,43.6284,10.188479,24.94,40.22,43.24,50.55,57.16


In [61]:
# BLEU summary statistics per referrer value (all-cat subset).
df_all.groupby(by='referrer')['bleu'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
referrer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
counter,47.0,40.484894,10.92962,25.11,27.85,41.92,43.61,58.18
inv_counter,9.0,31.513333,8.66355,17.8,25.11,34.0,38.16,43.45
preprocess_so,48.0,40.370208,9.749184,17.74,39.075,40.725,49.74,56.93


## 5size

In [62]:
# BLEU summary statistics per dp_scorer value (5size subset).
df_5.groupby(by='dp_scorer')['bleu'].describe()

NameError: name 'df_5' is not defined

In [57]:
# BLEU summary statistics per sa_scorer value (5size subset).
df_5.groupby(by='sa_scorer')['bleu'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
sa_scorer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
inv_ltr_lasso,225.0,28.9544,9.065081,13.95,22.89,26.7,36.08,52.81
ltr_lasso,225.0,28.792222,8.684845,13.85,22.45,27.66,35.85,48.07
random,225.0,29.268889,8.946539,13.93,22.9,28.1,36.36,51.46


In [58]:
# BLEU summary statistics per (tems_lm_name, tems_lm_n) pair (5size subset).
df_5.groupby(by=['tems_lm_name', 'tems_lm_n'])['bleu'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
tems_lm_name,tems_lm_n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
inv_lower,3,135.0,23.047407,6.155775,13.93,19.35,22.79,26.915,40.04
inv_lower,6,135.0,23.63837,6.496942,13.85,19.78,23.39,27.525,40.76
lower,3,135.0,35.073037,7.892178,22.47,27.74,37.02,40.57,52.81
lower,6,135.0,34.32763,7.874061,21.76,27.29,35.79,39.43,52.28
random,0,135.0,28.939407,7.914815,17.07,24.53,27.66,34.65,48.95


In [59]:
# BLEU summary statistics per (txs_lm_name, txs_lm_n) pair (5size subset).
df_5.groupby(by=['txs_lm_name', 'txs_lm_n'])['bleu'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
txs_lm_name,txs_lm_n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
inv_lower,3,135.0,24.966593,7.31695,13.85,20.985,23.13,33.395,37.85
inv_lower,6,135.0,25.214,7.415394,14.1,21.205,23.25,33.865,38.35
lower,3,135.0,33.671852,8.81193,17.59,27.36,32.39,40.47,52.81
lower,6,135.0,33.362296,8.65187,17.73,27.37,32.17,39.965,52.69
random,0,135.0,27.811111,7.937564,15.01,23.05,25.23,35.425,44.04


# Lasso

In [9]:
# The regression below targets BLEU only, so remove the other two metric columns.
df_all = df_all.drop(['meteor', 'ter'], axis=1)

In [15]:
from sklearn.linear_model import Lasso

In [37]:
# Features: every column except the target and the three max_* columns.
non_feature_cols = ['bleu', 'max_dp', 'max_sa', 'max_tems']
X = df_all.drop(columns=non_feature_cols)
# Target kept as a single-column frame.
y = df_all[['bleu']]

# One-hot encode every feature column, numeric ones included.
X_ohe = pd.get_dummies(X, columns=X.columns)

In [38]:
# Fit a Lasso regression with default hyper-parameters on the one-hot features.
lasso = Lasso()
lasso = lasso.fit(X_ohe, y)

In [39]:
# Show the fitted Lasso coefficients, one per column of X_ohe.
lasso.coef_

array([ 0.        , -0.        ,  0.        , -0.        ,  0.        ,
       -0.        ,  0.        ,  0.        , -3.56075308,  3.79845913,
       -0.        , -0.        ,  0.        , -0.        ,  0.38945086,
       -9.21999944,  0.        , -1.13690988,  4.7128114 , -0.        ,
       -0.        ,  0.        , -0.        ])

In [50]:
# Label each coefficient with its one-hot column name and list them from the most
# positive to the most negative.
feature_names = X_ohe.columns
result = pd.Series(lasso.coef_, index=feature_names)
result.sort_values(ascending=False)

txs_lm_name_lower          4.712811
tems_lm_name_lower         3.798459
referrer_counter           0.389451
txs_lm_n_6                -0.000000
tems_lm_name_random       -0.000000
dp_scorer_inv_ltr_lasso   -0.000000
dp_scorer_ltr_lasso        0.000000
dp_scorer_random          -0.000000
sa_scorer_gold             0.000000
sa_scorer_inv_ltr_lasso   -0.000000
sa_scorer_ltr_lasso        0.000000
sa_scorer_random           0.000000
tems_lm_n_0               -0.000000
txs_lm_n_3                 0.000000
tems_lm_n_3                0.000000
tems_lm_n_6               -0.000000
referrer_preprocess_so     0.000000
txs_lm_name_random        -0.000000
txs_lm_n_0                -0.000000
dp_scorer_gold             0.000000
txs_lm_name_inv_lower     -1.136910
tems_lm_name_inv_lower    -3.560753
referrer_inv_counter      -9.219999
dtype: float64