In [123]:
import mlflow
import pandas as pd
import re
from pprint import pprint
# Point the MLflow client at the tracking server on localhost:5000; the
# mlflow.search_runs calls in the cells below read their runs from it.
mlflow.set_tracking_uri('http://localhost:5000')

def drop_duplicates_and_zero_NA(results_df):
    """Drop repeated runs and zero the metrics of non-converged runs.

    Rows are de-duplicated on ``params.run_hash`` (the first occurrence is
    kept). A run is then treated as non-converged when its
    ``metrics.l1_match_val`` is NaN or its ``metrics.test_hits_at_1`` equals
    exactly 1.0; every metric column listed below is set to 0.0 for such rows.

    Args:
        results_df: DataFrame of runs containing ``params.run_hash`` and the
            metric columns listed in the body.

    Returns:
        A new, independent DataFrame; ``results_df`` itself is left untouched.
    """
    # Take an explicit copy: the frame returned by ``drop_duplicates`` can be
    # tracked by pandas as a slice of the input, and assigning through ``.loc``
    # on it emits SettingWithCopyWarning.
    results_df = results_df.drop_duplicates(subset=['params.run_hash']).copy()

    # Zeroing NaN and other non-converged results
    not_converged = (results_df['metrics.l1_match_val'].isna()
                     | (results_df['metrics.test_hits_at_1'] == 1.0))
    metric_columns = ['metrics.l1_match_train', 'metrics.l1_match_val',
                      'metrics.train_mr', 'metrics.test_mr', 'metrics.test_hits_at_100',
                      'metrics.train_hits_at_100', 'metrics.train_hits_at_10',
                      'metrics.val_hits_at_10', 'metrics.val_hits_at_100',
                      'metrics.train_hits_at_50', 'metrics.test_hits_at_50', 'metrics.val_mr',
                      'metrics.test_mrr', 'metrics.train_mrr', 'metrics.val_hits_at_1',
                      'metrics.train_hits_at_1', 'metrics.val_mrr', 'metrics.test_hits_at_1',
                      'metrics.test_hits_at_10', 'metrics.val_hits_at_50']
    results_df.loc[not_converged, metric_columns] = 0.0
    return results_df

def format_df(results, new_metrics_names = ['Hits@1','Hits@10','Hits@50'], 
              unstack_levels = [3, 2],
              hyperparam_names = [None, 'Weights'],
              params_names = ['No','Yes'], scale=1, round_digits=2):
    """Reshape a grouped metrics table into a wide, presentation-ready table.

    Args:
        results: DataFrame with MultiIndex columns; only the first column
            level (the metric name) is kept. The row index must reduce to
            exactly two levels (dataset, subset) once ``unstack_levels`` have
            been moved into the columns.
        new_metrics_names: display names assigned positionally to the values
            of the metric column level.
        unstack_levels: positions of the index levels to move into the column
            header, counted after the metric has been stacked as the innermost
            index level; given in outer-to-inner column order.
        hyperparam_names: names for the resulting column levels.
        params_names: display names assigned positionally to the values of the
            second column level.
        scale: factor every cell is multiplied by before rounding.
        round_digits: number of decimals passed to ``DataFrame.round``.

    Returns:
        A new DataFrame with sorted, relabelled columns. Dataset labels are
        upper-cased with a ``_suffix`` turned into `` (SUFFIX)``; underscores
        in subset labels become hyphens. The row levels are left unnamed.
    """
    # Copy first so that flattening the header does not alter the caller's frame.
    results = results.copy()
    results.columns = results.columns.get_level_values(0)
    results = results * scale
    results = results.round(round_digits)
    results = results.stack(level=0).unstack(level=list(unstack_levels))
    # Keyword ``axis`` and non-inplace index methods: the positional axis and
    # the ``inplace`` flag of ``set_levels`` are not accepted by pandas >= 2.0.
    results = results.sort_index(axis=1)
    columns = results.columns.set_levels(list(new_metrics_names), level=0)
    columns = columns.set_levels(list(params_names), level=1)
    results.columns = columns.set_names(list(hyperparam_names))
    # Rebuilding the index from plain tuples leaves the row levels unnamed.
    new_index = [(re.sub('_(.+)', r' (\g<1>)', dataset.upper()), re.sub('_', '-', subset))
                 for (dataset, subset) in results.index]
    results.index = pd.MultiIndex.from_tuples(new_index)
    return results

def to_latex(results):
    """Print ``results`` rendered as LaTeX, with bold row labels."""
    # Column spec handed to the renderer: two ``l`` columns, then six ``r``
    # columns separated by \extracolsep{\fill}.
    column_spec = r'*{2}{l}*{6}{@{\extracolsep{\fill}}r}@{\extracolsep{\fill}}'
    latex_source = results.to_latex(bold_rows=True, column_format=column_spec)
    print(latex_source)

In [124]:
# Save all results to csv
# Fetch every run of experiment 1, clean it with drop_duplicates_and_zero_NA
# and write the cleaned table to 'gcn-align-results.csv'.
results = mlflow.search_runs(experiment_ids=[1])
results = drop_duplicates_and_zero_NA(results)
results.to_csv('gcn-align-results.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


## 1. Hyperparameter search for dbp15k_jape (zh_en)
```python
 params_search = {
      'dataset_name': ['dbp15k_jape'],
      'subset_name': ['zh_en'],
      'num_epochs': [10, 500, 2000, 3000],
      'lr': [0.1, 0.5, 1, 10, 20],
      'eval_batch_size': [1000],  # 1000
      'embedding_dim': [200],
      'n_layers': [1, 2, 3],
      'num_negatives': [5, 50, 100],
      'use_edge_weights': [EdgeWeightsEnum.inverse_in_degree],  # None
      'use_conv_weights': [True, False],
      'conv_weight_init': [init.xavier_uniform_],
      'train_val_ratio': [0.7, 0.8],  # size of train subset in comparison with
      'node_embedding_init': ['total', 'none'],
      'optimizer': [optim.SGD, optim.Adam],
      'seed': [seed]
  }
```

In [3]:
# Load and clean the dbp15k_jape / zh_en runs of experiment 1.
results = drop_duplicates_and_zero_NA(
    mlflow.search_runs(
        experiment_ids=[1],
        filter_string="params.dataset_name = 'dbp15k_jape' and params.subset_name = 'zh_en'",
    )
)
# Keep only runs that belong to the search grid listed above. The params.*
# columns are compared against string literals, hence the quoted numbers.
results = results.loc[
    results['params.lr'].isin(['0.1', '0.5', '1', '10', '20'])
    & results['params.n_layers'].isin(['1', '2', '3'])
    & results['params.num_negatives'].isin(['5', '50', '100'])
    & results['params.num_epochs'].isin(['10', '500', '2000', '3000'])
    & results['params.train_val_ratio'].isin(['0.7', '0.8'])
    & results['params.optimizer'].isin(['Adam', 'SGD'])
]
len(results)

3615

In [7]:
# Split the runs by whether convolution weights were used ...
parametrised = results.loc[results['params.use_conv_weights'].eq('True')]
non_parametrised = results.loc[results['params.use_conv_weights'].eq('False')]

# ... and, independently, by how node embeddings were initialised.
init_total = results.loc[results['params.node_embedding_init'].eq('total')]
init_none = results.loc[results['params.node_embedding_init'].eq('none')]

### Best models for dbp15k_jape (zh_en)
#### Without weights & Node embedding init: none

```python
params_search_nonpar = {
        'dataset_name': ['dbp15k_jape'],
        'subset_name': ['zh_en'],
        'num_epochs': [2000],
        'lr': [1],
        'eval_batch_size': [1000],  # 1000
        'embedding_dim': [200],
        'n_layers': [2],
        'num_negatives': [50],
        'use_edge_weights': [EdgeWeightsEnum.inverse_in_degree],  # None
        'use_conv_weights': [False],
        'conv_weight_init': [init.xavier_uniform_],
        'train_val_ratio': [0.8],  # size of train subset in comparison with
        'node_embedding_init': ['none'],  # Cite normalisation constant
        'optimizer': [optim.Adam],
        'seed': [seed]
    }
```

In [18]:
# params.* fields of the run without convolution weights that has the highest validation Hits@1.
non_parametrised.nlargest(1, 'metrics.val_hits_at_1').iloc[0].filter(regex='params\\..+')

params.seed                                               12306
params.lr                                                     1
params.run_hash                0c23b25f180d4a1e0e14dfa92f60af8b
params.train_val_ratio                                      0.8
params.use_conv_weights                                   False
params.dataset_name                                 dbp15k_jape
params.eval_batch_size                                     1000
params.trainable_params                                 7792000
params.vertical_sharing                                    None
params.subset_name                                        zh_en
params.embedding_dim                                        200
params.node_embedding_init                                 none
params.num_epochs                                          2000
params.conv_weight_init                         xavier_uniform_
params.optimizer                                           Adam
params.n_layers                         

In [19]:
# Test metrics of the five runs without convolution weights that rank highest by validation Hits@1.
non_parametrised.nlargest(5, 'metrics.val_hits_at_1').filter(regex='metrics\\.test.+')

Unnamed: 0,metrics.test_hits_at_10,metrics.test_mrr,metrics.test_hits_at_1,metrics.test_hits_at_50,metrics.test_hits_at_100,metrics.test_mr
1516,0.727286,0.506254,0.392524,0.846952,0.875571,273.394165
1617,0.717381,0.504022,0.393429,0.834762,0.863048,336.529877
2799,0.710667,0.465469,0.345286,0.864333,0.898524,234.702316
2754,0.709762,0.463656,0.342286,0.868952,0.903,211.85817
1277,0.699429,0.497176,0.392667,0.802667,0.830143,487.546844


#### With weights
```python
params_search_par = {
        'dataset_name': ['dbp15k_jape'],
        'subset_name': ['zh_en'],
        'num_epochs': [2000],
        'lr': [1],
        'eval_batch_size': [1000],  # 1000
        'embedding_dim': [200],
        'n_layers': [3],
        'num_negatives': [50],
        'use_edge_weights': [EdgeWeightsEnum.inverse_in_degree],  # None
        'use_conv_weights': [True],
        'conv_weight_init': [init.xavier_uniform_],
        'train_val_ratio': [0.8],  # size of train subset in comparison with
        'node_embedding_init': ['none'],  # Cite normalisation constant
        'optimizer': [optim.Adam],
        'seed': [seed]
    }
```

In [20]:
# params.* fields of the run with convolution weights that has the highest validation Hits@1.
parametrised.nlargest(1, 'metrics.val_hits_at_1').iloc[0].filter(regex='params\\..+')

params.seed                                               12306
params.lr                                                     1
params.run_hash                9e3f4677e3186b8483eb22f2cbbcac51
params.train_val_ratio                                      0.8
params.use_conv_weights                                    True
params.dataset_name                                 dbp15k_jape
params.eval_batch_size                                     1000
params.trainable_params                                 7792000
params.vertical_sharing                                   False
params.subset_name                                        zh_en
params.embedding_dim                                        200
params.node_embedding_init                                 none
params.num_epochs                                          2000
params.conv_weight_init                         xavier_uniform_
params.optimizer                                           Adam
params.n_layers                         

In [21]:
# Test metrics of the five runs with convolution weights that rank highest by validation Hits@1.
parametrised.nlargest(5, 'metrics.val_hits_at_1').filter(regex='metrics\\.test.+')

Unnamed: 0,metrics.test_hits_at_10,metrics.test_mrr,metrics.test_hits_at_1,metrics.test_hits_at_50,metrics.test_hits_at_100,metrics.test_mr
928,0.680667,0.432034,0.310762,0.839476,0.877667,228.109589
922,0.664952,0.413043,0.290667,0.839381,0.883381,207.325851
924,0.661619,0.430171,0.315429,0.800857,0.83719,344.432251
930,0.659429,0.447213,0.338476,0.781571,0.819714,368.029846
918,0.647524,0.415737,0.299857,0.795476,0.835476,362.665588


#### Node embedding init: total

In [22]:
# params.* fields of the node_embedding_init == 'total' run with the highest validation Hits@1.
init_total.nlargest(1, 'metrics.val_hits_at_1').iloc[0].filter(regex='params\\..+')

params.seed                                               12306
params.lr                                                    10
params.run_hash                1bfa29ccf008ed9957873e9578e8e289
params.train_val_ratio                                      0.8
params.use_conv_weights                                   False
params.dataset_name                                 dbp15k_jape
params.eval_batch_size                                     1000
params.trainable_params                                 7792000
params.vertical_sharing                                    None
params.subset_name                                        zh_en
params.embedding_dim                                        200
params.node_embedding_init                                total
params.num_epochs                                          3000
params.conv_weight_init                         xavier_uniform_
params.optimizer                                            SGD
params.n_layers                         

In [23]:
# Test metrics of the five node_embedding_init == 'total' runs that rank highest by validation Hits@1.
init_total.nlargest(5, 'metrics.val_hits_at_1').filter(regex='metrics\\.test.+')

Unnamed: 0,metrics.test_hits_at_10,metrics.test_mrr,metrics.test_hits_at_1,metrics.test_hits_at_50,metrics.test_hits_at_100,metrics.test_mr
2799,0.710667,0.465469,0.345286,0.864333,0.898524,234.702316
2754,0.709762,0.463656,0.342286,0.868952,0.903,211.85817
2777,0.713571,0.468649,0.348429,0.864286,0.896333,235.075958
2675,0.711429,0.486224,0.372762,0.834571,0.863952,371.283661
2670,0.712095,0.484587,0.371524,0.834286,0.867381,340.532806


### Model proposed in the original GCN-Align code:
```
params.dataset_name = 'dbp15k_jape' and params.subset_name = 'zh_en' and params.num_epochs = '2000' and params.lr = '20' and params.eval_batch_size = '1000' and params.embedding_dim = '200' and params.n_layers = '2' and params.num_negatives = '5' and params.use_edge_weights = 'EdgeWeightsEnum.inverse_in_degree' and params.use_conv_weights = 'False' and params.conv_weight_init = 'xavier_uniform_' and params.train_val_ratio = '0.8' and params.node_embedding_init = 'total' and params.optimizer = 'SGD' and params.seed = '12306'

```

In [10]:
# Test metrics of the run with this MLflow run_id.
# NOTE(review): presumably the run matching the original GCN-Align configuration quoted above - confirm.
non_parametrised[non_parametrised['run_id'] == '32901ef5fe1c4f0f9de036cf4a951434'].filter(regex='metrics\\.test.+')

Unnamed: 0,metrics.test_mrr,metrics.test_hits_at_50,metrics.test_hits_at_100,metrics.test_hits_at_10,metrics.test_mr,metrics.test_hits_at_1
2670,0.484587,0.834286,0.867381,0.712095,340.532806,0.371524


### Extra - percentage of runs where not using weights is better

In [11]:
# Pair every run with weights with the run without weights that shares all the
# hyperparameters below; the remaining columns get the _par / _nonpar suffix.
merged = parametrised.merge(
    non_parametrised,
    on=[
        'params.seed', 'params.optimizer', 'params.dataset_name',
        'params.train_val_ratio', 'params.conv_weight_init', 'params.embedding_dim',
        'params.subset_name', 'params.lr', 'params.node_embedding_init',
        'params.num_negatives', 'params.use_edge_weights', 'params.num_epochs', 'params.n_layers',
    ],
    suffixes=('_par', '_nonpar'),
)

In [12]:
# Share of paired runs in which the weight-free model is at least as good on validation Hits@1.
merged['metrics.val_hits_at_1_nonpar'].ge(merged['metrics.val_hits_at_1_par']).value_counts(normalize=True)

True     0.74931
False    0.25069
dtype: float64

In [13]:
# Share of paired runs in which the weight-free model is at least as good on validation Hits@10.
merged['metrics.val_hits_at_10_nonpar'].ge(merged['metrics.val_hits_at_10_par']).value_counts(normalize=True)

True     0.714811
False    0.285189
dtype: float64

In [14]:
# Share of paired runs in which the weight-free model is at least as good on validation Hits@50.
merged['metrics.val_hits_at_50_nonpar'].ge(merged['metrics.val_hits_at_50_par']).value_counts(normalize=True)

True     0.702852
False    0.297148
dtype: float64

## 2. Retraining best param and non-param models on all the datasets
`train_val_ratio` is set to `1.0`

In [15]:
# The retraining runs are selected by train_val_ratio = '1.0' (see the note above this cell).
results_2 = mlflow.search_runs(experiment_ids=[1],
                               filter_string = "params.train_val_ratio = '1.0'")
len(results_2)

24

In [16]:
# Take the first run of every (dataset, subset, use_conv_weights) group.
# NOTE(review): this presumably relies on one run per group - confirm.
results_retrain = results_2.groupby(by=['params.dataset_name', 'params.subset_name', 'params.use_conv_weights']).\
 agg({'metrics.test_hits_at_1' : ['first'], 
      'metrics.test_hits_at_10' : ['first'],
      'metrics.test_hits_at_50' : ['first']})
# The helper is called format_df ('formar_df' raised NameError). The table
# displayed below is in percent, so the fractional Hits@k are scaled by 100.
results_retrain = format_df(results_retrain, scale=100)

In [17]:
# Display the formatted Hits@k table of the retrained models.
results_retrain

Unnamed: 0_level_0,Unnamed: 1_level_0,Hits@1,Hits@1,Hits@10,Hits@10,Hits@50,Hits@50
Unnamed: 0_level_1,Weights,No,Yes,No,Yes,No,Yes
DBP15K (FULL),fr-en,31.23,25.05,67.17,61.17,81.51,79.5
DBP15K (FULL),ja-en,33.4,28.33,66.41,61.49,80.8,78.86
DBP15K (FULL),zh-en,31.26,25.01,64.73,58.79,78.89,77.25
DBP15K (JAPE),fr-en,44.71,33.83,81.78,73.67,91.68,90.03
DBP15K (JAPE),ja-en,45.29,35.85,78.28,73.16,89.0,88.24
DBP15K (JAPE),zh-en,42.89,33.86,76.37,71.57,86.7,86.51
DWY100K,wd,58.22,42.31,85.73,74.75,93.48,88.74
DWY100K,yg,73.0,60.56,92.1,84.73,97.33,94.26
WK3L120K,en-de,9.98,4.78,26.92,16.88,43.8,32.45
WK3L120K,en-fr,8.03,3.2,23.41,11.96,38.99,23.82


In [18]:
# Print the Hits@k table of the retrained models as LaTeX source.
to_latex(results_retrain)

\begin{tabular}{*{2}{l}*{6}{@{\extracolsep{\fill}}r}@{\extracolsep{\fill}}}
\toprule
        & {} & \multicolumn{2}{l}{Hits@1} & \multicolumn{2}{l}{Hits@10} & \multicolumn{2}{l}{Hits@50} \\
        & \textbf{Weights} &     No &    Yes &      No &    Yes &      No &    Yes \\
\midrule
\textbf{DBP15K (FULL)} & \textbf{fr-en} &  31.23 &  25.05 &   67.17 &  61.17 &   81.51 &  79.50 \\
        & \textbf{ja-en} &  33.40 &  28.33 &   66.41 &  61.49 &   80.80 &  78.86 \\
        & \textbf{zh-en} &  31.26 &  25.01 &   64.73 &  58.79 &   78.89 &  77.25 \\
\textbf{DBP15K (JAPE)} & \textbf{fr-en} &  44.71 &  33.83 &   81.78 &  73.67 &   91.68 &  90.03 \\
        & \textbf{ja-en} &  45.29 &  35.85 &   78.28 &  73.16 &   89.00 &  88.24 \\
        & \textbf{zh-en} &  42.89 &  33.86 &   76.37 &  71.57 &   86.70 &  86.51 \\
\textbf{DWY100K} & \textbf{wd} &  58.22 &  42.31 &   85.73 &  74.75 &   93.48 &  88.74 \\
        & \textbf{yg} &  73.00 &  60.56 &   92.10 &  84.73 &   97.33 &  94.26 \\
\textbf{WK

In [19]:
# Same grouping as for the Hits@k table, but for mean rank (MR) and mean reciprocal rank (MRR).
results_retrain_mrr = results_2.groupby(by=['params.dataset_name', 'params.subset_name', 'params.use_conv_weights']).\
 agg({'metrics.test_mr' : ['first'], 
      'metrics.test_mrr' : ['first']})
# The helper is called format_df ('formar_df' raised NameError). MR and MRR
# are reported unscaled, with three decimals.
results_retrain_mrr = format_df(results_retrain_mrr, new_metrics_names=['MR', 'MRR'], scale=1, round_digits=3)
results_retrain_mrr

Unnamed: 0_level_0,Unnamed: 1_level_0,MR,MR,MRR,MRR
Unnamed: 0_level_1,Weights,No,Yes,No,Yes
DBP15K (FULL),fr-en,267.523,257.094,0.431,0.369
DBP15K (FULL),ja-en,276.643,266.306,0.444,0.392
DBP15K (FULL),zh-en,342.393,288.309,0.424,0.361
DBP15K (JAPE),fr-en,139.709,111.004,0.573,0.469
DBP15K (JAPE),ja-en,197.384,137.57,0.566,0.482
DBP15K (JAPE),zh-en,238.115,180.02,0.543,0.463
DWY100K,wd,514.609,312.208,0.68,0.535
DWY100K,yg,95.302,59.714,0.798,0.691
WK3L120K,en-de,2844.808,2142.35,0.159,0.091
WK3L120K,en-fr,4331.123,3986.397,0.133,0.063


In [20]:
# Print the MR / MRR table of the retrained models as LaTeX source.
to_latex(results_retrain_mrr)

\begin{tabular}{*{2}{l}*{6}{@{\extracolsep{\fill}}r}@{\extracolsep{\fill}}}
\toprule
        & {} & \multicolumn{2}{l}{MR} & \multicolumn{2}{l}{MRR} \\
        & \textbf{Weights} &        No &       Yes &     No &    Yes \\
\midrule
\textbf{DBP15K (FULL)} & \textbf{fr-en} &   267.523 &   257.094 &  0.431 &  0.369 \\
        & \textbf{ja-en} &   276.643 &   266.306 &  0.444 &  0.392 \\
        & \textbf{zh-en} &   342.393 &   288.309 &  0.424 &  0.361 \\
\textbf{DBP15K (JAPE)} & \textbf{fr-en} &   139.709 &   111.004 &  0.573 &  0.469 \\
        & \textbf{ja-en} &   197.384 &   137.570 &  0.566 &  0.482 \\
        & \textbf{zh-en} &   238.115 &   180.020 &  0.543 &  0.463 \\
\textbf{DWY100K} & \textbf{wd} &   514.609 &   312.208 &  0.680 &  0.535 \\
        & \textbf{yg} &    95.302 &    59.714 &  0.798 &  0.691 \\
\textbf{WK3L120K} & \textbf{en-de} &  2844.808 &  2142.350 &  0.159 &  0.091 \\
        & \textbf{en-fr} &  4331.123 &  3986.397 &  0.133 &  0.063 \\
\textbf{WK3L15K} & \text

## 3. Ablation study
### For use_conv_weights == True or (use_conv_weights == False and node_embedding_init == 'none')
```python
params_search = {
    'dataset_name': [args.dataset_name],
    'subset_name': subsets[args.dataset_name],
    'num_epochs': [2000],
    'lr': [1, 10, 20, 30],
    'eval_batch_size': [1000], 
    'embedding_dim': [200],
    'n_layers': [2, 3, 4],
    'num_negatives': [50],
    'use_edge_weights': [EdgeWeightsEnum.inverse_in_degree],  # None
    'use_conv_weights': [False, True],
    'conv_weight_init': [init.xavier_uniform_],
    'train_val_ratio': [0.8],  # size of train subset in comparison with
    'node_embedding_init': ['total', 'none'], 
    'optimizer': [optim.Adam],
    'seed': [seed]
}
```
### For use_conv_weights == False and node_embedding_init == 'total'
```python
params_search = {
    'dataset_name': [args.dataset_name],
    'subset_name': subsets[args.dataset_name],
    'num_epochs': [2000, 3000],
    'lr': [0.5, 1],
    'eval_batch_size': [1000],  # 1000
    'embedding_dim': [200],
    'n_layers': [2],
    'num_negatives': [100],
    'use_edge_weights': [EdgeWeightsEnum.inverse_in_degree],  # None
    'use_conv_weights': [False],
    'conv_weight_init': [init.xavier_uniform_],
    'train_val_ratio': [0.8],  # size of train subset in comparison with
    'node_embedding_init': ['total'], 
    'optimizer': [optim.SGD],
    'seed': [seed]
}
```

In [140]:
# Reload every run of experiment 1 and clean it before slicing out the two ablation grids.
results = drop_duplicates_and_zero_NA(mlflow.search_runs(experiment_ids=[1]))

# Ablation study hyperparameter tuning
# First grid: Adam, 2000 epochs, 50 negatives.
results_3 = results.loc[
    results['params.lr'].isin(['1', '10', '20', '30'])
    & results['params.n_layers'].isin(['2', '3', '4'])
    & results['params.num_negatives'].eq('50')
    & results['params.num_epochs'].eq('2000')
    & results['params.train_val_ratio'].eq('0.8')
    & results['params.optimizer'].eq('Adam')
]

# Second grid: SGD, 2000 or 3000 epochs, 100 negatives, two layers.
results_4 = results.loc[
    results['params.lr'].isin(['0.5', '1'])
    & results['params.n_layers'].isin(['2'])
    & results['params.num_negatives'].eq('100')
    & results['params.num_epochs'].isin(['2000', '3000'])
    & results['params.train_val_ratio'].eq('0.8')
    & results['params.optimizer'].eq('SGD')
    & results['params.node_embedding_init'].isin(['total', 'none'])
]
results_34 = pd.concat([results_3, results_4])
len(results_34)

1070

In [165]:
split_by = ['params.dataset_name', 'params.subset_name', 'params.node_embedding_init', 'params.use_conv_weights']
# Sorting by validation Hits@1 (best first) before drop_duplicates keeps, for
# every split_by combination, only the run with the highest validation score.
results_abl = (
    results_34
    .sort_values('metrics.val_hits_at_1', ascending=False)
    .drop_duplicates(split_by)
)
results_abl = results_abl.groupby(by=split_by).agg({
    'metrics.test_hits_at_1': ['mean'],
    'metrics.test_hits_at_10': ['mean'],
    'metrics.test_hits_at_50': ['mean'],
})
# Multiply by 100 so the table is reported in percent.
results_abl_table = format_df(100*results_abl, unstack_levels=[4, 3, 2],
                              hyperparam_names=[None, 'Weights', 'Emb. init'])
results_abl_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Hits@1,Hits@1,Hits@1,Hits@1,Hits@10,Hits@10,Hits@10,Hits@10,Hits@50,Hits@50,Hits@50,Hits@50
Unnamed: 0_level_1,Weights,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes
Unnamed: 0_level_2,Emb. init,none,total,none,total,none,total,none,total,none,total,none,total
DBP15K (FULL),fr-en,27.6,23.83,21.09,15.23,63.08,57.6,57.89,45.67,79.08,75.63,81.25,68.15
DBP15K (FULL),ja-en,30.39,25.29,25.75,19.76,64.65,56.3,59.61,48.06,81.08,73.04,80.93,68.43
DBP15K (FULL),zh-en,27.54,19.0,21.68,17.31,62.95,51.22,54.76,47.58,81.53,74.24,73.27,70.18
DBP15K (JAPE),fr-en,41.44,36.96,32.16,26.95,78.81,74.87,71.23,65.71,90.1,88.85,86.73,84.33
DBP15K (JAPE),ja-en,40.61,35.12,32.88,27.22,74.37,69.74,68.99,62.08,86.5,83.73,84.06,79.47
DBP15K (JAPE),zh-en,39.25,34.44,31.08,26.93,72.73,68.03,68.07,62.45,84.7,81.59,83.95,79.53
DWY100K,wd,54.39,48.54,45.6,33.67,82.62,76.17,74.95,64.22,91.64,87.19,85.93,79.27
DWY100K,yg,70.49,62.84,64.43,50.28,90.67,84.4,86.35,75.23,96.65,93.09,93.95,87.7
WK3L120K,en-de,9.12,8.29,8.62,6.09,25.36,23.43,24.1,19.48,42.13,39.55,40.6,35.16
WK3L120K,en-fr,7.55,6.63,6.89,4.73,22.34,20.01,21.12,15.46,37.67,34.87,36.37,28.15


In [101]:
# Ablation study - Retraining on full train
# The retraining runs are selected by seed: only the five seeds below are kept.
# Unlike the earlier cells, drop_duplicates_and_zero_NA is not applied here.
results = mlflow.search_runs(experiment_ids=[1])
results5 = results[results['params.seed'].isin(['42', '424', '4242', '42424', '424242'])]
len(results5)

240

In [114]:
split_by = ['params.dataset_name', 'params.subset_name', 'params.node_embedding_init', 'params.use_conv_weights']
# Mean and standard deviation of the test Hits@k within every split_by group.
results_abl = results5.groupby(by=split_by).\
    agg({'metrics.test_hits_at_1' : ['mean', 'std'], 
         'metrics.test_hits_at_10' : ['mean', 'std'],
         'metrics.test_hits_at_50' : ['mean', 'std']})
# Report percentages.
results_abl *= 100
# Move the metric name into the row index so 'mean' and 'std' become plain
# columns that can be combined into one "mean ± std" string per cell.
results_abl = results_abl.stack(level=0)
results_abl['mean+std'] = results_abl['mean'].apply("{0:.2f}".format) + ' ± ' + results_abl['std'].apply("{0:.2f}".format)
# Restore the (metric, statistic) column layout that format_df expects.
results_abl = results_abl.unstack().swaplevel(0,1,axis=1).sort_index(axis=1)
results_abl_table = format_df(results_abl.loc[:, (slice(None), 'mean+std')], unstack_levels = [4, 3, 2], 
                              hyperparam_names = [None, 'Weights', 'Emb. init'])
# Display the table; the original line was a dangling assignment
# ('results_abl_table = '), which is a SyntaxError.
results_abl_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Hits@1,Hits@1,Hits@1,Hits@1,Hits@10,Hits@10,Hits@10,Hits@10,Hits@50,Hits@50,Hits@50,Hits@50
Unnamed: 0_level_1,Weights,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes
Unnamed: 0_level_2,Emb. init,none,total,none,total,none,total,none,total,none,total,none,total
DBP15K (FULL),fr-en,31.51 ± 0.16,27.64 ± 0.22,21.82 ± 0.39,16.73 ± 0.59,68.38 ± 0.32,63.41 ± 0.14,59.26 ± 0.55,48.55 ± 0.92,83.33 ± 0.13,80.77 ± 0.30,82.13 ± 0.66,71.28 ± 1.15
DBP15K (FULL),ja-en,33.26 ± 0.10,29.06 ± 0.23,26.21 ± 0.33,20.78 ± 0.16,68.22 ± 0.09,61.95 ± 0.17,61.12 ± 0.51,49.56 ± 0.38,84.32 ± 0.05,78.53 ± 0.09,82.55 ± 0.07,70.06 ± 0.74
DBP15K (FULL),zh-en,31.15 ± 0.15,22.55 ± 0.27,24.96 ± 0.71,18.85 ± 0.99,67.46 ± 0.11,56.03 ± 0.21,59.07 ± 1.10,50.32 ± 1.52,85.41 ± 0.12,77.18 ± 0.19,77.69 ± 1.11,72.25 ± 1.34
DBP15K (JAPE),fr-en,45.37 ± 0.13,41.03 ± 0.13,35.36 ± 0.33,30.50 ± 0.38,82.48 ± 0.08,79.11 ± 0.07,74.71 ± 0.27,69.72 ± 0.36,92.84 ± 0.08,91.36 ± 0.10,89.17 ± 0.13,86.97 ± 0.19
DBP15K (JAPE),ja-en,45.53 ± 0.18,40.29 ± 0.09,35.81 ± 0.53,31.46 ± 0.15,79.77 ± 0.14,75.13 ± 0.20,73.05 ± 0.52,67.18 ± 0.28,90.41 ± 0.06,87.81 ± 0.07,88.03 ± 0.12,83.71 ± 0.39
DBP15K (JAPE),zh-en,43.30 ± 0.12,39.37 ± 0.20,33.61 ± 0.49,29.94 ± 0.35,77.63 ± 0.05,73.66 ± 0.28,71.16 ± 0.17,66.22 ± 0.51,88.46 ± 0.13,86.11 ± 0.11,86.30 ± 0.24,82.30 ± 0.38
DWY100K,wd,58.50 ± 0.05,54.07 ± 0.05,50.13 ± 0.11,38.85 ± 0.31,86.26 ± 0.05,81.30 ± 0.03,79.65 ± 0.20,69.73 ± 0.25,94.40 ± 0.04,90.81 ± 0.04,89.81 ± 0.13,83.38 ± 0.20
DWY100K,yg,72.82 ± 0.06,67.06 ± 0.03,67.36 ± 0.10,60.67 ± 0.30,92.13 ± 0.04,87.57 ± 0.04,88.64 ± 0.09,83.76 ± 0.27,97.54 ± 0.02,94.90 ± 0.03,95.51 ± 0.11,93.39 ± 0.13
WK3L120K,en-de,10.10 ± 0.03,9.17 ± 0.05,9.02 ± 0.17,6.75 ± 0.12,27.13 ± 0.02,24.92 ± 0.03,25.49 ± 0.26,20.83 ± 0.29,44.15 ± 0.03,41.45 ± 0.11,42.47 ± 0.42,36.92 ± 0.25
WK3L120K,en-fr,8.28 ± 0.03,7.38 ± 0.03,7.26 ± 0.11,5.07 ± 0.16,23.73 ± 0.04,21.57 ± 0.05,22.16 ± 0.24,16.31 ± 0.35,39.40 ± 0.06,36.64 ± 0.05,37.85 ± 0.29,29.25 ± 0.55


In [117]:
# Print only the Hits@10 columns of the ablation table as LaTeX source.
to_latex(results_abl_table[['Hits@10']])

\begin{tabular}{*{2}{l}*{6}{@{\extracolsep{\fill}}r}@{\extracolsep{\fill}}}
\toprule
        & {} & \multicolumn{4}{l}{Hits@10} \\
        & \textbf{Weights} & \multicolumn{2}{l}{No} & \multicolumn{2}{l}{Yes} \\
        & \textbf{Emb. init} &          none &         total &          none &         total \\
\midrule
\textbf{DBP15K (FULL)} & \textbf{fr-en} &  68.38 ± 0.32 &  63.41 ± 0.14 &  59.26 ± 0.55 &  48.55 ± 0.92 \\
        & \textbf{ja-en} &  68.22 ± 0.09 &  61.95 ± 0.17 &  61.12 ± 0.51 &  49.56 ± 0.38 \\
        & \textbf{zh-en} &  67.46 ± 0.11 &  56.03 ± 0.21 &  59.07 ± 1.10 &  50.32 ± 1.52 \\
\textbf{DBP15K (JAPE)} & \textbf{fr-en} &  82.48 ± 0.08 &  79.11 ± 0.07 &  74.71 ± 0.27 &  69.72 ± 0.36 \\
        & \textbf{ja-en} &  79.77 ± 0.14 &  75.13 ± 0.20 &  73.05 ± 0.52 &  67.18 ± 0.28 \\
        & \textbf{zh-en} &  77.63 ± 0.05 &  73.66 ± 0.28 &  71.16 ± 0.17 &  66.22 ± 0.51 \\
\textbf{DWY100K} & \textbf{wd} &  86.26 ± 0.05 &  81.30 ± 0.03 &  79.65 ± 0.20 &  69.73 ± 0.25 \\
   

In [119]:
split_by = ['params.dataset_name', 'params.subset_name', 'params.node_embedding_init', 'params.use_conv_weights']
# Mean and standard deviation of test MR and MRR within every split_by group.
results_abl = results5.groupby(by=split_by).\
    agg({'metrics.test_mr' : ['mean', 'std'], 
         'metrics.test_mrr' : ['mean', 'std']})
# Move the metric name into the row index so 'mean' and 'std' become plain columns.
results_abl = results_abl.stack(level=0)
# The mean is formatted with two decimals, the standard deviation with three.
results_abl['mean+std'] = results_abl['mean'].apply("{0:.2f}".format) + ' ± ' + results_abl['std'].apply("{0:.3f}".format)
# Restore the (metric, statistic) column layout before formatting.
results_abl = results_abl.unstack().swaplevel(0,1,axis=1).sort_index(axis=1)
results_abl_table_mr = format_df(results_abl.loc[:, (slice(None), 'mean+std')], unstack_levels = [4, 3, 2], 
                                 hyperparam_names = [None, 'Weights', 'Emb. init'],
                                 new_metrics_names=['MR', 'MRR'], scale=1)
results_abl_table_mr

Unnamed: 0_level_0,Unnamed: 1_level_0,MR,MR,MR,MR,MRR,MRR,MRR,MRR
Unnamed: 0_level_1,Weights,No,No,Yes,Yes,No,No,Yes,Yes
Unnamed: 0_level_2,Emb. init,none,total,none,total,none,total,none,total
DBP15K (FULL),fr-en,203.90 ± 3.799,262.24 ± 3.229,123.09 ± 15.434,208.00 ± 12.039,0.44 ± 0.001,0.39 ± 0.002,0.34 ± 0.005,0.27 ± 0.007
DBP15K (FULL),ja-en,206.17 ± 4.214,358.53 ± 3.647,138.80 ± 12.870,238.24 ± 24.092,0.45 ± 0.001,0.40 ± 0.002,0.38 ± 0.004,0.30 ± 0.002
DBP15K (FULL),zh-en,168.80 ± 2.588,149.08 ± 2.700,279.49 ± 38.778,206.36 ± 17.598,0.43 ± 0.001,0.34 ± 0.002,0.36 ± 0.008,0.29 ± 0.012
DBP15K (JAPE),fr-en,109.64 ± 1.565,117.59 ± 2.911,130.75 ± 8.476,133.14 ± 7.085,0.58 ± 0.001,0.54 ± 0.000,0.48 ± 0.003,0.43 ± 0.003
DBP15K (JAPE),ja-en,144.81 ± 1.892,195.19 ± 3.443,146.42 ± 6.446,221.92 ± 12.220,0.57 ± 0.001,0.52 ± 0.001,0.48 ± 0.006,0.43 ± 0.001
DBP15K (JAPE),zh-en,181.37 ± 4.052,215.23 ± 4.532,172.05 ± 12.719,236.72 ± 2.839,0.55 ± 0.001,0.51 ± 0.001,0.46 ± 0.004,0.42 ± 0.004
DWY100K,wd,277.08 ± 8.275,460.32 ± 9.174,500.61 ± 24.099,563.29 ± 28.924,0.68 ± 0.000,0.64 ± 0.000,0.61 ± 0.001,0.50 ± 0.003
DWY100K,yg,49.32 ± 2.709,102.50 ± 3.688,105.52 ± 4.628,67.71 ± 3.816,0.80 ± 0.000,0.74 ± 0.000,0.75 ± 0.001,0.69 ± 0.003
WK3L120K,en-de,2753.75 ± 6.687,2280.31 ± 8.969,2843.96 ± 53.292,2289.02 ± 36.710,0.16 ± 0.000,0.15 ± 0.000,0.15 ± 0.002,0.12 ± 0.002
WK3L120K,en-fr,4438.81 ± 9.289,4110.23 ± 7.903,4551.39 ± 55.291,4007.91 ± 59.013,0.14 ± 0.000,0.12 ± 0.000,0.12 ± 0.002,0.09 ± 0.002


In [122]:
# Print only the MRR columns of the ablation table as LaTeX source.
to_latex(results_abl_table_mr[['MRR']])

\begin{tabular}{*{2}{l}*{6}{@{\extracolsep{\fill}}r}@{\extracolsep{\fill}}}
\toprule
        & {} & \multicolumn{4}{l}{MRR} \\
        & \textbf{Weights} & \multicolumn{2}{l}{No} & \multicolumn{2}{l}{Yes} \\
        & \textbf{Emb. init} &          none &         total &          none &         total \\
\midrule
\textbf{DBP15K (FULL)} & \textbf{fr-en} &  0.44 ± 0.001 &  0.39 ± 0.002 &  0.34 ± 0.005 &  0.27 ± 0.007 \\
        & \textbf{ja-en} &  0.45 ± 0.001 &  0.40 ± 0.002 &  0.38 ± 0.004 &  0.30 ± 0.002 \\
        & \textbf{zh-en} &  0.43 ± 0.001 &  0.34 ± 0.002 &  0.36 ± 0.008 &  0.29 ± 0.012 \\
\textbf{DBP15K (JAPE)} & \textbf{fr-en} &  0.58 ± 0.001 &  0.54 ± 0.000 &  0.48 ± 0.003 &  0.43 ± 0.003 \\
        & \textbf{ja-en} &  0.57 ± 0.001 &  0.52 ± 0.001 &  0.48 ± 0.006 &  0.43 ± 0.001 \\
        & \textbf{zh-en} &  0.55 ± 0.001 &  0.51 ± 0.001 &  0.46 ± 0.004 &  0.42 ± 0.004 \\
\textbf{DWY100K} & \textbf{wd} &  0.68 ± 0.000 &  0.64 ± 0.000 &  0.61 ± 0.001 &  0.50 ± 0.003 \\
       