In [1]:
import optuna
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from glob import glob
import re

In [2]:
def load_study(study_name):
    """Load the Optuna study stored in the journal file ``{study_name}.log``.

    The study is looked up by the name recorded inside the journal rather than
    by the file name: the first study listed by the storage is the one loaded.
    If the storage lists no studies, ``study_name`` itself is used for the
    lookup (and Optuna reports the missing study).

    Args:
        study_name: Journal file name without the ``.log`` extension.

    Returns:
        The study object returned by ``optuna.study.load_study``.
    """
    journal_name = f'{study_name}.log'
    study_storage = optuna.storages.JournalStorage(
        optuna.storages.JournalFileStorage(journal_name))
    # Read the public ``study_name`` of the first stored study instead of going
    # through the private ``_study_id``; fall back to the argument when empty.
    stored_name = next(
        (frozen_study.study_name for frozen_study in study_storage.get_all_studies()),
        study_name,
    )
    return optuna.study.load_study(study_name=stored_name, storage=study_storage)

In [3]:
def parse_studies(prefix='criteo_', suffix='.log', pattern='*.log'):
    """List the studies found in the working directory, parsed from file names.

    File names are expected to look like ``{prefix}{experiment}_{embed_dim}{suffix}``,
    e.g. ``criteo_splines_3_32.log`` -> ``('splines_3', 32, 'criteo_splines_3_32')``.
    Files matching ``pattern`` whose name does not end in ``_<digits>`` (after
    removing ``suffix``) are skipped.

    Args:
        prefix: Leading text stripped from the study name to get the experiment name.
        suffix: Trailing text (file extension) stripped from the file name.
        pattern: Glob pattern selecting the candidate files.

    Returns:
        A list of ``(experiment_name, embed_dim, study_name)`` tuples, in the
        order returned by ``glob``.
    """
    def helper():
        # Honour the caller-supplied pattern rather than a hard-coded '*.log'.
        for filename in glob(pattern):
            study_name = filename.removesuffix(suffix)
            clean_name = study_name.removeprefix(prefix)
            embed_dim_match = re.search('_([0-9]+)$', clean_name)
            if embed_dim_match is None:
                # Not a '<experiment>_<embed_dim>' file; ignore it instead of crashing.
                continue
            embed_dim = int(embed_dim_match.group(1))
            # Drop the trailing '_<embed_dim>' to recover the experiment name.
            exp_name = clean_name.removesuffix(embed_dim_match.group(0))
            yield exp_name, embed_dim, study_name
    return list(helper())

In [4]:
# Preview the (experiment, embed_dim, study_name) tuples parsed from the *.log files in the working directory.
parse_studies()

[('bins', 32, 'criteo_bins_32'),
 ('splines_3', 32, 'criteo_splines_3_32'),
 ('splines_0', 32, 'criteo_splines_0_32'),
 ('bins', 48, 'criteo_bins_48'),
 ('splines_0', 48, 'criteo_splines_0_48'),
 ('splines_3', 48, 'criteo_splines_3_48')]

In [5]:
# Gather the best trial of every study into one table: its sampled parameters,
# its user attributes, the objective value (stored as `val_loss`) and the
# identifiers parsed from the study's file name.
best_trial_records = []
for experiment, embed_dim, study_name in parse_studies():
    study = load_study(study_name)
    print(f'Study {study_name} has {len(study.trials)} trials')

    best_trial = study.best_trial
    best_trial_records.append(
        best_trial.params
        | best_trial.user_attrs
        | dict(val_loss=best_trial.value, study_name=study_name, experiment=experiment, embed_dim=embed_dim)
    )

# Build the frame once from all records so each row gets a unique index
# (concatenating one-row frames left every row labelled 0).
trials_df = pd.DataFrame.from_records(best_trial_records)
trials_df

Study criteo_bins_32 has 50 trials
Study criteo_splines_3_32 has 50 trials


  study_storage = optuna.storages.JournalStorage(


Study criteo_splines_0_32 has 50 trials
Study criteo_bins_48 has 50 trials
Study criteo_splines_0_48 has 50 trials
Study criteo_splines_3_48 has 50 trials


Unnamed: 0,lr,l2reg,random_seed,degree,best_epoch,test_loss,val_loss,study_name,experiment,embed_dim
0,0.000463,1.501436e-08,42,0,0,0.450826,0.451691,criteo_bins_32,bins,32
0,0.00018,2.534758e-05,42,3,1,0.449303,0.449933,criteo_splines_3_32,splines_3,32
0,0.000424,1.382697e-08,42,0,0,0.450108,0.450735,criteo_splines_0_32,splines_0,32
0,0.000159,3.470267e-05,42,0,1,0.451438,0.451914,criteo_bins_48,bins,48
0,0.000366,0.0003988743,42,0,0,0.451067,0.451508,criteo_splines_0_48,splines_0,48
0,0.000369,9.449331e-07,42,3,0,0.449874,0.450585,criteo_splines_3_48,splines_3,48


In [6]:
# Learning rate and L2 regularisation of each best trial: one row per embedding
# dimension, one column per (hyper-parameter, experiment) pair.
(
    trials_df[['experiment', 'embed_dim', 'lr', 'l2reg']]
    .pivot(index='embed_dim', columns=['experiment'], values=['lr', 'l2reg'])
)

Unnamed: 0_level_0,lr,lr,lr,l2reg,l2reg,l2reg
experiment,bins,splines_0,splines_3,bins,splines_0,splines_3
embed_dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
32,0.000463,0.000424,0.00018,1.501436e-08,1.382697e-08,2.534758e-05
48,0.000159,0.000366,0.000369,3.470267e-05,0.0003988743,9.449331e-07


In [7]:
# `val_loss` of each best trial: rows are embedding dimensions, columns are experiments.
val_loss_columns = trials_df[['experiment', 'embed_dim', 'val_loss']]
pivoted = val_loss_columns.pivot(
    index='embed_dim',
    columns=['experiment'],
    values=['val_loss'],
)
pivoted

Unnamed: 0_level_0,val_loss,val_loss,val_loss
experiment,bins,splines_0,splines_3
embed_dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
32,0.451691,0.450735,0.449933
48,0.451914,0.451508,0.450585


In [8]:
# `test_loss` of each best trial: rows are embedding dimensions, columns are experiments.
test_loss_columns = trials_df[['experiment', 'embed_dim', 'test_loss']]
pivoted = test_loss_columns.pivot(
    index='embed_dim',
    columns=['experiment'],
    values=['test_loss'],
)
pivoted

Unnamed: 0_level_0,test_loss,test_loss,test_loss
experiment,bins,splines_0,splines_3
embed_dim,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
32,0.450826,0.450108,0.449303
48,0.451438,0.451067,0.449874


In [9]:
# Relative test-loss improvement (in percent) of every experiment over the
# 'bins' baseline, per embedding dimension: 100 * (1 - loss / baseline_loss).
# The baseline is selected by label, so the result does not depend on 'bins'
# happening to be the first column after pivoting.
baseline_column = ('test_loss', 'bins')
baseline_loss = pivoted[baseline_column]
# `div(..., axis=0)` aligns the baseline with each row by `embed_dim`.
lift_pct = 100 * (1 - pivoted.drop(columns=[baseline_column]).div(baseline_loss, axis=0))
lift_pct

Unnamed: 0_level_0,test_loss,test_loss
experiment,splines_0,splines_3
embed_dim,Unnamed: 1_level_2,Unnamed: 2_level_2
32,0.159265,0.337855
48,0.082011,0.346285
