In [15]:
import optuna
import matplotlib.pyplot as plt
import pandas as pd

In [16]:
def load_study(study_name):
    journal_name = f'{study_name}.log'
    study_storage = optuna.storages.JournalStorage(
        optuna.storages.JournalFileStorage(journal_name)) 
    study = optuna.create_study(study_name=study_name,
                                storage=study_storage,
                                direction='minimize',
                                load_if_exists=True)
    return study

In [17]:
degrees = [0, 2]
dims = [8, 10, 12, 14, 16, 18]

trials_df = []
for deg in degrees:
    for dim in dims:
        study_name = f'criteo_{deg}_{dim}'
        study = load_study(study_name)
        print(f'Study {study_name} has {len(study.trials)} trials')
        
        trial_data = [trial.params | trial.user_attrs | dict(val_loss=trial.value) 
                      for trial in [study.best_trial]]
        trial_data_df = pd.DataFrame.from_records(trial_data)
        trials_df.append(trial_data_df)

trials_df = pd.concat(trials_df, axis=0)
trials_df

  study_storage = optuna.storages.JournalStorage(
[I 2024-04-01 07:42:11,366] Using an existing study with name 'criteo_0_8' instead of creating a new one.


Study criteo_0_8 has 50 trials


[I 2024-04-01 07:42:11,404] Using an existing study with name 'criteo_0_10' instead of creating a new one.
[I 2024-04-01 07:42:11,439] Using an existing study with name 'criteo_0_12' instead of creating a new one.
[I 2024-04-01 07:42:11,464] Using an existing study with name 'criteo_0_14' instead of creating a new one.
[I 2024-04-01 07:42:11,487] Using an existing study with name 'criteo_0_16' instead of creating a new one.
[I 2024-04-01 07:42:11,511] Using an existing study with name 'criteo_0_18' instead of creating a new one.
[I 2024-04-01 07:42:11,535] Using an existing study with name 'criteo_2_8' instead of creating a new one.


Study criteo_0_10 has 50 trials
Study criteo_0_12 has 50 trials
Study criteo_0_14 has 50 trials
Study criteo_0_16 has 50 trials
Study criteo_0_18 has 50 trials


[I 2024-04-01 07:42:11,577] Using an existing study with name 'criteo_2_10' instead of creating a new one.


Study criteo_2_8 has 50 trials


[I 2024-04-01 07:42:11,608] Using an existing study with name 'criteo_2_12' instead of creating a new one.


Study criteo_2_10 has 50 trials
Study criteo_2_12 has 50 trials


[I 2024-04-01 07:42:11,634] Using an existing study with name 'criteo_2_14' instead of creating a new one.
[I 2024-04-01 07:42:11,660] Using an existing study with name 'criteo_2_16' instead of creating a new one.
[I 2024-04-01 07:42:11,686] Using an existing study with name 'criteo_2_18' instead of creating a new one.


Study criteo_2_14 has 50 trials
Study criteo_2_16 has 50 trials
Study criteo_2_18 has 50 trials


Unnamed: 0,lr,l2reg,emb_dim,random_seed,degree,best_epoch,test_loss,val_loss
0,0.000184,6e-06,8,42,0,19,0.443116,0.443119
0,0.000257,8e-06,10,42,0,19,0.443004,0.44303
0,0.000184,6e-06,12,42,0,15,0.442669,0.44259
0,0.000184,6e-06,14,42,0,14,0.442581,0.44253
0,0.000184,6e-06,16,42,0,14,0.442496,0.442502
0,0.000201,9e-06,18,42,0,19,0.442454,0.442413
0,0.000184,6e-06,8,42,2,19,0.443034,0.442965
0,0.000184,6e-06,10,42,2,15,0.442716,0.442686
0,0.000184,6e-06,12,42,2,15,0.442533,0.442447
0,0.000184,6e-06,14,42,2,15,0.442345,0.44232


In [18]:
pivoted = trials_df[['emb_dim', 'degree', 'test_loss']].pivot(index=['emb_dim'], columns=['degree'], values=['test_loss'])

In [19]:
lift_pct = (100 * (1 - pivoted['test_loss'][2] / pivoted['test_loss'][0])).to_frame()
lift_pct.columns = pd.MultiIndex.from_tuples([('test_loss', 'Lift (%)')])

In [22]:
combined = pd.concat([pivoted, lift_pct], axis=1)
combined.transpose()

Unnamed: 0,emb_dim,8,10,12,14,16,18
test_loss,0,0.443116,0.443004,0.442669,0.442581,0.442496,0.442454
test_loss,2,0.443034,0.442716,0.442533,0.442345,0.442204,0.442279
test_loss,Lift (%),0.018648,0.065007,0.030623,0.053229,0.066111,0.039528


In [23]:
print(combined.transpose().style.format(precision=4).to_latex(hrules=True))

\begin{tabular}{llrrrrrr}
\toprule
 & emb_dim & 8 & 10 & 12 & 14 & 16 & 18 \\
\midrule
\multirow[c]{3}{*}{test_loss} & 0 & 0.4431 & 0.4430 & 0.4427 & 0.4426 & 0.4425 & 0.4425 \\
 & 2 & 0.4430 & 0.4427 & 0.4425 & 0.4423 & 0.4422 & 0.4423 \\
 & Lift (%) & 0.0186 & 0.0650 & 0.0306 & 0.0532 & 0.0661 & 0.0395 \\
\bottomrule
\end{tabular}

