In [None]:
import optuna
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from glob import glob
import re

In [None]:
def load_study(study_name):
    """Load the Optuna study stored in the journal file ``<study_name>.log``.

    The journal is opened with ``JournalStorage`` on top of ``JournalFileStorage``.
    The study that gets loaded is the first one reported by the storage, under
    whatever name it was recorded with; ``study_name`` itself is only used as the
    lookup name when the storage reports no studies at all.

    Args:
        study_name: Journal file name without the ``.log`` extension.

    Returns:
        Whatever ``optuna.study.load_study`` returns for the selected study.
    """
    journal_name = f'{study_name}.log'
    study_storage = optuna.storages.JournalStorage(
        optuna.storages.JournalFileStorage(journal_name))

    # Take the recorded name from the public `study_name` attribute of the first
    # stored study rather than going through the private `_study_id` field, and
    # keep the caller's argument untouched instead of rebinding it in a loop.
    first_stored = next(iter(study_storage.get_all_studies()), None)
    stored_name = study_name if first_stored is None else first_stored.study_name

    return optuna.study.load_study(study_name=stored_name, storage=study_storage)

In [None]:
def parse_studies(prefix='criteo_', suffix='.log', pattern='*.log'):
    """List the study journals matching ``pattern`` and decode their file names.

    A file name is expected to look like ``<prefix><experiment>_<embed_dim><suffix>``.
    ``suffix`` and ``prefix`` are stripped when present, and the trailing
    ``_<digits>`` group is read as the embedding dimension.

    Args:
        prefix: Leading text removed from the study name to get the experiment name.
        suffix: Trailing text removed from the file name to get the study name.
        pattern: Glob pattern (resolved against the current working directory)
            selecting the journal files.

    Returns:
        A list of ``(exp_name, embed_dim, study_name)`` tuples ordered by file
        name. Files whose name does not end in ``_<digits>`` once ``suffix`` is
        removed are skipped.
    """
    embed_dim_re = re.compile('_([0-9]+)$')

    def helper():
        # Honour `pattern` (it used to be ignored) and sort the matches: glob
        # returns them in arbitrary order, which made the row order of every
        # table built from this list depend on the file system.
        for filename in sorted(glob(pattern)):
            study_name = filename.removesuffix(suffix)
            clean_name = study_name.removeprefix(prefix)
            embed_dim_match = embed_dim_re.search(clean_name)
            if embed_dim_match is None:
                # Not a "<experiment>_<embed_dim>" journal (e.g. an unrelated
                # log file in the same directory); skip it instead of crashing.
                continue
            embed_dim = int(embed_dim_match.group(1))
            exp_name = clean_name.removesuffix(embed_dim_match.group(0))
            yield exp_name, embed_dim, study_name

    return list(helper())

In [None]:
# Preview the (exp_name, embed_dim, study_name) tuples parsed from the journal file names.
parse_studies()

In [None]:
# Build one row per study from its best trial: the trial's params and user
# attributes, plus the objective value (`val_loss`) and the identifiers parsed
# from the journal file name.
best_trial_records = []
for experiment, embed_dim, study_name in parse_studies():
    study = load_study(study_name)
    print(f'Study {study_name} has {len(study.trials)} trials')

    best_trial = study.best_trial
    best_trial_records.append(
        best_trial.params
        | best_trial.user_attrs
        | dict(val_loss=best_trial.value, study_name=study_name,
               experiment=experiment, embed_dim=embed_dim))

# Construct the frame once from all records. This gives every row its own index
# label (concatenating one-row frames left them all labelled 0) and keeps
# `trials_df` a DataFrame throughout instead of a list that is later rebound.
trials_df = pd.DataFrame.from_records(best_trial_records)
trials_df

In [None]:
# `lr` and `l2reg` from `trials_df`, laid out with one row per embed_dim and one
# column per (value, experiment) pair.
(
    trials_df
    .loc[:, ['experiment', 'embed_dim', 'lr', 'l2reg']]
    .pivot(index='embed_dim', columns=['experiment'], values=['lr', 'l2reg'])
)

In [None]:
# `val_loss` from `trials_df`, one row per embed_dim and one column per experiment.
# Kept under its own name: the lift computation further down reads `pivoted`,
# which is the test-loss table, so re-running this cell must not overwrite it.
val_loss_pivot = (
    trials_df[['experiment', 'embed_dim', 'val_loss']]
    .pivot(columns=['experiment'], index='embed_dim', values=['val_loss'])
)
val_loss_pivot

In [None]:
# `test_loss` from `trials_df`, one row per embed_dim and one column per experiment.
pivoted = (
    trials_df
    .loc[:, ['experiment', 'embed_dim', 'test_loss']]
    .pivot(index='embed_dim', columns=['experiment'], values=['test_loss'])
)
pivoted

In [None]:
# Percentage by which each remaining column of `pivoted` falls below its first
# column, computed row by row (positive means a smaller value than the first column).
lift_pct = (1 - pivoted.iloc[:, 1:].div(pivoted.iloc[:, 0], axis=0)) * 100
lift_pct