In [1]:
import numpy as np
import pandas as pd 
import warnings
import pickle
import wandb
import time
import tqdm

In [19]:
api = wandb.Api(timeout=60)
entity, project = 'the-real-dl', 'bio-transformers'
runs = api.runs(entity + '/' + project)
dataset = 'fashion'

In [3]:
your_sweep_ids = [
    '0jatqkyi', # fashion-bp-adagrad
    '1gtby7pv', # fashion-bp-adam
    '8m7yfmsq', # fashion-bp-rmsprop
    '8jmex28k', # fashion-bp-momentum
    'ujbl0nw9', # fashion-pc-adagrad-adam
    'j6u1zbtz', # fashion-pc-adam-adam
    '2td03ada', # fashion-pc-rmsprop-adam
    'lct3mndb', # fashion-pc-momentum-adam
    'nki5ynwb', # fashion-pc-adagrad-momentum
    'slmsr4uw', # fashion-pc-adam-momentum
    '9jllkbpf', # fashion-pc-rmsprop-momentum
    'l6yr9ncr', # fashion-pc-momentum-momentum
]

In [4]:
def extract(wandb_runs, maxrun=None):
    
    dfs = []
    count = 0

    for i, run in enumerate(runs):
        # time.sleep(2) # to avoid 429 Client Error
        print(f'Fetching run #{i}', end='\r')
        if maxrun is not None and count == maxrun: break

        summary = run.summary
        if not 'epoch' in summary.keys(): continue
        if run.state == 'running': continue
        if run.state == 'crashed': continue
        if run.state == 'failed': continue

        run_id = run.id
        sweep_id = run.sweep.id

        if sweep_id not in your_sweep_ids: continue

        config = run.config
        run_df = run.history()

        config.update({'sweep-id': sweep_id, 'run-id': run_id})
        summary.update({'sweep-id': sweep_id, 'run-id': run_id})

        l_epochs = []
        l_train_loss = []
        l_test_loss = []
        l_train_energy = []

        for e in range(summary['epoch']):
            x = run_df[run_df.epoch == e]
            if len(x.index) == 0: continue

            train_losses = x.train_loss.unique()
            test_losses = x.test_loss.unique()

            train_loss_no_nan = train_losses[~np.isnan(train_losses)]
            test_loss_no_nan = test_losses[~np.isnan(test_losses)]

            train_loss = train_loss_no_nan[0] if len(train_loss_no_nan) > 0 else np.nan
            test_loss = test_loss_no_nan[0] if len(test_loss_no_nan) > 0 else np.nan

            l_epochs.append(e)
            l_train_loss.append(train_loss)
            l_test_loss.append(test_loss)

            if config['training'] == 'pc':
                train_energies = x.train_energy.unique()
                train_energy_no_nan = train_energies[~np.isnan(train_energies)]
                train_energy = train_energy_no_nan[0] if len(train_energy_no_nan) > 0 else np.nan
                l_train_energy.append(train_energy)

            if config['training'] == 'bp':
                run_ids = np.full(len(l_epochs), run_id)
                sweep_ids = np.full(len(l_epochs), sweep_id)
                data = list(zip(sweep_ids, run_ids, l_epochs, l_train_loss, l_test_loss))
                columns = ['sweep_id', 'run_id', 'epoch', 'train_loss', 'test_loss']
            else:
                run_ids = np.full(len(l_epochs), run_id)
                sweep_ids = np.full(len(l_epochs), sweep_id)
                data = list(zip(sweep_ids, run_ids, l_epochs, l_train_loss, l_test_loss, l_train_energy))
                columns = ['sweep_id', 'run_id', 'epoch', 'train_loss', 'test_loss', 'train_energy']

        df = pd.DataFrame(data, columns=columns)

        # adding configs to df
        for k in list(config.keys())[:-2]:
            df['c_' + str(k)] = config[k]

        # adding summary to df
        for k in summary.keys():
            if k in ['_runtime', 'train_loss', 'test_loss', 'train_energy']:
                df['f_' + str(k)] = summary[k]

        dfs.append(df)
        count += 1


    print(f'Fetched {count} runs in {runs.entity}/{runs.project} (max runs: {"all" if maxrun is None else maxrun})')
    return pd.concat(dfs, axis=0).reset_index(drop=True)

In [5]:
def sort_runs_by_test_loss(df, top=None):
    x = df.drop(columns=['epoch', 'train_loss', 'test_loss', 'train_energy'])
    x = x.drop(columns=['c_loss', 'c_seed', 'c_model', 'c_wandb', 'c_device'])
    x = x.drop(columns=['c_epochs', 'c_dataset', 'c_dropout', 'c_batch-size'])

    if x.c_training.unique()[0] == 'bp':
        x = x.drop(columns=[c for c in list(x.columns) if c.startswith('c_pc')])
        x = x.drop(columns=['c_clr', 'c_init', 'c_iterations', 'c_x_optimizer', 'f_train_energy'])

    x = x.drop_duplicates().reset_index(drop=True)
    x = x.sort_values('f_test_loss', ascending=True).reset_index(drop=True)

    return x if top is None else x[:top]

In [6]:
def top_parameters(df):
    df_params = pd.DataFrame(columns=['parameter', 'abs_best', 'mean_best', 'median_best'])
    params = [c for c in df.columns if c.startswith('c')]

    for param in params:
        abs_best = df.loc[0, param]
        mean_best = df.groupby(param).agg({'f_test_loss': np.mean}).reset_index().iloc[0, 0]
        median_best = df.groupby(param).agg({'f_test_loss': np.median}).reset_index().iloc[0, 0]
        df_params = df_params.append({'parameter': param, 'abs_best': abs_best, 'mean_best': mean_best, 'median_best': median_best}, ignore_index=True)

    return df_params

In [7]:
def correlation_stats(df):
    corr_train_loss = df.corr()['train_loss']
    corr_test_loss = df.corr()['test_loss']
    corr_train_energy = df.corr()['train_energy']
    corr_train_loss = corr_train_loss[corr_train_loss.index.str.startswith(('c_'))].sort_values(ascending=False)
    corr_test_loss = corr_test_loss[corr_test_loss.index.str.startswith(('c_'))].sort_values(ascending=False)
    corr_train_energy = corr_train_energy[corr_train_energy.index.str.startswith(('c_'))].sort_values(ascending=False)
    return corr_train_loss, corr_test_loss, corr_train_energy

In [8]:
def plot_run(df, run_id):
    xlabel = 'epochs'
    ylabel = df[df.run_id == run_id].c_loss.unique()[0]
    title = f'run {run_id}'
    # we can add fancy std-err visualizations and co.
    df[df.run_id == run_id].train_loss.plot(x='epoch', legend='train loss', xlabel=xlabel, ylabel=ylabel, title=title, figsize=(10, 4))
    df[df.run_id == run_id].test_loss.plot(x='epoch', legend='test loss', xlabel=xlabel, ylabel=ylabel, title=title, figsize=(10, 4))

In [9]:
# with warnings.catch_warnings():
df = extract(runs, maxrun=None)
df.to_csv(f'{dataset}-wandb-runs.csv')

df_bp = df[(df.c_training == 'bp')]
df_pc = df[(df.c_training == 'pc')]

Fetched 1127 runs in the-real-dl/bio-transformers (max runs: all)


In [10]:
top_bp = sort_runs_by_test_loss(df_bp, top=100)
top_pc = sort_runs_by_test_loss(df_pc, top=100)

In [11]:
top_bp[:5]

Unnamed: 0,sweep_id,run_id,c_lr,c_gamma,c_momentum,c_training,c_optimizer,c_batch_size,c_weight_decay,c_learning_rate,f__runtime,f_test_loss,f_train_loss
0,8jmex28k,025gx8z4,0.01,0.1,0.9,bp,momentum,32,0.001,0.01,1330.086605,0.311491,0.22653
1,8jmex28k,e3vnc36c,0.01,0.1,0.9,bp,momentum,32,0.001,0.01,1353.360908,0.311491,0.22653
2,8jmex28k,5p13abvx,0.01,0.1,0.9,bp,momentum,64,0.001,0.01,1157.148601,0.314865,0.226759
3,8jmex28k,mlgfi1bj,0.01,0.1,0.9,bp,momentum,64,0.001,0.01,1324.925439,0.314865,0.226759
4,8jmex28k,lmnxfbys,0.01,0.5,0.9,bp,momentum,32,0.001,0.01,1486.210035,0.314968,0.174141


In [12]:
top_pc[:5]

Unnamed: 0,sweep_id,run_id,c_lr,c_clr,c_init,c_gamma,c_pc_clr,c_pc_init,c_momentum,c_pc_gamma,...,c_x_optimizer,c_pc_optimizer,c_weight_decay,c_learning_rate,c_pc_weight_dacay,c_pc_weight_decay,f__runtime,f_test_loss,f_train_energy,f_train_loss
0,slmsr4uw,47s9d52e,0.0005,0.1,forward,,0.1,forward,,0.5,...,momentum,momentum,0.0005,0.0005,0.0,0.0,45830.154316,1.608991,0.00433,1.561306
1,slmsr4uw,upd94yyg,0.0005,0.1,forward,,0.1,forward,,0.5,...,momentum,momentum,0.001,0.0005,0.0,0.0,24083.956905,1.613096,0.00383,1.554434
2,slmsr4uw,w9sj18t0,0.0001,0.001,forward,,0.001,forward,,0.5,...,momentum,momentum,0.0005,0.0001,0.0,0.0,23537.590698,1.614908,0.001223,1.57266
3,nki5ynwb,om5z7fwx,0.005,0.0001,forward,,0.0001,forward,,0.0,...,momentum,momentum,0.0,0.005,0.0,0.0,22243.820155,1.616077,0.027488,1.580982
4,nki5ynwb,5pa4nh9s,0.01,0.0001,forward,,0.0001,forward,,0.01,...,momentum,momentum,0.0,0.01,0.0,0.0,22431.62581,1.616708,0.000425,1.573535


In [13]:
top_parameters(top_bp)

Unnamed: 0,parameter,abs_best,mean_best,median_best
0,c_lr,0.01,0.0005,0.0005
1,c_gamma,0.1,0,0
2,c_momentum,0.9,0,0
3,c_training,bp,bp,bp
4,c_optimizer,momentum,adam,adam
5,c_batch_size,32,32,32
6,c_weight_decay,0.001,0.0005,0.0005
7,c_learning_rate,0.01,0.0005,0.0005


In [14]:
top_parameters(top_pc)

Unnamed: 0,parameter,abs_best,mean_best,median_best
0,c_lr,0.0005,0.0001,0.0001
1,c_clr,0.1,0.0001,0.0001
2,c_init,forward,forward,forward
3,c_gamma,,0,0
4,c_pc_clr,0.1,0.0001,0.0001
5,c_pc_init,forward,forward,forward
6,c_momentum,,0,0
7,c_pc_gamma,0.5,0,0
8,c_pc_iters,50,50,50
9,c_training,pc,pc,pc


In [15]:
_, bp_corr_test_loss, _ = correlation_stats(df_bp)
_, pc_corr_test_loss, _ = correlation_stats(df_pc)

In [16]:
bp_corr_test_loss

c_learning_rate      0.048010
c_lr                 0.048010
c_gamma              0.026940
c_momentum           0.017710
c_batch_size        -0.065168
c_batch-size        -0.065168
c_weight_decay      -0.106239
c_clr                     NaN
c_seed                    NaN
c_epochs                  NaN
c_pc_clr                  NaN
c_dropout                 NaN
c_pc_gamma                NaN
c_pc_iters                NaN
c_iterations              NaN
c_pc_momentum             NaN
c_pc_weight_dacay         NaN
c_pc_weight_decay         NaN
Name: test_loss, dtype: float64

In [17]:
pc_corr_test_loss

c_learning_rate      0.112843
c_lr                 0.112843
c_pc_weight_decay    0.024164
c_pc_weight_dacay    0.024164
c_momentum           0.006555
c_pc_momentum       -0.006531
c_gamma             -0.015001
c_batch_size        -0.059157
c_batch-size        -0.059157
c_weight_decay      -0.061970
c_pc_gamma          -0.106021
c_pc_clr            -0.138347
c_clr               -0.138347
c_seed                    NaN
c_epochs                  NaN
c_dropout                 NaN
c_pc_iters                NaN
c_iterations              NaN
Name: test_loss, dtype: float64