## Hyperparameter Run Analyzer

Use this notebook to analyze the history dataframes that result from a hyperparameter run. See which hyperparameters led to the lowest losses, and which caused everything to break.

In [38]:
import os
import sys
import pandas as pd
import shutil

# File name of the pickled history dataframe to analyze
HISTORY_DATAFRAME_FILE = 'estimator_gaze_1m_20d_22hr_56min.pdpckle'

# Everything is resolved relative to the directory the notebook is run from:
# the history file sits in <cwd>/../../local/old_models
cwd = os.getcwd()
model_dir = os.path.join(
    cwd,
    '..',
    '..',
    'local',
    'old_models',
)
history_path = os.path.join(model_dir, HISTORY_DATAFRAME_FILE)

In [41]:
# Read the pickled history dataframe and order it best-first (lowest loss on top).
# NOTE: unpickling can execute arbitrary code - only load history files you trust.
df = pd.read_pickle(history_path).sort_values('loss')

# List every column the history provides
print(df.columns)

# Show the results next to one group of hyperparameters. Other useful groups:
#   ['id', 'loss', 'steps', 'learning_rate', 'dropout_keep_prob', 'batch_norm']
#   ['id', 'loss', 'steps', 'num_rb', 'rb_feat', 'rb_kernel']
df.loc[:, ['id', 'loss', 'steps', 'dimred_feat', 'dimred_kernel', 'dimred_stride', 'fc_layers']]

Index(['model_name', 'dropout_keep_prob', 'optimizer_type', 'learning_rate',
       'num_rb', 'rb_feat', 'rb_kernel', 'batch_norm', 'fc_layers',
       'dimred_feat', 'dimred_kernel', 'dimred_stride', 'id', 'loss', 'steps',
       'rmse'],
      dtype='object')


Unnamed: 0,id,loss,steps,dimred_feat,dimred_kernel,dimred_stride,fc_layers
251,bff5df9b-d0ef-4024-a8cc-afcab912eac5,3.002011e-02,38,16,8,2,[64]
1122,0704e1f7-283d-4604-ba09-1b4b7bc38ecf,4.066640e-02,49,64,8,4,[128]
806,eeba4de4-74f1-4aac-bcac-565b246df66f,4.161888e-02,49,32,6,4,[32]
1015,e4c2eb54-5b6c-47bc-9d4a-ce1a08a99196,4.949666e-02,49,64,4,4,[32]
645,1d754f33-384b-4353-bf69-0b2406c592dc,5.060942e-02,49,16,8,4,"[128, 32]"
846,3dc1a220-e2a6-493a-9fa6-c30c7de73bc4,5.084727e-02,42,16,8,2,[32]
527,ce7cb8d5-e6b9-4de3-927b-66434253c376,5.133777e-02,29,32,4,2,[32]
942,50fed0f5-26e9-4e52-ba62-b3054d9d77ca,5.165698e-02,49,16,8,2,[32]
1129,763b3677-cf1e-49c5-b986-737514a6b829,5.437913e-02,49,64,3,4,[32]
459,af34a5d4-10d8-4d2b-b44e-30619ef0f967,5.764963e-02,43,64,3,2,[64]


# Delete the worst runs

In [31]:
# Cull the worst-performing runs by deleting their directories under model_dir.
# WARNING: this is destructive - the directories are removed permanently.

# Total number of runs recorded in the history
num_rows = len(df.index)

# Fraction of the runs (the worst ones, ranked by loss) to delete
CULLING_PERCENT = 0.95
cull_number = int(num_rows * CULLING_PERCENT)
print('There are %d nets, we will cull %d of them' % (num_rows, cull_number))

# Rank worst-first and keep only the rows to be culled.
# Rows whose loss is NaN (presumably runs that broke - confirm) are put at the
# very front so they are culled as well: with pandas' default
# na_position='last' they would end up behind the lowest-loss run in a
# descending sort and would survive the cull.
# As before, df is rebound to the culled rows.
df = df.sort_values('loss', ascending=False, na_position='first').head(cull_number)

# Delete the directory of every culled run
those_about_to_die = df['id'].tolist()
for run_id in those_about_to_die:
    # Only ever delete a direct child of model_dir. An empty id would make
    # os.path.join return model_dir itself (wiping every run), and an id with
    # a path separator or '.'/'..' could point somewhere else entirely.
    if (not isinstance(run_id, str)
            or run_id in ('', '.', '..')
            or os.path.basename(run_id) != run_id):
        print('Skipping suspicious run id: %r' % (run_id,))
        continue
    try:
        shutil.rmtree(os.path.join(model_dir, run_id))
    except FileNotFoundError:
        # Directory is already gone (e.g. this cell was run before)
        pass

There are 1131 nets, we will cull 1074 of them
