# Parse Hyperparameters from Tune

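This notebook parses the hyperparameters and validation statistics of each tune run: the hyperparameters are read from the run's folder name, and the statistics are taken from the best epoch (lowest `testloss`) logged in the run's `trainlog.csv`.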

In [65]:
import os
import pandas as pd

!rsync -avz -e "ssh" marc@uni:/data2/breizhcrops/tune /tmp/
    
tuneresults = "/tmp/tune"
runs = os.listdir(tuneresults)
runs = [run for run in runs if os.path.exists(os.path.join(tuneresults,run,"trainlog.csv"))]

receiving incremental file list
tune/
tune/InceptionTime_input-dim=13_num-classes=9_hidden-dims=128_num-layers=3_learning-rate=0.0009980594748806762_weight-decay=0.0010626996351948946/trainlog.csv
tune/InceptionTime_input-dim=13_num-classes=9_hidden-dims=64_num-layers=3_learning-rate=0.0017112502300294895_weight-decay=5.508381701917304e-05/
tune/InceptionTime_input-dim=13_num-classes=9_hidden-dims=64_num-layers=4_learning-rate=0.00028098649833653664_weight-decay=0.0008642986871646551/trainlog.csv
tune/LSTM_input-dim=13_num-classes=9_hidden-dims=128_num-layers=1_bidirectional=False_use-layernorm=True_dropout=0.7369083498541_learning-rate=0.00013807584269740695_weight-decay=0.000944157330485858/trainlog.csv
tune/MSResNet_input-dim=13_num-classes=9_hidden-dims=32_learning-rate=0.00010099699324166293_weight-decay=5.221576864451801e-05/trainlog.csv
tune/MSResNet_input-dim=13_num-classes=9_hidden-dims=32_learning-rate=0.00010586983718434764_weight-decay=1.4729300901739634e-07/
tune/MSResNet_

In [72]:
def parse_hyperparameters_folder_name(run):
    """Parse the model name and hyperparameters encoded in a run folder name.

    The folder name is expected to have the form
    ``<model>_<key>=<value>_<key>=<value>...``.

    Args:
        run: Path to (or bare name of) a run folder. Only the last path
            component is parsed; trailing path separators are ignored.

    Returns:
        dict mapping each hyperparameter name to its value as a string
        (values are not converted to numbers), plus the key ``"model"``
        holding the text before the first underscore.

    Raises:
        ValueError: if a ``_``-separated token after the model name
            contains no ``=``.
    """
    # basename() of a path that ends in a separator is "", which would silently
    # yield an empty model name and no hyperparameters; strip separators first.
    separators = os.sep + (os.altsep or "")
    run = os.path.basename(os.fspath(run).rstrip(separators))
    model, *hyperparameter_str = run.split("_")

    hyperparameter = dict()
    for kv in hyperparameter_str:
        # Split on the first "=" only, so a value may itself contain "=".
        k, sep, v = kv.partition("=")
        if not sep:
            raise ValueError(
                "expected 'key=value' in folder name {!r}, got {!r}".format(run, kv)
            )
        hyperparameter[k] = v
    # Stored last, so the model name wins over a hyperparameter called "model".
    hyperparameter["model"] = model
    return hyperparameter

In [73]:
def read_best_epoch_stats(run):
    """Return the logged statistics of the epoch with the lowest test loss.

    Args:
        run: Path to a run folder containing ``trainlog.csv``. The first CSV
            column is used as the index and the file must provide a
            ``testloss`` column.

    Returns:
        dict with one entry per CSV column holding the values of the row
        where ``testloss`` is minimal, plus ``"epoch"`` set to that row's
        index label.
    """
    trainlog_path = os.path.join(run, "trainlog.csv")
    trainlog = pd.read_csv(trainlog_path, index_col=0)

    # Index label (not position) of the row with the smallest test loss.
    best_label = trainlog["testloss"].idxmin()
    best_row = trainlog.loc[best_label]

    return {**best_row.to_dict(), "epoch": best_label}

In [74]:

def merge(dict1,dict2):
    """Copy every entry of ``dict2`` into ``dict1`` and return ``dict1``.

    ``dict1`` is modified in place; for keys present in both dictionaries
    the value from ``dict2`` replaces the one in ``dict1``. ``dict2`` is
    left unchanged.
    """
    dict1.update(dict2)
    return dict1

In [75]:
# Build one record per run: the hyperparameters parsed from the folder name,
# combined with the statistics of that run's best epoch.
results = []
for run in runs:
    run_dir = os.path.join(tuneresults, run)
    result = parse_hyperparameters_folder_name(run_dir)
    best_epoch_stats = read_best_epoch_stats(run_dir)
    result = merge(result, best_epoch_stats)
    results.append(result)

# Turn the list of records into a table; the bare name displays it as cell output.
results = pd.DataFrame(results)
results