In [1]:
import os
import sys

# A notebook kernel does not define ``__file__``; the previous expression
# passed the *string* '__file__' to abspath, which resolves against the current
# working directory.  Use the working directory explicitly instead.
SCRIPT_DIR = os.getcwd()
# Make the parent directory importable.  The membership test keeps sys.path
# free of duplicate entries when this cell is re-run in the same kernel.
if os.path.dirname(SCRIPT_DIR) not in sys.path:
    sys.path.append(os.path.dirname(SCRIPT_DIR))

import pandas as pd
import numpy as np
import os

In [2]:
def get_params_per_config(texts):
    """Collect the parameter counts reported in a training log.

    Every line containing ``params`` (case-insensitive) contributes one value:
    the text after the last ``:`` on that line, parsed as an integer.

    Parameters
    ----------
    texts : iterable of str
        Log lines, e.g. the result of ``file.readlines()``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one entry per matching line, in input order.
        Empty when no line matches.

    Raises
    ------
    ValueError
        If the text after the last ``:`` of a matching line is not an integer.
    """
    params_count_arr = [
        # strip() removes the trailing newline / padding around the number
        int(line.split(":")[-1].strip())
        for line in texts
        if 'params' in line.lower()
    ]
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    return np.array(params_count_arr, dtype=int)

def get_num_configs(texts):
    """Count the log lines that contain ``config:`` (case-insensitive).

    Parameters
    ----------
    texts : iterable of str
        Log lines to scan.

    Returns
    -------
    int
        Number of lines containing the marker; 0 for empty input.
    """
    marker = 'config:'
    return sum(1 for entry in texts if marker in entry.lower())
    
def get_telapsed_per_config(texts):
    """Collect the elapsed-time values reported in a training log.

    A line contributes a value when it contains ``time_elapsed``
    (case-insensitive) and does *not* contain ``config:``.  The value is the
    second ``:``-separated field of the stripped line, parsed as a float.

    Parameters
    ----------
    texts : iterable of str
        Log lines, e.g. the result of ``file.readlines()``.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per matching line, in input order.
        Empty when no line matches.

    Raises
    ------
    IndexError
        If a matching line contains no ``:``.
    ValueError
        If the extracted field is not a valid float.
    """
    t_arr = []
    for line in texts:
        lowered = line.lower()
        if 'config:' in lowered:
            # Same precedence as before: a line carrying the config marker is
            # never parsed for a time, even if it also mentions time_elapsed.
            continue
        if 'time_elapsed' in lowered:
            t_arr.append(float(line.strip().split(":")[1]))

    # Builtin ``float`` replaces the ``np.float`` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    return np.array(t_arr, dtype=float)

In [3]:
def get_best_osize_df(df, group_by, sort_field, n_top):
    """Keep the ``n_top`` rows with the smallest ``sort_field`` in each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to group and rank.
    group_by : label or list of labels
        Passed to ``DataFrame.groupby``.
    sort_field : label
        Column the rows of each group are sorted by, ascending.
    n_top : int
        Passed to ``DataFrame.head`` for each sorted group.

    Returns
    -------
    pandas.DataFrame
        Result of ``groupby(...).apply(...)``: the group key forms the outer
        index level and the 0-based within-group rank forms the inner level.
    """
    def _lowest_rows(group):
        # Sort ascending and renumber from 0 so the inner index is the rank
        ranked = group.sort_values(by=sort_field, ascending=True, ignore_index=True)
        return ranked.head(n_top)

    grouped = df.groupby(group_by)
    return grouped.apply(_lowest_rows)

In [4]:
def get_model_gsresults(folder, logfile, jsonfile, model_type, group_by=None, n_top=None):
    """Load grid-search results and select the best rows per group.

    Parameters
    ----------
    folder : str
        Directory that contains both ``logfile`` and ``jsonfile``.
    logfile : str
        File name of the training log inside ``folder``.  Only its presence is
        checked; its contents are not used at the moment.
    jsonfile : str
        File name of the grid-search results JSON inside ``folder``; loaded
        with ``pandas.read_json``.
    model_type : str
        Not used by the function body; kept so existing calls keep working.
    group_by : label or list of labels, optional
        Forwarded to ``get_best_osize_df`` as the grouping key.
    n_top : int, optional
        Forwarded to ``get_best_osize_df`` as the number of rows per group.

    Returns
    -------
    df : pandas.DataFrame
        The full results table read from ``jsonfile``.
    df_by_osize : pandas.DataFrame
        Output of ``get_best_osize_df`` ranked on the ``val_loss_best`` column.

    Raises
    ------
    AssertionError
        If the log file or the JSON file does not exist.
    """
    # Get the full file paths
    logfile_path = os.path.join(folder, logfile)
    jsonfile_path = os.path.join(folder, jsonfile)

    # Check that both files are present.  The assertion message has to be a
    # string: ``assert cond, print(...)`` attaches None to the AssertionError
    # because print() returns None.
    assert os.path.exists(logfile_path), \
        "Log file not found!!: {}".format(logfile_path)
    assert os.path.exists(jsonfile_path), \
        "Grid search related Json file not found!!: {}".format(jsonfile_path)

    # Extract the dataframe
    df = pd.read_json(jsonfile_path)

    # NOTE: the log-derived columns are currently disabled, so the log file is
    # no longer read into memory here.  To restore them, read the log lines and
    # assign the results of get_telapsed_per_config / get_params_per_config to
    # df['Time_Elapsed'] / df['Num_Params'].

    # Get the dataframe based on best val. set performance, grouped by `group_by`
    df_by_osize = get_best_osize_df(df, group_by, 'val_loss_best', n_top)

    return df, df_by_osize

In [5]:
# Load the GRU grid-search results and shortlist up to 4 configurations with
# the lowest val_loss_best for every n_hidden value.
df_gru, df_gru_by_nhidden = get_model_gsresults(
    folder='../log/estimate_theta_pfixed/gru_L2_H60_modified_RNN_M50_P500_N200_2/',
    logfile='gs_training_modified_RNN_gru_M50_P500_N200.log',
    jsonfile='grid_search_results_gru_pfixed_NS25000.json',
    model_type='gru',
    group_by='n_hidden',
    n_top=4,
)

In [6]:
# Display the full GRU grid-search results table as loaded from the JSON file
df_gru

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,gru,1,5,40,1,0.0005,6000,32,1,0.164993,0.254848,0.164993,0.254848
1,gru,1,5,40,1,0.001,6000,32,2,0.12236,0.268092,0.12236,0.268092
2,gru,1,5,40,2,0.0005,6000,32,3,0.172683,0.26653,0.172683,0.26653
3,gru,1,5,40,2,0.001,6000,32,4,0.038752,0.307156,0.038752,0.307156
4,gru,1,5,50,1,0.0005,6000,32,5,0.135531,0.252254,0.135531,0.252254
5,gru,1,5,50,1,0.001,6000,32,6,0.131058,0.271367,0.131058,0.271367
6,gru,1,5,50,2,0.0005,6000,32,7,0.032016,0.296225,0.032016,0.296225
7,gru,1,5,50,2,0.001,6000,32,8,0.029908,0.323424,0.029908,0.323424
8,gru,1,5,60,1,0.0005,6000,32,9,0.077406,0.2847,0.077406,0.2847
9,gru,1,5,60,1,0.001,6000,32,10,0.088946,0.286369,0.088946,0.286369


In [7]:
# Display the per-n_hidden shortlist: within each group the rows are ordered by
# ascending val_loss_best and at most n_top (here 4) rows are kept
df_gru_by_nhidden

Unnamed: 0_level_0,Unnamed: 1_level_0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
n_hidden,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
40,0,gru,1,5,40,1,0.0005,6000,32,1,0.164993,0.254848,0.164993,0.254848
40,1,gru,1,5,40,2,0.0005,6000,32,3,0.172683,0.26653,0.172683,0.26653
40,2,gru,1,5,40,1,0.001,6000,32,2,0.12236,0.268092,0.12236,0.268092
40,3,gru,1,5,40,2,0.001,6000,32,4,0.038752,0.307156,0.038752,0.307156
50,0,gru,1,5,50,1,0.0005,6000,32,5,0.135531,0.252254,0.135531,0.252254
50,1,gru,1,5,50,1,0.001,6000,32,6,0.131058,0.271367,0.131058,0.271367
50,2,gru,1,5,50,2,0.0005,6000,32,7,0.032016,0.296225,0.032016,0.296225
50,3,gru,1,5,50,2,0.001,6000,32,8,0.029908,0.323424,0.029908,0.323424
60,0,gru,1,5,60,1,0.0005,6000,32,9,0.077406,0.2847,0.077406,0.2847
60,1,gru,1,5,60,1,0.001,6000,32,10,0.088946,0.286369,0.088946,0.286369


In [8]:
# Rank the whole GRU shortlist across groups, lowest val_loss_best first
df_gru_by_nhidden.sort_values(
    by='val_loss_best',
    ascending=True,
    ignore_index=True,
)

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,gru,1,5,50,1,0.0005,6000,32,5,0.135531,0.252254,0.135531,0.252254
1,gru,1,5,40,1,0.0005,6000,32,1,0.164993,0.254848,0.164993,0.254848
2,gru,1,5,70,1,0.0005,6000,32,13,0.062586,0.255173,0.062586,0.255173
3,gru,1,5,40,2,0.0005,6000,32,3,0.172683,0.26653,0.172683,0.26653
4,gru,1,5,40,1,0.001,6000,32,2,0.12236,0.268092,0.12236,0.268092
5,gru,1,5,50,1,0.001,6000,32,6,0.131058,0.271367,0.131058,0.271367
6,gru,1,5,70,1,0.001,6000,32,14,0.073224,0.272771,0.073224,0.272771
7,gru,1,5,70,2,0.001,6000,32,16,0.008287,0.279866,0.008287,0.279866
8,gru,1,5,60,1,0.0005,6000,32,9,0.077406,0.2847,0.077406,0.2847
9,gru,1,5,60,1,0.001,6000,32,10,0.088946,0.286369,0.088946,0.286369


In [9]:
# Load the LSTM grid-search results and shortlist up to 4 configurations with
# the lowest val_loss_best for every n_hidden value.
df_lstm, df_lstm_by_nhidden = get_model_gsresults(
    folder='../log/estimate_theta_pfixed/lstm_L2_H50_modified_RNN/',
    logfile='gs_training_modified_RNN_lstm_M50_P500_N200.log',
    jsonfile='grid_search_results_lstm_pfixed_NS25000.json',
    model_type='lstm',
    group_by='n_hidden',
    n_top=4,
)

In [10]:
# Rank the whole LSTM shortlist across groups, lowest val_loss_best first
df_lstm_by_nhidden.sort_values(
    by='val_loss_best',
    ascending=True,
    ignore_index=True,
)

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,lstm,1,5,40,2,0.0005,6000,32,3,0.127933,0.241499,0.127933,0.241499
1,lstm,1,5,50,2,0.0005,6000,32,7,0.07077,0.244626,0.07077,0.244626
2,lstm,1,5,60,2,0.0005,6000,32,11,0.050841,0.25755,0.050841,0.25755
3,lstm,1,5,40,1,0.0005,6000,32,1,0.199301,0.260964,0.199301,0.260964
4,lstm,1,5,50,1,0.0005,6000,32,5,0.180653,0.265856,0.180653,0.265856
5,lstm,1,5,60,1,0.0005,6000,32,9,0.125736,0.267459,0.125736,0.267459
6,lstm,1,5,40,2,0.001,6000,32,4,0.083345,0.273615,0.083345,0.273615
7,lstm,1,5,70,1,0.0005,6000,32,13,0.104575,0.274466,0.104575,0.274466
8,lstm,1,5,70,2,0.001,6000,32,16,0.020491,0.278934,0.020491,0.278934
9,lstm,1,5,40,1,0.001,6000,32,2,0.138897,0.280745,0.138897,0.280745
