In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
def get_params_per_config(texts):
    """Extract the parameter count from every 'params' line of a log.

    Args:
        texts: Iterable of log lines (strings).

    Returns:
        numpy.ndarray of integers, one entry per line whose lowercased text
        contains 'params'. Each value is parsed from the text that follows
        the last ':' on that line. An empty integer array is returned when
        no line matches.

    Raises:
        ValueError: If the text after the last ':' on a matching line is not
            a valid integer literal.
    """
    params_count_arr = [
        # int() tolerates the surrounding whitespace / trailing newline
        int(line.split(":")[-1])
        for line in texts
        if 'params' in line.lower()
    ]
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int` selects
    # the same default integer dtype and works on every NumPy version.
    return np.array(params_count_arr, dtype=int)

def get_num_configs(texts):
    """Count the log lines that contain 'config:' (case-insensitive).

    Args:
        texts: Iterable of log lines (strings).

    Returns:
        int: Number of lines whose lowercased text contains 'config:'.
    """
    return sum(1 for entry in texts if 'config:' in entry.lower())
    
def get_telapsed_per_config(texts):
    """Extract the elapsed-time values reported in a log.

    Args:
        texts: Iterable of log lines (strings).

    Returns:
        numpy.ndarray of floats, one entry per line whose lowercased text
        contains 'time_elapsed'. Each value is parsed from the second
        ':'-separated field of the stripped line. An empty float array is
        returned when no line matches.

    Raises:
        IndexError: If a matching line contains no ':'.
        ValueError: If the second ':'-separated field is not a valid float.
    """
    t_arr = []
    for line in texts:
        lowered = line.lower()
        # Lines containing 'config:' are skipped outright, even if they also
        # mention 'time_elapsed'.
        if 'config:' in lowered:
            continue
        if 'time_elapsed' in lowered:
            t_arr.append(float(line.strip().split(":")[1]))

    # The `np.float` alias was removed in NumPy 1.24; the builtin `float`
    # maps to the same float64 dtype on every NumPy version.
    return np.array(t_arr, dtype=float)

In [3]:
def get_best_osize_df(df, group_by, sort_field, n_top):
    """Keep the `n_top` rows with the smallest `sort_field` in each group.

    Args:
        df: pandas DataFrame to group.
        group_by: Column name (or other key) passed to `DataFrame.groupby`.
        sort_field: Column sorted in ascending order within each group.
        n_top: Number of leading rows kept per group after sorting.

    Returns:
        The DataFrame produced by `groupby(...).apply(...)`: the rows of each
        group are renumbered from 0 after sorting, then truncated to `n_top`.
    """
    def _top_rows(group):
        # Ascending sort puts the smallest `sort_field` values first
        ranked = group.sort_values(by=sort_field,
                                   ascending=True,
                                   ignore_index=True)
        return ranked.head(n_top)

    grouped = df.groupby(group_by)
    return grouped.apply(_top_rows)

In [4]:
def get_model_gsresults(folder, model_type, group_by=None, n_top=None):
    """Load the grid-search results of one model type from `folder`.

    Two files are expected inside `folder`:
      * gs_training_<model_type>_M50_P500_N200.log
      * grid_search_results_<model_type>_vars_NS25000.json

    Only the JSON file is read; the log file is merely required to exist.
    The 'Time_Elapsed' / 'Num_Params' columns that could be derived from the
    log (get_telapsed_per_config / get_params_per_config) are currently not
    added to the dataframe.

    Args:
        folder: Directory containing the log and JSON files.
        model_type: Model identifier substituted into both file names.
        group_by: Grouping key forwarded to `get_best_osize_df`. The default
            `None` is forwarded unchanged, so callers are expected to pass a
            column name.
        n_top: Number of rows kept per group, forwarded to
            `get_best_osize_df`.

    Returns:
        Tuple `(df, df_by_osize)`: the full results dataframe read from the
        JSON file, and the per-group selection of the `n_top` rows with the
        lowest 'val_loss_best'.

    Raises:
        AssertionError: If the log file or the JSON file does not exist.
    """
    # Both file names follow a fixed naming scheme keyed on the model type
    logfile = "gs_training_{}_M50_P500_N200.log".format(model_type)
    jsonfile = "grid_search_results_{}_vars_NS25000.json".format(model_type)

    logfile_path = os.path.join(folder, logfile)
    jsonfile_path = os.path.join(folder, jsonfile)

    # The message must be a string: passing `print(...)` here would print
    # eagerly and leave the AssertionError with a message of `None`.
    assert os.path.exists(logfile_path), \
        "Log file not found!! ({})".format(logfile_path)
    assert os.path.exists(jsonfile_path), \
        "Grid search related Json file not found!! ({})".format(jsonfile_path)

    # Full table of grid-search results
    df = pd.read_json(jsonfile_path)

    # Best validation-set performance within each `group_by` group
    df_by_osize = get_best_osize_df(df, group_by, 'val_loss_best', n_top)

    return df, df_by_osize

In [5]:
# Load the GRU grid-search results and, for every n_hidden value, keep the
# 4 configurations with the lowest 'val_loss_best'.
df_gru, df_gru_by_nhidden = get_model_gsresults(
    folder='../log/estimate_theta_vars/gru_L2_H30_modified_RNN/',
    model_type='gru',
    group_by='n_hidden',
    n_top=4,
)

In [6]:
# Display the full GRU grid-search results dataframe
df_gru

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,gru,1,2,30,1,0.001,3000,0.01,32,1,0.006492,0.006749,0.006492,0.006749
1,gru,1,2,30,1,0.001,3000,0.01,40,2,0.005694,0.006064,0.005694,0.006064
2,gru,1,2,30,2,0.001,3000,0.01,32,3,0.005055,0.005583,0.005055,0.005583
3,gru,1,2,30,2,0.001,3000,0.01,40,4,0.005542,0.005875,0.005542,0.005875
4,gru,1,2,40,1,0.001,3000,0.01,32,5,0.005337,0.005781,0.005337,0.005781
5,gru,1,2,40,1,0.001,3000,0.01,40,6,0.005605,0.005999,0.005605,0.005999
6,gru,1,2,40,2,0.001,3000,0.01,32,7,0.005204,0.005762,0.005204,0.005762
7,gru,1,2,40,2,0.001,3000,0.01,40,8,0.005173,0.005766,0.005173,0.005766
8,gru,1,2,50,1,0.001,3000,0.01,32,9,0.006117,0.006271,0.006117,0.006271
9,gru,1,2,50,1,0.001,3000,0.01,40,10,0.005953,0.006264,0.005953,0.006264


In [7]:
# Export the GRU results table as LaTeX; print() shows the raw markup
# instead of the quoted/escaped string repr a bare expression would give.
print(df_gru.to_latex())

\begin{tabular}{llrrrrrrrrrrrrr}
\toprule
{} & model\_type &  input\_size &  output\_size &  n\_hidden &  n\_layers &     lr &  num\_epochs &  min\_delta &  n\_hidden\_dense &  Config\_no &  tr\_loss\_end &  val\_loss\_end &  tr\_loss\_best &  val\_loss\_best \\
\midrule
0  &        gru &           1 &            2 &        30 &         1 &  0.001 &        3000 &       0.01 &              32 &          1 &     0.006492 &      0.006749 &      0.006492 &       0.006749 \\
1  &        gru &           1 &            2 &        30 &         1 &  0.001 &        3000 &       0.01 &              40 &          2 &     0.005694 &      0.006064 &      0.005694 &       0.006064 \\
2  &        gru &           1 &            2 &        30 &         2 &  0.001 &        3000 &       0.01 &              32 &          3 &     0.005055 &      0.005583 &      0.005055 &       0.005583 \\
3  &        gru &           1 &            2 &        30 &         2 &  0.001 &        3000 &       0.01 &             

In [12]:
# Display the best GRU entries per n_hidden group (sorted by ascending
# 'val_loss_best' within each group)
df_gru_by_nhidden

Unnamed: 0_level_0,Unnamed: 1_level_0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
n_hidden,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
30,0,gru,1,2,30,2,0.001,3000,0.01,32,3,0.005055,0.005583,0.005055,0.005583
30,1,gru,1,2,30,2,0.001,3000,0.01,40,4,0.005542,0.005875,0.005542,0.005875
30,2,gru,1,2,30,1,0.001,3000,0.01,40,2,0.005694,0.006064,0.005694,0.006064
30,3,gru,1,2,30,1,0.001,3000,0.01,32,1,0.006492,0.006749,0.006492,0.006749
40,0,gru,1,2,40,2,0.001,3000,0.01,32,7,0.005204,0.005762,0.005204,0.005762
40,1,gru,1,2,40,2,0.001,3000,0.01,40,8,0.005173,0.005766,0.005173,0.005766
40,2,gru,1,2,40,1,0.001,3000,0.01,32,5,0.005337,0.005781,0.005337,0.005781
40,3,gru,1,2,40,1,0.001,3000,0.01,40,6,0.005605,0.005999,0.005605,0.005999
50,0,gru,1,2,50,2,0.001,3000,0.01,40,12,0.004431,0.00575,0.004431,0.00575
50,1,gru,1,2,50,2,0.001,3000,0.01,32,11,0.004839,0.005842,0.004839,0.005842


In [13]:
# Rank the per-group best GRU configurations globally by 'val_loss_best'
# (lowest first); ignore_index renumbers the rows from 0.
df_gru_by_nhidden.sort_values(by='val_loss_best', ascending=True, ignore_index=True)

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,gru,1,2,30,2,0.001,3000,0.01,32,3,0.005055,0.005583,0.005055,0.005583
1,gru,1,2,50,2,0.001,3000,0.01,40,12,0.004431,0.00575,0.004431,0.00575
2,gru,1,2,40,2,0.001,3000,0.01,32,7,0.005204,0.005762,0.005204,0.005762
3,gru,1,2,40,2,0.001,3000,0.01,40,8,0.005173,0.005766,0.005173,0.005766
4,gru,1,2,60,1,0.001,3000,0.01,40,14,0.005114,0.005771,0.005114,0.005771
5,gru,1,2,40,1,0.001,3000,0.01,32,5,0.005337,0.005781,0.005337,0.005781
6,gru,1,2,50,2,0.001,3000,0.01,32,11,0.004839,0.005842,0.004839,0.005842
7,gru,1,2,30,2,0.001,3000,0.01,40,4,0.005542,0.005875,0.005542,0.005875
8,gru,1,2,60,1,0.001,3000,0.01,32,13,0.005166,0.005953,0.005166,0.005953
9,gru,1,2,40,1,0.001,3000,0.01,40,6,0.005605,0.005999,0.005605,0.005999


In [23]:
# Load the LSTM grid-search results and, for every n_hidden value, keep the
# 3 configurations with the lowest 'val_loss_best'.
df_lstm, df_lstm_by_nhidden = get_model_gsresults(
    folder='../log/estimate_theta_vars/lstm_L2_H40_modified_RNN/',
    model_type='lstm',
    group_by='n_hidden',
    n_top=3,
)

In [24]:
# Display the full LSTM grid-search results dataframe
df_lstm

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,lstm,1,2,30,1,0.001,3000,0.01,32,1,0.005443,0.005874,0.005443,0.005874
1,lstm,1,2,30,1,0.001,3000,0.01,40,2,0.006798,0.007013,0.006798,0.007013
2,lstm,1,2,30,2,0.001,3000,0.01,32,3,0.005694,0.005925,0.005694,0.005925
3,lstm,1,2,30,2,0.001,3000,0.01,40,4,0.005425,0.005981,0.005425,0.005981
4,lstm,1,2,40,1,0.001,3000,0.01,32,5,0.006287,0.006506,0.006287,0.006506
5,lstm,1,2,40,1,0.001,3000,0.01,40,6,0.006688,0.00684,0.006688,0.00684
6,lstm,1,2,40,2,0.001,3000,0.01,32,7,0.006274,0.006813,0.006274,0.006813
7,lstm,1,2,40,2,0.001,3000,0.01,40,8,0.005022,0.005781,0.005022,0.005781
8,lstm,1,2,50,1,0.001,3000,0.01,32,9,0.004672,0.005693,0.004672,0.005693
9,lstm,1,2,50,1,0.001,3000,0.01,40,10,0.006573,0.007028,0.006573,0.007028


In [29]:
# Export the LSTM results table as LaTeX; print() shows the raw markup
# instead of the quoted/escaped string repr a bare expression would give.
print(df_lstm.to_latex())

\begin{tabular}{llrrrrrrrrrrrrr}
\toprule
{} & model\_type &  input\_size &  output\_size &  n\_hidden &  n\_layers &     lr &  num\_epochs &  min\_delta &  n\_hidden\_dense &  Config\_no &  tr\_loss\_end &  val\_loss\_end &  tr\_loss\_best &  val\_loss\_best \\
\midrule
0  &       lstm &           1 &            2 &        30 &         1 &  0.001 &        3000 &       0.01 &              32 &          1 &     0.005443 &      0.005874 &      0.005443 &       0.005874 \\
1  &       lstm &           1 &            2 &        30 &         1 &  0.001 &        3000 &       0.01 &              40 &          2 &     0.006798 &      0.007013 &      0.006798 &       0.007013 \\
2  &       lstm &           1 &            2 &        30 &         2 &  0.001 &        3000 &       0.01 &              32 &          3 &     0.005694 &      0.005925 &      0.005694 &       0.005925 \\
3  &       lstm &           1 &            2 &        30 &         2 &  0.001 &        3000 &       0.01 &             

In [26]:
# Display the best LSTM entries per n_hidden group (sorted by ascending
# 'val_loss_best' within each group)
df_lstm_by_nhidden

Unnamed: 0_level_0,Unnamed: 1_level_0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
n_hidden,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
30,0,lstm,1,2,30,1,0.001,3000,0.01,32,1,0.005443,0.005874,0.005443,0.005874
30,1,lstm,1,2,30,2,0.001,3000,0.01,32,3,0.005694,0.005925,0.005694,0.005925
30,2,lstm,1,2,30,2,0.001,3000,0.01,40,4,0.005425,0.005981,0.005425,0.005981
40,0,lstm,1,2,40,2,0.001,3000,0.01,40,8,0.005022,0.005781,0.005022,0.005781
40,1,lstm,1,2,40,1,0.001,3000,0.01,32,5,0.006287,0.006506,0.006287,0.006506
40,2,lstm,1,2,40,2,0.001,3000,0.01,32,7,0.006274,0.006813,0.006274,0.006813
50,0,lstm,1,2,50,1,0.001,3000,0.01,32,9,0.004672,0.005693,0.004672,0.005693
50,1,lstm,1,2,50,2,0.001,3000,0.01,40,12,0.005582,0.006,0.005582,0.006
50,2,lstm,1,2,50,2,0.001,3000,0.01,32,11,0.003501,0.006556,0.003501,0.006556
60,0,lstm,1,2,60,1,0.001,3000,0.01,40,14,0.004533,0.005673,0.004533,0.005673


In [27]:
# Rank the per-group best LSTM configurations globally by 'val_loss_best'
# (lowest first); ignore_index renumbers the rows from 0.
df_lstm_by_nhidden.sort_values(by='val_loss_best', ascending=True, ignore_index=True)

Unnamed: 0,model_type,input_size,output_size,n_hidden,n_layers,lr,num_epochs,min_delta,n_hidden_dense,Config_no,tr_loss_end,val_loss_end,tr_loss_best,val_loss_best
0,lstm,1,2,60,1,0.001,3000,0.01,40,14,0.004533,0.005673,0.004533,0.005673
1,lstm,1,2,50,1,0.001,3000,0.01,32,9,0.004672,0.005693,0.004672,0.005693
2,lstm,1,2,40,2,0.001,3000,0.01,40,8,0.005022,0.005781,0.005022,0.005781
3,lstm,1,2,30,1,0.001,3000,0.01,32,1,0.005443,0.005874,0.005443,0.005874
4,lstm,1,2,60,1,0.001,3000,0.01,32,13,0.005479,0.005896,0.005479,0.005896
5,lstm,1,2,60,2,0.001,3000,0.01,40,16,0.004748,0.005917,0.004748,0.005917
6,lstm,1,2,30,2,0.001,3000,0.01,32,3,0.005694,0.005925,0.005694,0.005925
7,lstm,1,2,30,2,0.001,3000,0.01,40,4,0.005425,0.005981,0.005425,0.005981
8,lstm,1,2,50,2,0.001,3000,0.01,40,12,0.005582,0.006,0.005582,0.006
9,lstm,1,2,40,1,0.001,3000,0.01,32,5,0.006287,0.006506,0.006287,0.006506
