In [1]:
import pandas as pd
import numpy as np
import os

In [18]:
def get_params_per_config(texts):
    """Collect the parameter counts reported in the log lines.

    Every line containing the substring 'params' (case-insensitive) is
    treated as reporting one count; the count is the text that follows the
    last ':' on that line.

    Args:
        texts: iterable of log lines (strings).

    Returns:
        1-D numpy integer array with one entry per matching line, in the
        order the lines appear.

    Raises:
        ValueError: if the text after the last ':' of a matching line is
            not an integer.
    """
    params_count_arr = [
        int(line.split(":")[-1].strip())
        for line in texts
        if 'params' in line.lower()
    ]
    # The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` selects the same dtype.
    return np.array(params_count_arr, dtype=int)

def get_num_configs(texts):
    """Count the lines that contain 'config:' (case-insensitive).

    Args:
        texts: iterable of log lines (strings).

    Returns:
        Number of matching lines as an int (0 when there are none).
    """
    return sum(1 for entry in texts if 'config:' in entry.lower())
    
def get_telapsed_per_config(texts):
    """Collect the elapsed-time values reported in the log lines.

    A line counts as a timing line when it contains 'time elapsed measured'
    (case-insensitive) and does NOT contain 'config:'. The value is the
    second ':'-separated field of the stripped line.

    Args:
        texts: iterable of log lines (strings).

    Returns:
        1-D numpy float array with one entry per timing line, in the order
        the lines appear.

    Raises:
        ValueError: if the extracted field is not a valid float.
    """
    t_arr = []
    for line in texts:
        lowered = line.lower()
        # Lines announcing a configuration are never parsed as timing lines,
        # even if they also mention the elapsed time.
        if 'config:' in lowered:
            continue
        if 'time elapsed measured' in lowered:
            t_arr.append(float(line.strip().split(":")[1]))

    # The `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `float` selects the same dtype.
    return np.array(t_arr, dtype=float)

In [19]:
def get_best_osize_df(df, group_by, sort_field, n_top):
    """Keep the `n_top` rows with the smallest `sort_field` in each group.

    Args:
        df: input DataFrame.
        group_by: key(s) handed to `DataFrame.groupby`.
        sort_field: column used to rank the rows of each group (ascending).
        n_top: number of leading rows kept per group (handed to `head`).

    Returns:
        DataFrame produced by `groupby(...).apply(...)`; rows are indexed by
        the group key plus the 0-based rank inside the group.
    """
    def _leading_rows(group):
        # Rank inside the group and renumber rows from 0 before truncating
        ranked = group.sort_values(by=sort_field,
                                   ascending=True,
                                   ignore_index=True)
        return ranked.head(n_top)

    return df.groupby(group_by).apply(_leading_rows)

In [20]:
def get_model_gsresults(folder, model_type, solar_cycle, group_by=None, n_top=None):
    """Load the grid-search results of one model and add log-derived columns.

    Two files are read from `folder`:
      * ``gsresults_<model_type>_cycle<solar_cycle>.json`` - the results table
      * ``<model_type>_gs_cycle_<solar_cycle>_logs.txt`` - the training log

    The elapsed times and parameter counts parsed from the log are added to
    the table as the columns 'Time_Elapsed' and 'Num_Params'. The table is
    then reduced to the `n_top` rows with the lowest 'Validation_Error' in
    each `group_by` group.

    Args:
        folder: directory that contains both files.
        model_type: model identifier used in the file names (e.g. 'gru').
        solar_cycle: cycle identifier used in the file names.
        group_by: grouping key(s), forwarded unchanged to
            `get_best_osize_df` (and from there to `DataFrame.groupby`).
            NOTE(review): the None default is forwarded as-is; groupby is
            expected to reject it, so callers should pass a column name.
        n_top: number of rows kept per group, forwarded unchanged to
            `get_best_osize_df`.

    Returns:
        Tuple ``(df, df_by_osize)``: the full table with the two extra
        columns, and the per-group selection.

    Raises:
        AssertionError: if the log file or the JSON file does not exist.
        ValueError: if the number of timing / parameter entries found in the
            log differs from the number of rows in the JSON table.
    """
    # Build the expected file names and their full paths
    logfile = "{}_gs_cycle_{}_logs.txt".format(model_type, solar_cycle)
    jsonfile = "gsresults_{}_cycle{}.json".format(model_type, solar_cycle)
    logfile_path = os.path.join(folder, logfile)
    jsonfile_path = os.path.join(folder, jsonfile)

    # Use a plain string as the assertion message so that it is carried by the
    # AssertionError (a print(...) call there would evaluate to None).
    assert os.path.exists(logfile_path), \
        "Log file not found!!: {}".format(logfile_path)
    assert os.path.exists(jsonfile_path), \
        "Grid search related Json file not found!!: {}".format(jsonfile_path)

    # Results table
    df = pd.read_json(jsonfile_path)

    # Raw log lines
    with open(logfile_path, 'r') as f:
        texts = f.readlines()

    t_arr = get_telapsed_per_config(texts)  # elapsed time per timing line
    params_count_arr = get_params_per_config(texts)  # parameter count per 'params' line

    # Fail early, with both counts in the message, when the log and the table
    # do not line up row for row.
    if len(t_arr) != len(df) or len(params_count_arr) != len(df):
        raise ValueError(
            "Log/JSON mismatch: {} rows in '{}', but {} timing entries and {} "
            "parameter entries in '{}'".format(
                len(df), jsonfile, len(t_arr), len(params_count_arr), logfile))

    # Add the columns parsed from the log file
    df['Time_Elapsed'] = t_arr
    df['Num_Params'] = params_count_arr

    # Lowest validation error per group
    df_by_osize = get_best_osize_df(df, group_by, 'Validation_Error', n_top)

    return df, df_by_osize

In [27]:
# GRU grid search, solar cycle 22: full table plus the two rows with the
# lowest validation error for each output_size.
df_gru, df_gru_by_osize = get_model_gsresults(
    folder='./param_selection/solar_cycle_22_val_modified/',
    model_type='gru',
    solar_cycle=22,
    group_by='output_size',
    n_top=2,
)

In [28]:
# Display the full GRU grid-search table, including the Time_Elapsed and
# Num_Params columns added from the log file
df_gru

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,1,20,2,1,gru,True,0.001,cpu,4000,0.01868,8.2e-05,3260.273872,3921
1,1,1,20,2,1,gru,True,0.001,cpu,5000,0.022987,8.1e-05,4204.892673,3921
2,1,5,20,2,1,gru,True,0.001,cpu,4000,0.02647,0.000444,821.297525,4005
3,1,5,20,2,1,gru,True,0.001,cpu,5000,0.03452,0.000433,1064.625585,4005
4,1,10,20,2,1,gru,True,0.001,cpu,4000,0.062637,0.000912,483.232461,4110
5,1,10,20,2,1,gru,True,0.001,cpu,5000,0.066112,0.000942,574.097484,4110
6,1,1,30,2,1,gru,True,0.001,cpu,4000,0.036004,8.1e-05,3535.251051,8581
7,1,1,30,2,1,gru,True,0.001,cpu,5000,0.037216,8.2e-05,4465.828382,8581
8,1,5,30,2,1,gru,True,0.001,cpu,4000,0.032227,0.000416,1127.595065,8705
9,1,5,30,2,1,gru,True,0.001,cpu,5000,0.034582,0.000465,1412.693537,8705


In [29]:
# Display the GRU rows with the lowest Validation_Error within each
# output_size group (n_top=2 rows per group)
df_gru_by_osize

Unnamed: 0_level_0,Unnamed: 1_level_0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
output_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,0,1,1,20,2,1,gru,True,0.001,cpu,4000,0.01868,8.2e-05,3260.273872,3921
1,1,1,1,60,2,1,gru,True,0.001,cpu,4000,0.022553,8.1e-05,3716.183661,33361
5,0,1,5,20,2,1,gru,True,0.001,cpu,4000,0.02647,0.000444,821.297525,4005
5,1,1,5,30,2,1,gru,True,0.001,cpu,4000,0.032227,0.000416,1127.595065,8705
10,0,1,10,40,2,1,gru,True,0.001,cpu,4000,0.025544,0.000702,478.019712,15410
10,1,1,10,30,2,1,gru,True,0.001,cpu,5000,0.04331,0.000791,606.79743,8860


In [67]:
# Number of selected GRU rows, followed by the third selected row
# (position 2) as a plain dict
print(len(df_gru_by_osize))
df_gru_by_osize.iloc[2].to_dict()

6


{'input_size': 1,
 'output_size': 5,
 'n_hidden': 20,
 'n_layers': 2,
 'num_directions': 1,
 'model_type': 'gru',
 'batch_first': True,
 'lr': 0.001,
 'device': 'cpu',
 'num_epochs': 4000,
 'Validation_Error': 0.026469580829143004,
 'Training_Error': 0.000444386940216,
 'Time_Elapsed': 821.2975248359144,
 'Num_Params': 4005}

In [30]:
# Rank the selected GRU rows across all output sizes by validation error
# (lowest first), with a fresh 0-based index.
df_gru_by_osize.sort_values(by='Validation_Error', ascending=True, ignore_index=True)

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,1,20,2,1,gru,True,0.001,cpu,4000,0.01868,8.2e-05,3260.273872,3921
1,1,1,60,2,1,gru,True,0.001,cpu,4000,0.022553,8.1e-05,3716.183661,33361
2,1,10,40,2,1,gru,True,0.001,cpu,4000,0.025544,0.000702,478.019712,15410
3,1,5,20,2,1,gru,True,0.001,cpu,4000,0.02647,0.000444,821.297525,4005
4,1,5,30,2,1,gru,True,0.001,cpu,4000,0.032227,0.000416,1127.595065,8705
5,1,10,30,2,1,gru,True,0.001,cpu,5000,0.04331,0.000791,606.79743,8860


In [31]:
# RNN grid search, solar cycle 22: full table plus the two rows with the
# lowest validation error for each output_size.
df_rnn, df_rnn_by_osize = get_model_gsresults(
    folder='./param_selection/solar_cycle_22_val_modified/',
    model_type='rnn',
    solar_cycle=22,
    group_by='output_size',
    n_top=2,
)

In [32]:
# Display the full RNN grid-search table, including the Time_Elapsed and
# Num_Params columns added from the log file
df_rnn

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,1,20,2,1,rnn,True,0.001,cpu,4000,0.080716,9.1e-05,1393.552061,1321
1,1,1,20,2,1,rnn,True,0.001,cpu,5000,0.041096,8.3e-05,1752.772146,1321
2,1,5,20,2,1,rnn,True,0.001,cpu,4000,0.045094,0.000448,361.850958,1405
3,1,5,20,2,1,rnn,True,0.001,cpu,5000,0.045219,0.000393,452.95201,1405
4,1,10,20,2,1,rnn,True,0.001,cpu,4000,0.07739,0.001024,234.114917,1510
5,1,10,20,2,1,rnn,True,0.001,cpu,5000,0.077685,0.001109,290.099715,1510
6,1,1,30,2,1,rnn,True,0.001,cpu,4000,0.03107,8.3e-05,1452.807329,2881
7,1,1,30,2,1,rnn,True,0.001,cpu,5000,0.023744,8.2e-05,1833.972837,2881
8,1,5,30,2,1,rnn,True,0.001,cpu,4000,0.063914,0.000416,636.986645,3005
9,1,5,30,2,1,rnn,True,0.001,cpu,5000,0.035451,0.00041,796.896372,3005


In [33]:
# Display the RNN rows with the lowest Validation_Error within each
# output_size group (n_top=2 rows per group)
df_rnn_by_osize

Unnamed: 0_level_0,Unnamed: 1_level_0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
output_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,0,1,1,30,2,1,rnn,True,0.001,cpu,5000,0.023744,8.2e-05,1833.972837,2881
1,1,1,1,30,2,1,rnn,True,0.001,cpu,4000,0.03107,8.3e-05,1452.807329,2881
5,0,1,5,60,2,1,rnn,True,0.001,cpu,5000,0.028442,0.000351,688.853066,11405
5,1,1,5,50,2,1,rnn,True,0.001,cpu,4000,0.033879,0.000389,603.217539,8005
10,0,1,10,40,2,1,rnn,True,0.001,cpu,5000,0.048315,0.001004,299.869923,5410
10,1,1,10,40,2,1,rnn,True,0.001,cpu,4000,0.073215,0.000968,240.416127,5410


In [34]:
# Rank the selected RNN rows across all output sizes by validation error
# (lowest first), with a fresh 0-based index.
df_rnn_by_osize.sort_values(by='Validation_Error', ascending=True, ignore_index=True)

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,1,30,2,1,rnn,True,0.001,cpu,5000,0.023744,8.2e-05,1833.972837,2881
1,1,5,60,2,1,rnn,True,0.001,cpu,5000,0.028442,0.000351,688.853066,11405
2,1,1,30,2,1,rnn,True,0.001,cpu,4000,0.03107,8.3e-05,1452.807329,2881
3,1,5,50,2,1,rnn,True,0.001,cpu,4000,0.033879,0.000389,603.217539,8005
4,1,10,40,2,1,rnn,True,0.001,cpu,5000,0.048315,0.001004,299.869923,5410
5,1,10,40,2,1,rnn,True,0.001,cpu,4000,0.073215,0.000968,240.416127,5410


In [35]:
# LSTM grid search, solar cycle 22: full table plus the two rows with the
# lowest validation error for each output_size.
df_lstm, df_lstm_by_osize = get_model_gsresults(
    folder='./param_selection/solar_cycle_22_val_modified/',
    model_type='lstm',
    solar_cycle=22,
    group_by='output_size',
    n_top=2,
)

In [36]:
# Display the full LSTM grid-search table, including the Time_Elapsed and
# Num_Params columns added from the log file
df_lstm

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,1,20,2,1,lstm,True,0.001,cpu,4000,0.05971,8.1e-05,3000.726706,5221
1,1,1,20,2,1,lstm,True,0.001,cpu,5000,0.075688,8.2e-05,3759.61079,5221
2,1,5,20,2,1,lstm,True,0.001,cpu,4000,0.030661,0.000448,699.725517,5305
3,1,5,20,2,1,lstm,True,0.001,cpu,5000,0.06243,0.000397,876.592338,5305
4,1,10,20,2,1,lstm,True,0.001,cpu,4000,0.072277,0.001119,426.11763,5410
5,1,10,20,2,1,lstm,True,0.001,cpu,5000,0.0427,0.001006,533.536996,5410
6,1,1,30,2,1,lstm,True,0.001,cpu,4000,0.036997,8e-05,3181.618985,11431
7,1,1,30,2,1,lstm,True,0.001,cpu,5000,0.042869,8e-05,4003.352353,11431
8,1,5,30,2,1,lstm,True,0.001,cpu,4000,0.027851,0.00039,1060.590307,11555
9,1,5,30,2,1,lstm,True,0.001,cpu,5000,0.028472,0.00037,1322.672016,11555


In [37]:
# Display the LSTM rows with the lowest Validation_Error within each
# output_size group (n_top=2 rows per group)
df_lstm_by_osize

Unnamed: 0_level_0,Unnamed: 1_level_0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
output_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,0,1,1,30,2,1,lstm,True,0.001,cpu,4000,0.036997,8e-05,3181.618985,11431
1,1,1,1,30,2,1,lstm,True,0.001,cpu,5000,0.042869,8e-05,4003.352353,11431
5,0,1,5,30,2,1,lstm,True,0.001,cpu,4000,0.027851,0.00039,1060.590307,11555
5,1,1,5,50,2,1,lstm,True,0.001,cpu,4000,0.028084,0.000345,1144.327997,31255
10,0,1,10,60,2,1,lstm,True,0.001,cpu,4000,0.038093,0.000469,802.028822,45010
10,1,1,10,50,2,1,lstm,True,0.001,cpu,5000,0.039668,0.000276,646.750577,31510


In [38]:
# Rank the selected LSTM rows across all output sizes by validation error
# (lowest first), with a fresh 0-based index.
df_lstm_by_osize.sort_values(by='Validation_Error', ascending=True, ignore_index=True)

Unnamed: 0,input_size,output_size,n_hidden,n_layers,num_directions,model_type,batch_first,lr,device,num_epochs,Validation_Error,Training_Error,Time_Elapsed,Num_Params
0,1,5,30,2,1,lstm,True,0.001,cpu,4000,0.027851,0.00039,1060.590307,11555
1,1,5,50,2,1,lstm,True,0.001,cpu,4000,0.028084,0.000345,1144.327997,31255
2,1,1,30,2,1,lstm,True,0.001,cpu,4000,0.036997,8e-05,3181.618985,11431
3,1,10,60,2,1,lstm,True,0.001,cpu,4000,0.038093,0.000469,802.028822,45010
4,1,10,50,2,1,lstm,True,0.001,cpu,5000,0.039668,0.000276,646.750577,31510
5,1,1,30,2,1,lstm,True,0.001,cpu,5000,0.042869,8e-05,4003.352353,11431


In [11]:
# NOTE(review): `df_best_for_gru` is not defined in any cell visible in this
# notebook, and this cell's execution count (11) is lower than that of the
# cells above it - presumably a leftover from an earlier session. On a fresh
# kernel this would raise NameError; confirm the intended dataframe or drop
# the cell.
df_best_for_gru.iloc[1].to_dict()

{'input_size': 1,
 'output_size': 5,
 'n_hidden': 50,
 'n_layers': 2,
 'num_directions': 1,
 'model_type': 'gru',
 'batch_first': True,
 'lr': 0.001,
 'device': 'cpu',
 'num_epochs': 4000,
 'Validation_Error': 0.00045996459084500003,
 'Training_Error': 0.00042754743481000003}