In [1]:
from glob import glob
import os
import re
import pandas as pd
import numpy as np

In [2]:
log_path=sorted(glob('./res_log/*'))

In [3]:
# for log_file in log_path:
#     with open(log_file) as f:
#         if 'Early' in f.read():
#             print(log_file)

In [4]:
rx_dict = {
    'error_down': re.compile(r'down sim 500: (?P<error_down>.*)\n'),
    'mean_dist': re.compile(r'both up and down: (?P<mean_dist>.*)\n'),
    'error_up': re.compile(r'up sim 500: (?P<error_up>.*)\n'),
    'error_stay': re.compile(r'stay sim 500: (?P<error_stay>.*)\n'),
    'total_params':re.compile(r'Total params: (?P<total_params>.*)\n'),
}

In [5]:
def _parse_line(line):
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None

In [6]:
def parse_file(filepath):

    # open the file and read through it line by line
    with open(filepath, 'r') as file_object:
        line = file_object.readline()
        while line:
            # at each line check for a match with a regex
            key, match = _parse_line(line)

            if key == 'error_down':
                error_down = match.group('error_down')
                error_down=int(error_down)
                
            if key == 'mean_dist':
                mean_dist = match.group('mean_dist')
                mean_dist=float(mean_dist)
                
            if key == 'error_up':
                error_up = match.group('error_up')
                error_up=int(error_up)
                
            if key == 'error_stay':
                error_stay = match.group('error_stay')
                error_stay=int(error_stay)
                
            if key == 'total_params':
                total_params = match.group('total_params')
                
            line = file_object.readline()
            

    return [error_down,error_up,error_stay,mean_dist,total_params]

In [7]:
res_list=[]
for path in log_path:
    _log=parse_file(path)
    if len(os.path.basename(path).split("_"))==7:
        int_path=[''.join(os.path.basename(path).split("_")[:6])]
    else:
        int_path=[''.join(os.path.splitext(os.path.basename(path))[0].split("_"))]
    int_path=int(int_path[0])
    _log.append(int_path)
    
    res_list.append(_log)
    

In [8]:
res_arr=np.array(res_list)

In [9]:
res_df = pd.DataFrame(res_arr, columns=['error_down', 'error_up', 'error_stay', 'mean_dist', 'total_params'
                                       , 'file_name'])

In [10]:
res_df

Unnamed: 0,error_down,error_up,error_stay,mean_dist,total_params,file_name
0,15,17,0,131.02263896107115,643,101010222
1,14,30,0,128.95678874503818,643,101010222
2,17,48,2,130.38597376779444,643,101010222
3,37,22,0,127.1172978518066,643,101010222
4,35,14,1,127.62811573296253,2283,202020222
...,...,...,...,...,...,...
263,36,18,2,129.23616531034168,28623,808060222
264,21,9,0,129.72843864482144,33123,808080222
265,11,15,2,132.7578807156592,33123,808080222
266,9,38,1,128.6734101675026,33123,808080222


In [11]:
res_df.to_csv("res_df.csv", mode='w')

In [12]:
for i in range(len(res_arr)):
    res_arr[i,4]=int(res_arr[i,4].replace(',',''))

In [13]:
std_temp=res_arr[:,:4].astype(np.float32)
std_arr=np.zeros((int(len(res_arr)/4),10))
for i in range(0,int(len(res_arr)/4)):
    std_arr[i,:4]=np.std(std_temp[4*i:4*(i+1)],axis=0)
    std_arr[i,4]=np.sum(np.std(std_temp[4*i:4*(i+1)],axis=0))
    std_arr[i,5:9]=np.mean(std_temp[4*i:4*(i+1)],axis=0)
    std_arr[i,9]=res_arr[4*i,4]

In [14]:
hyper_params=res_arr[::4,-1]
std_arr=np.hstack([std_arr,hyper_params.reshape(67,-1)])

In [15]:
std_df=pd.DataFrame(std_arr, columns=['std_error_down', 'std_error_up', 'std_error_stay', 'std_mean_dist', 'sum_stds'
                                       ,'mean_error_down', 'mean_error_up', 'mean_error_stay', 'mean_mean_dist', 'num_params','hyper_params'])

In [16]:
std_df

Unnamed: 0,std_error_down,std_error_up,std_error_stay,std_mean_dist,sum_stds,mean_error_down,mean_error_up,mean_error_stay,mean_mean_dist,num_params,hyper_params
0,9.443913459777832,11.776565551757812,0.8660253882408142,1.5007377862930298,23.587242126464844,20.75,29.25,0.5,129.3706817626953,643.0,101010222
1,9.283721923828125,3.1124749183654785,0.4330126941204071,4.05448579788208,16.883695602416992,19.75,16.75,1.25,132.1724853515625,2283.0,202020222
2,2.6809513568878174,4.242640495300293,0.7071067690849304,3.498182535171509,11.128880500793457,15.25,18.0,1.0,131.90736389160156,3983.0,202040222
3,8.84237003326416,7.921489715576172,0.829156219959259,1.1377805471420288,18.730796813964844,25.75,24.5,0.75,130.28688049316406,6483.0,202060222
4,24.973735809326172,2.1650636196136475,0.5,1.9022310972213745,29.541030883789062,32.75,15.75,0.5,130.31629943847656,9783.0,202080222
...,...,...,...,...,...,...,...,...,...,...,...
62,3.3447721004486084,4.2646803855896,0.829156219959259,1.2550604343414307,9.693668365478516,21.75,17.75,0.75,130.5966796875,27083.0,806080222
63,1.4790199995040894,4.84768009185791,1.2247449159622192,1.0839710235595703,8.635416030883789,16.75,14.0,1.0,131.38604736328125,22023.0,808020222
64,13.589978218078613,1.5811388492584229,0.7071067690849304,0.8068363666534424,16.685060501098633,27.25,14.0,1.0,130.715087890625,24923.0,808040222
65,13.179055213928223,2.9580399990081787,0.829156219959259,0.5876663327217102,17.553916931152344,34.25,14.5,0.75,129.02297973632812,28623.0,808060222


In [17]:
std_df.to_csv("std_df.csv", mode='w')

In [18]:
compare_table=std_df.values
compare_table[:,:-1]=compare_table[:,:-1].astype(np.float32)/np.max(compare_table[:,:-1],axis=0).astype(np.float32)

In [19]:
compare_df=pd.DataFrame(compare_table, columns=['std_error_down', 'std_error_up', 'std_error_stay', 'std_mean_dist', 'sum_stds'
                                       ,'mean_error_down', 'mean_error_up', 'mean_error_stay', 'mean_mean_dist', 'num_params', 'hyper_params'])
compare_df

Unnamed: 0,std_error_down,std_error_up,std_error_stay,std_mean_dist,sum_stds,mean_error_down,mean_error_up,mean_error_stay,mean_mean_dist,num_params,hyper_params
0,0.994441,1.19766,0.666667,0.314397,2.37833,0.458564,0.506494,0.4,0.946765,0.0649298,101010222
1,0.977572,0.316534,0.333333,0.849394,1.7024,0.436464,0.290043,1,0.967269,0.230536,202020222
2,0.282303,0.43147,0.544331,0.732851,1.12214,0.337017,0.311688,0.8,0.965329,0.402201,202040222
3,0.931098,0.805604,0.638285,0.238359,1.88865,0.569061,0.424242,0.6,0.95347,0.65465,202060222
4,2.62972,0.220184,0.3849,0.398508,2.97865,0.723757,0.272727,0.4,0.953685,0.987882,202080222
...,...,...,...,...,...,...,...,...,...,...,...
62,0.352203,0.433712,0.638285,0.262929,0.977423,0.480663,0.307359,0.6,0.955737,2.73483,806080222
63,0.15574,0.493002,0.942809,0.227086,0.870719,0.370166,0.242424,0.8,0.961514,2.22387,808020222
64,1.43102,0.1608,0.544331,0.169028,1.68237,0.60221,0.242424,0.8,0.956603,2.51671,808040222
65,1.38775,0.300828,0.638285,0.123113,1.76998,0.756906,0.251082,0.6,0.94422,2.89034,808060222


In [20]:
compare_df.to_csv("compare_df.csv", mode='w')

In [21]:
compare_sum=np.sum(compare_table[:,5:-2],axis=1)*0.9+compare_table[:,-2]*0.1

In [22]:
hyper_params.shape

(67,)

In [23]:
compare_sum_df=pd.DataFrame(np.array([compare_sum,hyper_params]).T, columns=['compare_sum', 'hyper_params'])
compare_sum_df

Unnamed: 0,compare_sum,hyper_params
0,2.08713,101010222
1,2.44745,202020222
2,2.21285,202040222
3,2.35756,202060222
4,2.21394,202080222
...,...,...
62,2.38287,806080222
63,2.35908,808020222
64,2.59279,808040222
65,2.58602,808060222


In [24]:
compare_sum_df.to_csv("compare_sum_df.csv", mode='w')

In [25]:
np.argmin(compare_sum)

35

In [26]:
compare_sum[35]

1.6408758550882339

In [27]:
std_df.loc[35]

std_error_down      0.597407
std_error_up        0.252972
std_error_stay             0
std_mean_dist      0.0562571
sum_stds            0.849946
mean_error_down     0.480663
mean_error_up       0.316017
mean_error_stay            0
mean_mean_dist      0.952879
num_params          0.662728
hyper_params       602020222
Name: 35, dtype: object

In [28]:
res_df.loc[35*4:36*4-1]

Unnamed: 0,error_down,error_up,error_stay,mean_dist,total_params,file_name
140,26,22,0,130.30334083180344,6563,602020222
141,12,19,0,130.37230859394094,6563,602020222
142,25,16,0,130.40354560684307,6563,602020222
143,24,16,0,129.7452492634203,6563,602020222
