In [1]:
from glob import glob
import os
import re
import pandas as pd
import numpy as np

In [2]:
log_path=sorted(glob('./res_log/*'))

In [3]:
# for log_file in log_path:
#     with open(log_file) as f:
#         if 'Early' in f.read():
#             print(log_file)

In [4]:
rx_dict = {
    'error_down': re.compile(r'down sim 500: (?P<error_down>.*)\n'),
    'mean_dist': re.compile(r'both up and down: (?P<mean_dist>.*)\n'),
    'error_up': re.compile(r'up sim 500: (?P<error_up>.*)\n'),
    'error_stay': re.compile(r'stay sim 500: (?P<error_stay>.*)\n'),
    'total_params':re.compile(r'Total params: (?P<total_params>.*)\n'),
}

In [5]:
def _parse_line(line):
    for key, rx in rx_dict.items():
        match = rx.search(line)
        if match:
            return key, match
    # if there are no matches
    return None, None

In [6]:
def parse_file(filepath):

    # open the file and read through it line by line
    with open(filepath, 'r') as file_object:
        line = file_object.readline()
        while line:
            # at each line check for a match with a regex
            key, match = _parse_line(line)

            if key == 'error_down':
                error_down = match.group('error_down')
                error_down=int(error_down)
                
            if key == 'mean_dist':
                mean_dist = match.group('mean_dist')
                mean_dist=float(mean_dist)
                
            if key == 'error_up':
                error_up = match.group('error_up')
                error_up=int(error_up)
                
            if key == 'error_stay':
                error_stay = match.group('error_stay')
                error_stay=int(error_stay)
                
            if key == 'total_params':
                total_params = match.group('total_params')
                
            line = file_object.readline()
            

    return [error_down,error_up,error_stay,mean_dist,total_params]

In [7]:
res_list=[]
for path in log_path:
    _log=parse_file(path)
    if len(os.path.basename(path).split("_"))==7:
        int_path=[''.join(os.path.basename(path).split("_")[:6])]
    else:
        int_path=[''.join(os.path.splitext(os.path.basename(path))[0].split("_"))]
    int_path=int(int_path[0])
    _log.append(int_path)
    
    res_list.append(_log)
    

In [8]:
res_arr=np.array(res_list)

In [9]:
res_df = pd.DataFrame(res_arr, columns=['error_down', 'error_up', 'error_stay', 'mean_dist', 'total_params'
                                       , 'file_name'])

In [10]:
res_df

Unnamed: 0,error_down,error_up,error_stay,mean_dist,total_params,file_name
0,0,0,0,137.2223520649639,313,101010111
1,0,0,0,134.14566820393335,313,101010111
2,0,0,0,135.36104830422676,313,101010111
3,0,0,0,134.84112066232808,313,101010111
4,0,0,1,132.0152592228053,643,101010222
...,...,...,...,...,...,...
519,0,0,0,133.2426979011194,13683,808080111
520,0,0,0,130.90186270358987,33123,808080222
521,0,0,0,132.44833418215222,33123,808080222
522,2,0,2,130.8339218679489,33123,808080222


In [11]:
res_df.to_csv("res_df.csv", mode='w')

In [12]:
for i in range(len(res_arr)):
    res_arr[i,4]=int(res_arr[i,4].replace(',',''))

In [13]:
std_temp=res_arr[:,:4].astype(np.float32)
std_arr=np.zeros((int(len(res_arr)/4),10))
for i in range(0,int(len(res_arr)/4)):
    std_arr[i,:4]=np.std(std_temp[4*i:4*(i+1)],axis=0)
    std_arr[i,4]=np.sum(np.std(std_temp[4*i:4*(i+1)],axis=0))
    std_arr[i,5:9]=np.mean(std_temp[4*i:4*(i+1)],axis=0)
    std_arr[i,9]=res_arr[4*i,4]

In [14]:
hyper_params=res_arr[::4,-1]
std_arr=np.hstack([std_arr,hyper_params.reshape(131,-1)])

In [15]:
std_df=pd.DataFrame(std_arr, columns=['std_error_down', 'std_error_up', 'std_error_stay', 'std_mean_dist', 'sum_stds'
                                       ,'mean_error_down', 'mean_error_up', 'mean_error_stay', 'mean_mean_dist', 'num_params','hyper_params'])

In [16]:
std_df

Unnamed: 0,std_error_down,std_error_up,std_error_stay,std_mean_dist,sum_stds,mean_error_down,mean_error_up,mean_error_stay,mean_mean_dist,num_params,hyper_params
0,0.0,0.0,0.0,1.14104425907135,1.14104425907135,0.0,0.0,0.0,135.39254760742188,313.0,101010111
1,0.0,0.0,0.4330126941204071,0.33274754881858826,0.7657602429389954,0.0,0.0,0.25,131.95924377441406,643.0,101010222
2,0.0,0.0,0.0,0.5499991178512573,0.5499991178512573,0.0,0.0,0.0,133.3686981201172,1023.0,202020111
3,0.0,0.4330126941204071,0.4330126941204071,0.5294870138168335,1.395512342453003,0.0,0.25,0.25,132.17578125,2283.0,202020222
4,0.0,0.0,0.4330126941204071,0.8390873670578003,1.2721000909805298,0.0,0.0,0.25,133.71481323242188,1503.0,202040111
...,...,...,...,...,...,...,...,...,...,...,...
126,0.0,0.0,0.4330126941204071,0.3806193172931671,0.8136320114135742,0.0,0.0,0.25,131.50448608398438,24923.0,808040222
127,0.4330126941204071,0.0,0.4330126941204071,0.3488175868988037,1.2148430347442627,0.25,0.0,0.25,133.78826904296875,12003.0,808060111
128,0.0,0.4330126941204071,0.0,0.5322102904319763,0.965222954750061,0.0,0.25,0.0,131.00352478027344,28623.0,808060222
129,0.4330126941204071,0.0,0.0,0.33318275213241577,0.7661954164505005,0.25,0.0,0.0,133.6107177734375,13683.0,808080111


In [17]:
std_df.to_csv("std_df.csv", mode='w')

In [19]:
np.argmin(std_df["mean_mean_dist"].values)

118

In [20]:
compare_table=std_df.values
compare_table[:,:-1]=compare_table[:,:-1].astype(np.float32)/np.max(compare_table[:,:-1],axis=0).astype(np.float32)

In [21]:
compare_df=pd.DataFrame(compare_table, columns=['std_error_down', 'std_error_up', 'std_error_stay', 'std_mean_dist', 'sum_stds'
                                       ,'mean_error_down', 'mean_error_up', 'mean_error_stay', 'mean_mean_dist', 'num_params', 'hyper_params'])
compare_df

Unnamed: 0,std_error_down,std_error_up,std_error_stay,std_mean_dist,sum_stds,mean_error_down,mean_error_up,mean_error_stay,mean_mean_dist,num_params,hyper_params
0,0,0,0,0.959636,0.412727,0,0,0,0.986999,0.0316066,101010111
1,0,0,0.39736,0.279846,0.276983,0,0,0.2,0.96197,0.0649298,101010222
2,0,0,0,0.462558,0.19894,0,0,0,0.972245,0.103302,202020111
3,0,0.5,0.39736,0.445307,0.50477,0,0.333333,0.2,0.963549,0.230536,202020222
4,0,0,0.39736,0.705685,0.460131,0,0,0.2,0.974768,0.151772,202040111
...,...,...,...,...,...,...,...,...,...,...,...
126,0,0,0.39736,0.320107,0.294299,0,0,0.2,0.958655,2.51671,808040222
127,0.433013,0,0.39736,0.293361,0.439421,0.25,0,0.2,0.975304,1.21206,808060111
128,0,0.5,0,0.447597,0.349131,0,0.333333,0,0.955003,2.89034,808060222
129,0.433013,0,0,0.280212,0.27714,0.25,0,0,0.974009,1.3817,808080111


In [22]:
compare_df.to_csv("compare_df.csv", mode='w')

In [23]:
compare_sum=np.sum(compare_table[:,5:-2],axis=1)*0.9+compare_table[:,-2]*0.1

In [24]:
hyper_params.shape

(131,)

In [25]:
compare_sum_df=pd.DataFrame(np.array([compare_sum,hyper_params]).T, columns=['compare_sum', 'hyper_params'])
compare_sum_df

Unnamed: 0,compare_sum,hyper_params
0,0.89146,101010111
1,1.05227,101010222
2,0.885351,202020111
3,1.37025,202020222
4,1.07247,202040111
...,...,...
126,1.29446,808040222
127,1.40398,808060111
128,1.44854,808060222
129,1.23978,808080111


In [26]:
compare_sum_df.to_csv("compare_sum_df.csv", mode='w')

In [27]:
np.argmin(compare_sum)

2

In [31]:
compare_sum[118]

1.460034991800785

In [32]:
std_df.loc[118]

std_error_down      0.433013
std_error_up               0
std_error_stay       0.39736
std_mean_dist       0.253261
sum_stds            0.422174
mean_error_down         0.25
mean_error_up              0
mean_error_stay          0.2
mean_mean_dist      0.951419
num_params           1.98758
hyper_params       806040222
Name: 118, dtype: object

In [34]:
res_df.loc[118*4:119*4-1]

Unnamed: 0,error_down,error_up,error_stay,mean_dist,total_params,file_name
472,0,0,1,130.4449999614852,19683,806040222
473,1,0,0,130.19800503425958,19683,806040222
474,0,0,0,131.0082064965499,19683,806040222
475,0,0,0,130.3961298852616,19683,806040222
