In [103]:
import json

import pandas as pd
from glob import glob
from os.path import join, isfile

from tensorflow.python.ops.gen_array_ops import deep_copy

from analysis.short_terms import column_dicts

In [104]:
exp_dirs = glob('src/*')
exp_dirs[:5]

['src/pe-cotsc_it-5_fit-vit_exp-def_chr-N_s-0',
 'src/pe-cot_it-5_fit-vit_exp-def_chr-C_s-1',
 'src/pe-cot_it-5_fit-vit_exp-def_chr-F_s-1',
 'src/pe-cot_it-5_fit-vit_exp-def_chr-D_s-1',
 'src/pe-io_it-1_fit-vit_exp-def_chr-F_s-0']

In [105]:
from copy import deepcopy

all_results = list()

for exp_dir in exp_dirs:
    basename = exp_dir.split('/')[-1]
    settings = basename.split('_')
    settings = dict([setting.split('-') for setting in settings])
    
    iter_dirs = glob(f'{exp_dir}/iteration_*')

    
    if len(iter_dirs) == 0:
        print(f'No iteration directories for {basename}')
        continue
    
    for iter_dir in iter_dirs:

        eval_file = join(iter_dir, 'evaluation.json')
      
        if not isfile(eval_file):
            print(f'Warning: No evaluation.json in {iter_dir}')
            continue
        
        with open(eval_file) as f:
            result = json.load(f)
        settings.update(result)


        iteration_number = int(iter_dir.split('iteration_')[-1])
        settings['curr_iter'] = iteration_number
    
        all_results.append(deepcopy(settings))
df = pd.DataFrame(all_results)
df



Unnamed: 0,pe,it,fit,exp,chr,s,similarity,diversity,sample_size,total,curr_iter
0,cotsc,5,vit,def,N,0,0.049034,0.064317,10,0.056675,1
1,cotsc,5,vit,def,N,0,0.027436,0.000000,10,0.013718,5
2,cotsc,5,vit,def,N,0,0.041144,0.084181,10,0.062662,2
3,cotsc,5,vit,def,N,0,0.039788,0.098419,10,0.069104,3
4,cotsc,5,vit,def,N,0,0.033787,0.017611,10,0.025699,4
...,...,...,...,...,...,...,...,...,...,...,...
69,cot,5,vit,def,M,1,0.027010,0.000000,10,0.013505,1
70,cot,5,vit,def,M,1,0.105552,0.121272,10,0.113412,5
71,cot,5,vit,def,M,1,0.012144,0.000129,10,0.006136,2
72,cot,5,vit,def,M,1,0.098063,0.140974,10,0.119519,3


In [106]:
#replce column
df = df.rename(columns=column_dicts)
# change Iter. to int
df['Iter.'] = df['Iter.'].astype(int)
df['IsLast'] = df['Iter.'] == df['curr_iter']
df

Unnamed: 0,PE,Iter.,Fit.,Exp.,Target,Seed,similarity,diversity,sample_size,total,curr_iter,IsLast
0,cotsc,5,vit,def,N,0,0.049034,0.064317,10,0.056675,1,False
1,cotsc,5,vit,def,N,0,0.027436,0.000000,10,0.013718,5,True
2,cotsc,5,vit,def,N,0,0.041144,0.084181,10,0.062662,2,False
3,cotsc,5,vit,def,N,0,0.039788,0.098419,10,0.069104,3,False
4,cotsc,5,vit,def,N,0,0.033787,0.017611,10,0.025699,4,False
...,...,...,...,...,...,...,...,...,...,...,...,...
69,cot,5,vit,def,M,1,0.027010,0.000000,10,0.013505,1,False
70,cot,5,vit,def,M,1,0.105552,0.121272,10,0.113412,5,True
71,cot,5,vit,def,M,1,0.012144,0.000129,10,0.006136,2,False
72,cot,5,vit,def,M,1,0.098063,0.140974,10,0.119519,3,False


In [107]:
df[df['IsLast'] == True].groupby(['PE', 'Target']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Iter.,Fit.,Exp.,Seed,similarity,diversity,sample_size,total,curr_iter,IsLast
PE,Target,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
cot,C,2,2,2,2,2,2,2,2,2,2
cot,D,1,1,1,1,1,1,1,1,1,1
cot,F,1,1,1,1,1,1,1,1,1,1
cot,M,1,1,1,1,1,1,1,1,1,1
cot,N,1,1,1,1,1,1,1,1,1,1
cotsc,N,1,1,1,1,1,1,1,1,1,1
io,C,1,1,1,1,1,1,1,1,1,1
io,D,1,1,1,1,1,1,1,1,1,1
io,F,1,1,1,1,1,1,1,1,1,1
io,M,1,1,1,1,1,1,1,1,1,1


In [108]:
# groupby
df_grouped = df[df['IsLast'] == True].groupby(['PE']).agg({'similarity': ['mean', 'std'], 'diversity': ['mean', 'std']})
df_grouped

Unnamed: 0_level_0,similarity,similarity,diversity,diversity
Unnamed: 0_level_1,mean,std,mean,std
PE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
cot,0.050652,0.029597,0.079714,0.073526
cotsc,0.027436,,0.0,
io,0.045995,0.017858,0.032666,0.034434
tot,0.039995,0.013703,0.036604,0.056192


In [111]:
df_grouped = df.groupby(['PE', 'curr_iter']).agg({'similarity': ['mean', 'std'], 'diversity': ['mean', 'std']})
df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity,similarity,diversity,diversity
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
PE,curr_iter,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
cot,1,0.039588,0.008397,0.045626,0.034504
cot,2,0.042113,0.014174,0.050405,0.041058
cot,3,0.055054,0.029285,0.095488,0.073654
cot,4,0.052348,0.022355,0.110634,0.07377
cot,5,0.050652,0.029597,0.079714,0.073526
cotsc,1,0.049034,,0.064317,
cotsc,2,0.041144,,0.084181,
cotsc,3,0.039788,,0.098419,
cotsc,4,0.033787,,0.017611,
cotsc,5,0.027436,,0.0,


In [114]:
df_grouped = df[df['IsLast'] == True].groupby(['Target', 'PE']).agg({'similarity': ['mean', 'std'], 'diversity': ['mean', 'std']})
df_grouped]

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity,similarity,diversity,diversity
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
Target,PE,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
C,cot,0.049654,0.014444,0.054988,0.015055
C,io,0.038951,,0.005711,
C,tot,0.057884,,0.119623,
D,cot,0.045179,,0.040866,
D,io,0.051278,,0.016054,
D,tot,0.043499,,0.0,
F,cot,0.026438,,0.206169,
F,io,0.037262,,0.058725,
F,tot,0.030336,,0.022439,
M,cot,0.105552,,0.121272,
