In [2]:
# %matplotlib inline
%load_ext autoreload
%autoreload 2
import main, storage, plotting
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import itertools
from itertools import takewhile,dropwhile
from fractions import Fraction


def p(func, /, *args, **keywords):
    def newfunc(*fargs, **fkeywords):
        newkeywords = {**keywords, **fkeywords}
        return func(*args, *fargs, **newkeywords)
    newfunc.func = func
    newfunc.args = args
    newfunc.keywords = keywords
    return newfunc

if False: #delete the all experiment records
        shutil.rmtree("ppdata")
        shutil.rmtree("exdata")
        shutil.rmtree("graphs")

df_og = storage.merge_and_load()
df_og, common_eval = plotting.compute_ranks(df_og)

# helper functions and axilliary data 
pd.options.mode.chained_assignment = None          # prevents displaying a useless warning

def df_enhance(df):
        
        df['avg_rank'] = df['ranks'].apply(np.mean) 
        df['last_rank'] = df['ranks'].apply(lambda a: a[-1]) 
        df['true_evaluations'] = (df['pop_size'] * df['true_ratio']).map(int)
        df['true_ratio'] = df['true_ratio'].map(Fraction).map(lambda f:Fraction(1,12) if f.denominator > 9999 else f) # due to floating point inacurracies the 1/12 gets represented weird and needs to be repaired 
        df['dim_red'] = df['dim_red'].replace('','none')
        df['model'] = df['model'].replace('','none')
        df['dim_red_kind'] = df['dim_red'].map(lambda a: ''.join(takewhile(lambda s: s.isalpha(), a)))
        df['model_kind'] = df['model'].map(lambda a: ''.join(takewhile(lambda s: s.isalpha(), a)))
        return df

df_og = df_enhance(df_og)
df_og = df_og[df_og['scale_train']==False]
pure_mask = df_og['evo_mode'].map(str) == 'Pure'
pures = df_og[pure_mask] 
# pures2 = df_og[df_og['scale_train']==Tru] 

pca_mask = (df_og['model_kind'] == 'gp')&(df_og['dim_red_kind'] == 'pca')&((df_og['pop_size']==48)|(df_og['pop_size']==64))&(df_og['true_ratio'].map(Fraction)==Fraction(1/8))
pca_df = df_og[pca_mask] 

# keep the special cases separate, it makes all the graphing easier
df_og = df_og[~(
       pure_mask 
       |(pca_mask&(df_og['dim_red'] != 'pca0.5'))   
)]  





The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# df_og = storage.merge_and_load()
# df_og == df_og[


# ]
# storage.overwrite(df_og)

In [3]:

def save_and_show(name:str):
        plt.savefig(f'graphs/{name}.png', bbox_inches='tight')
        plt.show()
def print_latex(df: pd.Series):
        r = lambda s: ' ' if r is None else s.replace('_', ' ')
        s= "\\begin{tabular}{|lr|}\n"  # + " | ".join(["c"] * len(df.columns)) + "}\n"
        s+= "\\hline\n"
        s+= r(df.index.name) + ' & ' + r(df.name) +'\\\\\n'
        s+= "\\hline\n"
        for k, v in df.items():
                s+= f"{k} & {v:0.2f} \\\\\n"
        s+= "\\hline\n"
        s+= "\\end{tabular}"
        print(s)
def np_apply_axis0(fn=None):
        def inner(arr, fn):
                arr = arr.to_list()
                b = np.apply_along_axis(fn,0,arr) 
                return list(b)
        return lambda a: inner(a, fn)
def close_to(series, num):
        return series.map(lambda a: abs(a - num) <= 1e-3)

def default_groupby(df, columns):
        map_dict = {
                'ranks':np_apply_axis0(np.average), 
                'avg_rank':'mean', 
                'last_rank':'mean',
                'elapsed_time':'mean',
                'model':'first',
                'dim_red':'first',
                'model_kind':'first',
                'dim_red_kind':'first',
        }
        for c in columns:  
               if c in map_dict: 
                      del map_dict[c]
        res = df.groupby(columns).agg(map_dict)
        return res

baselines = default_groupby(pures, ['pop_size'])
baseline_color = '#E04836'
default_color = 'forestgreen'
def bar(df,x_name=None, y_name=None,  index_mapper = None, y_mapper = None, regr = False, baseline_i=-1, x_ticklabel_mapper=None, print_table=True):
        if x_name != None:    
            df = default_groupby(df,x_name)
        if y_name == None:    
            y_name='last_rank'
        df = df.sort_index()
        
        if print_table:
                print_latex(df[y_name])
        colors = [default_color for _ in range(len(df))]
        if baseline_i != -1 :
                colors += [baseline_color]
                df.loc[str(len(df))] = baselines.loc[baseline_i]
                
        x = df.index
        y = df[y_name] 
        
        # if index_mapper != None: 
        #        x = x.map(index_mapper)
        # if y_mapper != None: 
        #        y = y.map(y_mapper)
        ax = sns.barplot(x=x.map(str), y=y, palette=colors)
        # fig, ax = plt.subplots()
        # ax.bar(x.map(str), y, label=bar_labels, color=bar_colors)
        ax.set_ylabel('Rank Percentile')
        xn = df.index.name if x_name == None else x_name
        if xn != None: # df.index.name can be None
                xn = xn.split('_')
                xn = map(lambda s: s.capitalize(),xn)
                xn = ' '.join(xn)
                ax.set_xlabel(xn)
        xticklabels = ax.get_xticklabels()
        if x_ticklabel_mapper:
               xticklabels = x_ticklabel_mapper(xticklabels)
        if baseline_i != -1 :
                xticklabels[-1] = 'baseline'
                plt.axhline(y=y[-1], color=baseline_color, linestyle='dotted')
        ax.set_xticklabels(xticklabels, size='small')
        # from pydoc import locate
# >>> locate('int')
        # print('\n'.join(map(lambda (a,b): f'{a}',zip(y,x))))
        if regr:
                xx = np.arange(len(y)-(1 if baseline_i != -1  else 0))
                m, b = np.polyfit(xx, y[:-1] if baseline_i != -1  else y, 1)
                ax.plot(xx, m*xx + b,color='red', alpha=0.5)
        return ax 



In [None]:
df = df_og
df = default_groupby(df, ['true_ratio'])
ax = bar(df,'true_ratio')

save_and_show('rrrrr')

\begin{tabular}{|lr|}
\hline
true ratio & last rank\\
\hline
1/16 & 46.03 \\
1/12 & 41.96 \\
1/8 & 64.36 \\
1/4 & 67.27 \\
1/2 & 45.84 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  ax.set_xticklabels(xticklabels, size='small')


In [30]:
df = pures
# df = df[df['pop_size']]
ax = bar(df, 'scale_train')
ax.set_xlabel('Population Size')
ax.set_title('Normal Evaluation')

save_and_show('pure')

\begin{tabular}{|lr|}
\hline
scale train & last rank\\
\hline
False & 50.00 \\
True & 50.00 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  ax.set_xticklabels(xticklabels, size='small')


In [32]:
# pca 
from itertools import takewhile,dropwhile

df=pca_df
df = df[(df['model_kind'] == 'gp')&(df['pop_size']==64)]

df['pca_ratio'] = df['dim_red'].map(lambda a: ''.join(dropwhile(lambda s: s.isalpha(), a)))

# df1 = df_og[(df_og['model_kind'] == 'gp')&(df_og['dim_red_kind'] == 'none')&(df_og['pop_size']==48)&(df_og['true_ratio'].map(Fraction)==Fraction(1/8))].iloc[0]
# df1['pca_ratio'] = str(1.0)
# df.loc[str(len(df))] = df1
df1 = df_og[(df_og['model_kind'] == 'gp')&(df_og['dim_red_kind'] == 'none')&(df_og['pop_size']==64)&(df_og['true_ratio'].map(Fraction)==Fraction(1/8))].iloc[0]
df1['pca_ratio'] = str(1.0)
df.loc[str(len(df))] = df1
ax = bar(df,'pca_ratio', regr=True, baseline_i=8)
ax.set_xlabel('pca reduction ratio')
ax.set_title('PCA + GP')

save_and_show('pca')

\begin{tabular}{|lr|}
\hline
pca ratio & last rank\\
\hline
0.1 & 45.41 \\
0.2 & 40.50 \\
0.3 & 48.47 \\
0.4 & 51.64 \\
0.5 & 54.96 \\
0.6 & 58.30 \\
0.7 & 59.68 \\
0.8 & 68.75 \\
0.9 & 69.25 \\
1.0 & 74.84 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  plt.axhline(y=y[-1], color=baseline_color, linestyle='dotted')
  ax.set_xticklabels(xticklabels, size='small')


KeyboardInterrupt: 

In [35]:
df=df_og
df = df[(df['model_kind'] == 'gp')& (df['dim_red_kind'] == 'none')]  
df = default_groupby(df, ['true_evaluations', 'pop_size'])
pures2 = pures.set_index(pures['pop_size'].map(lambda n: (n,n)))
df = pd.concat([df, pures2])
ax = bar(df, 'last_rank', print_table=False)

save_and_show('pop_evals')


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  ax.set_xticklabels(xticklabels, size='small')


KeyboardInterrupt: 

In [36]:
df=df_og
df = df[(df['model_kind'] == 'gp')]  
ax = bar(df, 'dim_red')
save_and_show('gp_dim_red')


\begin{tabular}{|lr|}
\hline
dim red & last rank\\
\hline
none & 64.01 \\
pca0.5 & 48.84 \\
vae[0.5] & 40.25 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  ax.set_xticklabels(xticklabels, size='small')


KeyboardInterrupt: 

In [None]:
df=df_og

df = default_groupby(df, ['dim_red_kind'])
ax = bar(df, 'elapsed_time', add_baseline=False)
ax.set_ylabel('Iteration Time (ms)')
save_and_show('gp_dim_red')

\begin{tabular}{|lr|}
\hline
dim red kind & elapsed time\\
\hline
none & 5.68 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  ax.set_xticklabels(xticklabels, size='small')


In [None]:
df=df_og
df = df[(df['dim_red_kind'] == 'none')&(df['pop_size']==48)&(df['true_ratio'].map(Fraction)==Fraction(1/8))]  
df = default_groupby(df, ['model'])
ax = bar(df, 'avg_rank')
plt.show()



\begin{tabular}{|lr|}
\hline
model & avg rank\\
\hline
elm100 & 64.62 \\
elm150 & 62.30 \\
elm200 & 61.30 \\
elm50 & 65.54 \\
elm500 & 58.13 \\
gp & 70.20 \\
mlp[0.5] & 18.54 \\
mlp[1, 1] & 22.33 \\
mlp[10] & 20.37 \\
mlp[1] & 17.24 \\
mlp[2] & 17.70 \\
rbf[0.5]_1.0 & 26.72 \\
rbf[0.5]_2.0 & 31.45 \\
rbf[1]_1.0 & 28.32 \\
rbf[1]_5.0 & 55.49 \\
rbf[2]_0.5 & 25.21 \\
rbf[2]_1.0 & 30.57 \\
\hline
\end{tabular}



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  plt.axhline(y=y[-1], color=baseline_color, linestyle='dotted')
  ax.set_xticklabels(xticklabels, size='small')


In [None]:
# rank models without nodimred 
df=df_og
df1 = df[ close_to(df['true_ratio'], 1/8) & (df['dim_red_kind'] == 'none')]  
df = default_groupby(df, ['model'])
ax = bar(df, 'avg_rank')
plt.xticks(rotation=90, size= 'small')
plt.show()


In [None]:
# df=df_og
# mask = ~(
#     (df['model_kind'] == 'gp')&(df['dim_red_kind']=='pca')&(df['dim_red']!='pca0.5') |
#     ((df['model_kind'] == 'elm')|((df['model_kind'] == 'rbf'))&(df['pop_size']!=48)&(df['true_ratio'].map(Fraction)!=Fraction(1/8)))
# )
# df=df[mask]
# storage.overwrite(df)

  storage.overwrite(df)
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['evo_mode', 'model', 'dim_red', 'full_desc', 'coco_directory',
       'timestamp', 'ranks', 'dim_red_kind', 'model_kind'],
      dtype='object')]

  data_storage.put(name,df)


Unnamed: 0,pop_size,evo_mode,model,dim_red,instance,function,dim,full_desc,elapsed_time,coco_directory,...,scale_train,vals,evals,true_ratio,ranks,avg_rank,best_rank,true_evaluations,dim_red_kind,model_kind
0,32,BestK0.0625,gp,vae[0.5],1,1,10,32_BestK0.0625_vae[0.5]_gp,72.508390,exdata\32_BestK0.0625_vae[0.5]_gp-0001,...,False,"[102.12232725183134, 102.12232725183134, 102.1...","[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2...",0.0625,[0.0],0.000000,0.000000,2,vae,gp
1,32,BestK0.0625,gp,vae[0.5],2,1,10,32_BestK0.0625_vae[0.5]_gp,73.431439,exdata\32_BestK0.0625_vae[0.5]_gp-0001,...,False,"[478.9991023350632, 470.8618717897199, 470.475...","[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2...",0.0625,[5.658164761067141],5.658165,5.658165,2,vae,gp
2,32,BestK0.0625,gp,vae[0.5],3,1,10,32_BestK0.0625_vae[0.5]_gp,73.417115,exdata\32_BestK0.0625_vae[0.5]_gp-0001,...,False,"[-166.88599727744486, -166.88599727744486, -17...","[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2...",0.0625,[0.20521841102315364],0.205218,0.205218,2,vae,gp
3,32,BestK0.0625,gp,vae[0.5],1,2,10,32_BestK0.0625_vae[0.5]_gp,61.410971,exdata\32_BestK0.0625_vae[0.5]_gp-0001,...,False,"[4419302.631536276, 1664170.0688576591, 139243...","[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2...",0.0625,[0.05863383172089698],0.058634,0.058634,2,vae,gp
4,32,BestK0.0625,gp,vae[0.5],2,2,10,32_BestK0.0625_vae[0.5]_gp,66.473295,exdata\32_BestK0.0625_vae[0.5]_gp-0001,...,False,"[1666807.8282462435, 383191.03704346257, 38319...","[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2...",0.0625,[70.56581647610672],70.565816,70.565816,2,vae,gp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
945499,16,BestK0.5,gp,pca0.5,2,23,10,16_BestK0.5_pca0.5_gp,2.871711,exdata\16_BestK0.5_pca0.5_gp-0017,...,False,"[6.381818507028731, 6.381818507028731, 5.03877...","[8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96...",0.5000,[70.65376722368806],70.653767,70.653767,8,pca,gp
945500,16,BestK0.5,gp,pca0.5,3,23,10,16_BestK0.5_pca0.5_gp,2.850498,exdata\16_BestK0.5_pca0.5_gp-0017,...,False,"[-123.0075635136414, -127.77499133355643, -127...","[8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96...",0.5000,[87.45236001172677],87.452360,87.452360,8,pca,gp
945501,16,BestK0.5,gp,pca0.5,1,24,10,16_BestK0.5_pca0.5_gp,2.998809,exdata\16_BestK0.5_pca0.5_gp-0017,...,False,"[236.2541021205564, 236.17914775917546, 236.17...","[8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96...",0.5000,[26.912928759894456],26.912929,26.912929,8,pca,gp
945502,16,BestK0.5,gp,pca0.5,2,24,10,16_BestK0.5_pca0.5_gp,2.672617,exdata\16_BestK0.5_pca0.5_gp-0017,...,False,"[253.27962446039106, 193.0494601909699, 193.04...","[8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96...",0.5000,[81.67692758721782],81.676928,81.676928,8,pca,gp


In [None]:
#popsize,true_evals           with only gp
from fractions import Fraction
df=df_og #(df['pop_size']==48)&(df['true_ratio']==1/8)
df=df[(df['model_kind'] == 'gp')&(df['dim_red_kind']=='pca')&(df['dim_red']!='pca')]
df = default_groupby(df, ['full_desc'])

ax = bar(df, 'avg_rank')
ax.set_label('dim red, true evals, aux evals')
ax.set_xlabel('truly evaluated fraction of population')
ax.set_ylabel('rank percentile avg')
plt.xticks(rotation=0, size= 'xx-small')
for tick in ax.xaxis.get_major_ticks()[1::2]:
    tick.set_pad(15)
plt.show() 

full_desc
16_BestK0.5_pca0.5_gp                    54.984869
32_BestK0.0625_pca0.5_gp                 46.164315
48_BestK0.0625_pca0.5_gp                 54.768601
48_BestK0.08333333333333333_pca0.5_gp    51.945596
4_BestK0.5_pca0.5_gp                     47.587330
64_BestK0.0625_pca0.5_gp                 53.679445
8_BestK0.5_pca0.5_gp                     56.778337
Name: avg_rank, dtype: float64


In [None]:
#popsize,true_evals           with only gp
from fractions import Fraction
df=df_og
df=df[(df['model_kind'] == 'gp')&(df['dim_red_kind']=='none')]
df = default_groupby(df, ['true_ratio'])

ax = bar(df, 'avg_rank',index_mapper = lambda a: Fraction(a))
ax.set_label('dim red, true evals, aux evals')
ax.set_xlabel('truly evaluated fraction of population')
ax.set_ylabel('rank percentile avg')

labels = [item.get_text() for item in ax.get_xticklabels()]
labels[1] = '1/12'
ax.set_xticklabels(labels)
plt.show() 

true_ratio
0.0625      67.416219
0.083333    68.847324
0.125       68.415962
0.25        65.210289
0.5         57.586657
5           67.367328
Name: avg_rank, dtype: float64



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  plt.axhline(y=y[-1], color=baseline_color, linestyle='dotted')
  ax.set_xticklabels(xticklabels, size='small')
  ax.set_xticklabels(labels)


In [None]:
#popsize,true_evals           with only gp
df=df_og
df=df[(df['model_kind'] == 'gp')&(df['dim_red_kind']=='none')]
df = default_groupby(df, ['true_evaluations','pop_size'])
ax = bar(df, 'avg_rank')
ax.set_label('dim red, true evals, aux evals')
ax.set_xlabel('population: (true evaluated, generated)')
ax.set_ylabel('rank percentile avg')
# xlabel = ax.get_xlabel()
# ax.set_xlabel([1,2,3], rotation='horizontal')
# plt.savefig("graphs/pop.png")
# plt.pause(0.01)
plt.show() 

(2, 4)      54.534022
(2, 8)      62.201014
(2, 16)     66.480433
(2, 32)     63.559743
(3, 48)     67.170396
(4, 8)      65.289420
(4, 16)     72.138744
(4, 32)     70.131784
(4, 48)     68.847324
(4, 64)     71.518517
(6, 48)     70.197968
(8, 16)     65.532226
(8, 32)     67.032927
(8, 64)     67.446599
(12, 48)    58.138128
(16, 32)    43.413940
16          67.367328
Name: avg_rank, dtype: float64



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x=x.map(str), y=y, palette=colors)
  plt.axhline(y=y[-1], color=baseline_color, linestyle='dotted')
  ax.set_xticklabels(xticklabels, size='small')


In [None]:
df=df_og
plotting.plot_ranks(df, common_eval)

  ax.legend([handles[idx] for idx in order],[labels[idx]+'-->'+str(round(values[idx],2)) for idx in order])
