In [1]:
import pandas as pd
import json
import numpy as np
from glob import glob
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib

matplotlib.rc('pdf', fonttype=42)
matplotlib.rc('ps', fonttype=42)

import os
sns.set(font_scale=1.25, style='whitegrid')
sns.set(palette='magma')

# rdir = '../results/'
rdir = '../results_blackbox/'
# figdir = 'figs/black-box' 
figdir = '../paper/'
# Set to path to PMLB datasets
datadir = '../datasets/pmlb/datasets/'
print('figdir:',figdir)
def save(name='tmp',h=None):
    name = name.strip().replace(' ','-').replace('%','pct')
    if h == None:
        h = plt.gcf()
    h.tight_layout()
    print('saving',figdir+'/'+name+'.pdf')
    if not os.path.exists(figdir):
        os.makedirs(figdir)
    plt.savefig(figdir+'/'+name+'.pdf', bbox_inches='tight')
    

figdir: ../paper/


In [2]:
##########
# load results
##########
df_results = pd.read_feather('../results/black-box_results_srbench.feather')

df_results = pd.concat([pd.read_feather('../results/black-box_results_brush_250.feather'), df_results], axis=0)
df_results = pd.concat([pd.read_feather('../results/black-box_results_pstree.feather'), df_results], axis=0)

# df_results.reset_index()

# Removing mislabeled datasets (these are clf, but PMLB v1.0 had it as regr)
df_results = df_results[ ~df_results["dataset"].isin(["banana", "titanic"]) ]


In [3]:
df_results.columns

Index(['dataset', 'algorithm', 'random_state', 'training time (s)',
       'model_size', 'symbolic_model', 'mse_test', 'mae_test', 'r2_test',
       'params_str', 'training time (hr)', 'r2_zero_test', 'friedman_dataset',
       'symbolic_alg'],
      dtype='object')

In [4]:
print(df_results['algorithm'].unique())

df_results['algorithm'] = df_results['algorithm'].str.replace(r'^Brush$', r'Brush w/ split (no MAB)', regex=True)
df_results['algorithm'] = df_results['algorithm'].str.replace(r'^Brush wo split$', r'Brush w/o split (no MAB)', regex=True)
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush wo', r'Brush w/o'))
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush (', r'Brush w/ split ('))

# Short labels
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush w/o split (no MAB)',r'$Brush_0$'))
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush w/ split (no MAB)','Brush+S'))
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush w/o split (D-UCB1)','Brush+M'))
df_results['algorithm'] = df_results['algorithm'].apply(lambda x: x.replace('Brush w/ split (D-UCB1)','Brush+SM'))

print(df_results['algorithm'].unique())

['PS-Tree' 'Brush' 'Brush (C-D-UCB1)' 'Brush (D-TS)' 'Brush wo split'
 'Brush (D-UCB1)' 'Brush wo split (D-UCB1)' 'Brush (C-D-TS)' 'AdaBoost'
 'AFP' 'AIFeynman' 'BSR' 'DSR' 'EPLEX' 'FEAT' 'AFP_FE' 'FFX' 'GP-GOMEA'
 'gplearn' 'ITEA' 'KernelRidge' 'LGBM' 'MLP' 'MRGP' 'Operon'
 'RandomForest' 'SBP-GP' 'Linear' 'XGB']
['PS-Tree' 'Brush+S' 'Brush w/ split (C-D-UCB1)' 'Brush w/ split (D-TS)'
 '$Brush_0$' 'Brush+SM' 'Brush+M' 'Brush w/ split (C-D-TS)' 'AdaBoost'
 'AFP' 'AIFeynman' 'BSR' 'DSR' 'EPLEX' 'FEAT' 'AFP_FE' 'FFX' 'GP-GOMEA'
 'gplearn' 'ITEA' 'KernelRidge' 'LGBM' 'MLP' 'MRGP' 'Operon'
 'RandomForest' 'SBP-GP' 'Linear' 'XGB']


In [5]:
df_results.sample(10)

Unnamed: 0,dataset,algorithm,random_state,training time (s),model_size,symbolic_model,mse_test,mae_test,r2_test,params_str,training time (hr),r2_zero_test,friedman_dataset,symbolic_alg
24027,687_sleuth_ex1605,MRGP,11964,7226.331212,2693,0.7655815884854065*mul( 0.03676987180682988*di...,563.238753,19.60739,-3.080632,"{'g': 250, 'max_len': 6, 'popsize': 1000, 'rt_...",2.007314,0.0,False,True
236,607_fri_c4_1000_50,Brush w/ split (C-D-UCB1),28020,579.825621,20,"Add(1.04*Add(Sin(Add(Sub(-1.54*oz2,1.38*oz1),0...",0.250793,0.371745,0.747735,"{'pop_size': 1000, 'max_gen': 250, 'verbosity'...",0.161063,0.747735,True,True
22735,658_fri_c3_250_25,ITEA,860,2718.008947,96,6.926424260394937e-2 + 0.5188583073592525*np.t...,0.499466,0.472299,0.476647,"{'ngens': 500, 'nonzeroexps': 1, 'npop': 1000,...",0.755002,0.476647,True,True
898,294_satellite_image,PS-Tree,5390,1429.264228,906,not implemented,0.746785,0.580748,0.851726,"{'height_limit': 6, 'n_pop': 25, 'n_gen': 500,...",0.397018,0.851726,False,False
1638,1193_BNG_lowbwt,SBP-GP,11964,293461.809126,364,-0.168093+-7196.370246*(((0.000419*plog((((plo...,208402.599805,365.358159,0.600347,"{'caching': False, 'classweights': False, 'eli...",81.517169,0.600347,False,True
3288,505_tecator,$Brush_0$,4426,347.999329,28,"Add(Sub(-0.81*moisture,Sub(0.21*protein,0.02))...",1.134622,0.727675,0.99466,"{'pop_size': 1000, 'max_gen': 250, 'verbosity'...",0.096666,0.99466,False,True
17522,622_fri_c2_1000_50,gplearn,11964,21099.935184,20,sin(sub(sub(sin(sin(sin(sin(sub(sqrt(sub(sqrt(...,0.284336,0.4153,0.689323,"{'const_range': '(-1.0, 1.0)', 'feature_names'...",5.861093,0.689323,True,True
379,1029_LEV,Brush w/ split (C-D-UCB1),15795,377.511048,14,"0.40*Add(Add(Sub(1.52*In2,-0.52*In3),Atan(In4)...",0.480922,0.534998,0.496488,"{'pop_size': 1000, 'max_gen': 250, 'verbosity'...",0.104864,0.496488,False,True
24863,712_chscase_geyser1,MLP,5390,13.207051,802,not implemented,35.70481,4.854377,0.699501,"{'activation': 'relu', 'alpha': 0.0001, 'batch...",0.003669,0.699501,False,False
1708,1196_BNG_pharynx,DSR,11964,35536.440783,8,sin(x10 - x6*exp(-exp(x10))),107072.155273,241.340605,0.388136,{},9.871234,0.388136,False,True


In [6]:
def count_substrings(text, substrings):
    counts = {substring: text.count(substring+'(') for substring in substrings}

    return counts


for brush in [
    'Brush+S',   # 'Brush',
    '$Brush_0$', # 'Brush wo split',
    'Brush+M',   # 'Brush wo split (D-UCB1)',
    'Brush+SM',  # "Brush (D-UCB1)",
]:
    df_aux = df_results[df_results['algorithm']==brush]

    # Using only friedman or non-friedman
    # df_aux = df_aux[df_aux.dataset.str.contains("_fri_")]
    
    # display(df_aux['symbolic_model'].values[0])

    text = "ababababa"
    substrings = [
        #"Add", "Sub", "Mul", "Div", "Aq", "Abs",
        "Acos", "Asin", "Atan", "Cos", "Cosh", "Sin", "Sinh", "Tan", "Tanh",
        # "Ceil", "Floor", 
        # "Exp", "Log",
        # "Logabs", "Log1p", "Sqrt", "Sqrtabs", "Square", "Pow", "Logistic"
    ]

    tot = 0
    for idx, row in df_aux.iterrows():
        counts = count_substrings(row['symbolic_model'], substrings)

        tot += sum([v for k,v in counts.items()])

    print(brush, tot)

Brush+S 2593
$Brush_0$ 2635
Brush+M 4169
Brush+SM 4204


In [7]:
df_results = df_results[df_results.dataset.str.contains("_fri_")]
    
for brush in [
    'Brush+S', # 'Brush',
    '$Brush_0$', # 'Brush wo split',
    'Brush+M', # 'Brush wo split (D-UCB1)',
    'Brush+SM', # "Brush (D-UCB1)",
]:
    df_aux = df_results[
        (df_results['algorithm']==brush)
        & (df_results['symbolic_model'].str.contains("If"))
    ]
    
    perc = df_aux.shape[0]/df_results[df_results['algorithm']==brush].shape[0]
    print(brush, df_aux.shape[0], perc)


Brush+S 34 0.054838709677419356
$Brush_0$ 0 0.0
Brush+M 0 0.0
Brush+SM 44 0.07096774193548387


In [8]:
df_brush_sm = df_results[
    (df_results['algorithm']=='Brush+SM')
    & (df_results['symbolic_model'].str.contains("If"))
][['dataset', 'r2_test', 'mse_test', 'symbolic_model']]

df_brush_sm.to_csv("./inspecting_models_Brush+SM.csv")

with pd.option_context('display.max_rows', None):
    display( (df_brush_sm.groupby(['dataset']).size()/10).sort_values(ascending=False) )

with pd.option_context('display.max_colwidth', None):
    display(df_brush_sm)

dataset
644_fri_c4_250_25      0.4
633_fri_c0_500_25      0.4
594_fri_c2_100_5       0.3
621_fri_c0_100_10      0.2
591_fri_c1_100_10      0.2
595_fri_c0_1000_10     0.2
649_fri_c0_500_5       0.2
601_fri_c1_250_5       0.2
603_fri_c0_250_50      0.2
618_fri_c3_1000_50     0.2
605_fri_c2_250_25      0.2
635_fri_c0_250_10      0.1
631_fri_c1_500_5       0.1
582_fri_c1_500_25      0.1
637_fri_c1_500_50      0.1
626_fri_c2_500_50      0.1
650_fri_c0_500_50      0.1
648_fri_c1_250_50      0.1
617_fri_c3_500_5       0.1
620_fri_c1_1000_25     0.1
584_fri_c4_500_25      0.1
616_fri_c4_500_50      0.1
615_fri_c4_250_10      0.1
608_fri_c3_1000_10     0.1
604_fri_c4_500_10      0.1
596_fri_c2_250_5       0.1
588_fri_c4_1000_100    0.1
653_fri_c0_250_25      0.1
dtype: float64

Unnamed: 0,dataset,r2_test,mse_test,symbolic_model
429,603_fri_c0_250_50,0.91715,0.109885,"656.59*Add(If(oz21>-1.68,0.92*Sub(0.24*Add(Log1p(0.06*oz2),0.15*Abs(0.03*oz3)),-0.02*Sub(0.05*Add(oz4,-215.56*oz1),-0.02*oz5)),Tanh(0.32)),Sinh(Sub(Sin(59.45*Log1p(0.00*oz1)),Tanh(Sinh(0.01*oz2)))))"
438,603_fri_c0_250_50,-1.831236,2.494654,"104.94*Add(0.25*Sub(Sin(0.27*Sub(Add(oz1,oz2),Log1p(-0.05*oz5))),Sub(-0.02*oz4,2.82*Log1p(0.36*Sin(-0.23*oz1)))),If(oz3>-1.83,Log1p(Add(-0.06*oz2,0.00)),-0.06*oz39))"
645,637_fri_c1_500_50,0.887942,0.116352,"Sub(0.49*Sub(2.25*Sin(Sub(Add(-1.25*oz2,-1.44*oz1),-0.77)),Add(1.25*Add(-0.31*oz5,Sin(1.57*oz2)),Sinh(0.05*oz24))),0.31*Sub(2.54*Sub(-3.02*Log1p(If(oz27>-1.83,0.16*oz4,-0.29*oz18)),Tanh(Square(-0.77*oz1))),Atan(Sub(Floor(oz2),0.46*oz1))))"
801,584_fri_c4_500_25,0.915048,0.094827,"0.93*Add(Add(1.12*Sin(Sub(Add(-1.52*oz2,-1.34*oz1),-0.60)),If(oz5>3.09,Cosh(0.38*oz4),0.38*oz4)),0.23*Add(2.41*Sin(36.73*Add(Atan(-0.03*oz2),Atan(-0.01))),Square(Log1p(Mul(0.44*oz3,2.90*oz3)))))"
1144,620_fri_c1_1000_25,0.92291,0.075943,"0.79*Sub(1.61*Sin(1.16*Add(1.81*Sin(0.83*Add(-1.00*oz1,0.53)),Sinh(-0.96*oz2))),0.72*Add(Sub(Add(1.63*Tan(-0.36*oz4),If(oz16>-1.74,-0.38*oz5,0.89)),Add(Sqrtabs(-0.77*oz1),0.73*Exp(-0.77*oz2))),1.67))"
1234,601_fri_c1_250_5,0.925355,0.086412,"1.42*Add(0.84*Sin(6.79*Sub(Add(0.89*Log1p(-0.25*oz2),Sinh(-1.24)),0.77*Logistic(Sub(1.43*oz1,1.71)))),Add(Tan(Asin(1.00*Add(0.26*oz4,0.13*oz5))),If(oz3>1.45,0.28,Sub(-0.15,0.26*Log1p(0.45*oz2)))))"
1236,601_fri_c1_250_5,0.908676,0.088812,"4.59*Add(0.34*Tan(0.66*Cos(Sub(Sub(6748.90,1.60*oz2),1.24*Exp(0.70*oz1)))),Sub(Sub(0.75*Logistic(Sub(0.48*oz4,0.20*oz1)),If(oz1>2.10,-0.53*oz2,-0.39*oz2)),Sin(Add(Add(0.46,-0.05*oz5),0.53*oz2))))"
1407,595_fri_c0_1000_10,0.841067,0.149495,"Sub(Sinh(0.78*Sin(0.87*Add(oz2,If(oz4>1.64,oz2,oz1)))),300.93*Add(0.40*Sub(Tan(Sub(-0.00*oz5,0.09*oz1)),2.50*Cos(0.04*Abs(oz2))),Sqrt(Exp(Add(-0.00*oz4,0.08*oz1)))))"
1444,595_fri_c0_1000_10,0.684345,0.31505,"90.61*Add(Sub(Tanh(0.15*Add(20.59*Log1p(0.04*oz1),Sub(oz2,0.10*oz4))),Sub(Add(0.19,Sin(0.12*oz1)),If(oz8>1.68,-0.04,0.02*oz4))),Sin(0.28*Add(Sqrt(Logistic(0.07*oz5)),-0.51*oz2)))"
1678,644_fri_c4_250_25,0.796696,0.177298,"1.98*Sinh(0.51*Add(Sin(Sub(Add(-1.40*oz2,-0.95*oz1),Add(0.32*oz5,-0.44))),If(oz3>3.00,Sinh(0.75*oz4),Sub(Sin(0.37*oz4),0.16*oz2))))"


In [9]:
df_brush_s = df_results[
    (df_results['algorithm']=='Brush+S')
    & (df_results['symbolic_model'].str.contains("If"))
][['dataset', 'r2_test', 'mse_test', 'symbolic_model']]

df_brush_s.to_csv("./inspecting_models_Brush+S.csv")

with pd.option_context('display.max_rows', None):
    display( (df_brush_s.groupby(['dataset']).size()/10).sort_values(ascending=False) )

with pd.option_context('display.max_colwidth', None):
    display(df_brush_s)

dataset
649_fri_c0_500_5      0.3
635_fri_c0_250_10     0.3
633_fri_c0_500_25     0.2
653_fri_c0_250_25     0.2
599_fri_c2_1000_5     0.2
643_fri_c2_500_25     0.2
612_fri_c1_1000_5     0.2
582_fri_c1_500_25     0.1
651_fri_c0_100_25     0.1
648_fri_c1_250_50     0.1
647_fri_c1_250_10     0.1
644_fri_c4_250_25     0.1
637_fri_c1_500_50     0.1
621_fri_c0_100_10     0.1
624_fri_c0_100_5      0.1
586_fri_c3_1000_25    0.1
615_fri_c4_250_10     0.1
609_fri_c0_1000_5     0.1
604_fri_c4_500_10     0.1
602_fri_c3_250_10     0.1
594_fri_c2_100_5      0.1
592_fri_c4_1000_25    0.1
591_fri_c1_100_10     0.1
590_fri_c0_1000_50    0.1
658_fri_c3_250_25     0.1
dtype: float64

Unnamed: 0,dataset,r2_test,mse_test,symbolic_model
665,637_fri_c1_500_50,0.723136,0.277977,"Sub(Asin(Sin(Sub(Sub(-0.90*oz1,1.42*oz2),-0.30))),If(oz38>-1.67,Sub(-0.35*oz4,Add(Mul(0.10,1.62*oz5),0.08)),-1.89))"
766,609_fri_c0_1000_5,0.878789,0.128217,"Add(Add(0.26,0.88*Sin(Add(0.81*oz1,Sinh(0.74*oz2)))),0.31*Add(Add(2.10*Sin(Add(-1.53,1.19*oz3)),If(oz1>-1.73,1.85*oz4,11.68*oz3)),1.50*Sinh(0.56*oz5)))"
1698,644_fri_c4_250_25,0.895226,0.102057,"Add(0.69*Sin(Add(Add(1.95*oz2,1.31),1.36*oz1)),Add(1.11*Sub(Sin(Sub(-1.34*oz2,oz1)),-0.05*oz11),If(oz3>3.02,1.54*oz4,Add(0.03*oz22,0.37*oz4))))"
2007,621_fri_c0_100_10,0.679206,0.23767,"Add(Add(Tan(-0.74*Sqrtabs(Add(oz2,-0.61))),0.82*Add(Log1p(0.47*oz1),Sub(Sin(oz4),If(oz3>1.61,-2.65,-1.23)))),0.26*oz5)"
2198,582_fri_c1_500_25,0.784922,0.212985,"Add(Add(0.88*Sin(Add(-3.30*oz1,0.86*Add(1.06,oz3))),Sub(Add(0.68*Sin(-1.63*oz1),0.36*oz4),-0.03*oz9)),If(oz1>2.18,0.17*oz5,oz14))"
2397,651_fri_c0_100_25,0.473906,0.474157,"0.40*Add(Sinh(Sub(oz4,0.11*Sinh(1.69*oz1))),Sub(Add(If(oz14>-1.64,Add(oz2,2.01*oz1),-54.11*oz5),Square(Square(0.67*oz3))),-0.96*oz5))"
2817,649_fri_c0_500_5,0.882665,0.114885,"10.52*Sub(17.41*Sub(1.60*Sin(0.62*Sub(0.18*oz2,0.32*Add(-0.55*oz1,0.07))),1.00*Cosh(Abs(0.05*oz2))),Sub(If(oz2>-1.81,Add(0.18*Exp(-0.30*oz4),-0.03*oz5),0.31),1.00*Add(203.76*Log1p(-0.01*oz1),0.60*Add(-5.08*oz2,Abs(29.96)))))"
2825,649_fri_c0_500_5,0.887517,0.106888,"Add(0.85*Sub(Sub(0.67*oz4,If(oz2>-1.80,-0.55,0.55)),Cos(oz3)),Sub(Sin(0.97*Sin(Add(oz1,0.87*oz2))),-0.28*oz5))"
2832,649_fri_c0_500_5,0.828625,0.163486,"Add(Sinh(If(oz2>-1.82,0.57*oz4,-1.19)),Add(Sin(Sin(Add(oz1,0.91*oz2))),Tan(0.21*oz5)))"
3432,599_fri_c2_1000_5,0.937468,0.059852,"Sub(Sinh(0.52*Sin(1.87*Sub(-0.96*oz2,Add(oz1,-0.69)))),Sub(Add(2.05*Log1p(-0.18*oz4),If(oz3>-2.10,Add(-0.19*oz5,0.23*oz3),1.30*oz2)),1.01*Sin(0.98*Sub(-1.30*oz2,oz1))))"


In [10]:
df_brush_sm_set = set(df_brush_sm.dataset.unique().tolist())
df_brush_s_set  = set(df_brush_s.dataset.unique().tolist())

common_ds = df_brush_sm_set.intersection(df_brush_s_set)
common_ds

{'582_fri_c1_500_25',
 '591_fri_c1_100_10',
 '594_fri_c2_100_5',
 '604_fri_c4_500_10',
 '615_fri_c4_250_10',
 '621_fri_c0_100_10',
 '633_fri_c0_500_25',
 '635_fri_c0_250_10',
 '637_fri_c1_500_50',
 '644_fri_c4_250_25',
 '648_fri_c1_250_50',
 '649_fri_c0_500_5',
 '653_fri_c0_250_25'}

In [11]:
# !pip install statannotations
# from statannotations.Annotator import Annotator #https://github.com/trevismd/statannotations
from scipy.stats import ttest_1samp 

np.random.seed(42)
def bootstrap(val, n = 1000, fn=np.mean):
    val_samples = []
    for i in range(n):
        sample = np.random.randint(0,len(val)-1, size=len(val))
        val_samples.append( fn(val[sample]) )
    m = np.mean(val_samples)
    sd = np.std(val_samples)
    ci_upper  = np.quantile(val_samples,0.95)
    ci_lower  = np.quantile(val_samples,0.05)
    return m, sd, ci_upper,ci_lower

from IPython.display import display, HTML

def wrap_df_text(df):
    return display(HTML(df.to_html().replace("\\n","<br>")))


ds = '562_cpu_small'
for ds in ['562_cpu_small', '227_cpu_small', '1203_BNG_pwLinear', '197_cpu_act', '215_2dplanes', '229_pwLinear']:
    print(ds)

    # Filter by the selected dataset and show the best performing expressions for the algorithms
    filtered = df_results[
        (df_results['dataset']==ds)
        & (df_results['algorithm'].isin(['Brush+S', '$Brush_0$', 'Brush+SM', 'Brush+M'])) 
    ]

    # print(filtered.shape)

    # groupby algorithm, then find the highest value for r2_test column, and report the value in symbolic_model column for that row

    # Group by algorithm and find the row with the max r2_test within each group
    # best_performing_models = filtered.groupby('algorithm').apply(lambda x: x.loc[x['r2_test'].idxmax(), ['symbolic_model', 'r2_test', 'model_size']])
    best_performing_models = filtered.groupby('algorithm').apply(lambda x: x.loc[x['model_size'].idxmin(), ['symbolic_model', 'r2_test', 'model_size']])

    # Reset index to make 'algorithm' a regular column again
    best_performing_models.reset_index(inplace=True)
    best_performing_models['symbolic_model'] = best_performing_models['symbolic_model'].str.wrap(80)

    with pd.option_context('display.max_colwidth', None):
        wrap_df_text(best_performing_models)
        # display(best_performing_models)
        
    for col_of_interest in ['r2_test', 'model_size']:
        display(filtered.groupby('algorithm')[col_of_interest].mean())

        # 95% confidence interval
        print('confidence interval')
        for alg, dg in filtered.groupby('algorithm'):
            _, sdx, ciux, cilx = bootstrap(dg[col_of_interest].values, fn=np.median, n=1000)
            print(alg, round(ciux, 3), round(cilx, 3), round(sdx, 3), sep='\t')

        # Staistics
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html
        for algorithm in filtered['algorithm'].unique():
            if algorithm == '$Brush_0$': continue

            print(algorithm, 
                ttest_1samp(
                    (filtered[filtered['algorithm']==algorithm])[col_of_interest].dropna(),
                    popmean=filtered[filtered['algorithm']=='$Brush_0$'][col_of_interest].mean(),
                    alternative='greater' if col_of_interest == 'r2_test' else 'less'
                )
            )

562_cpu_small


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  best_performing_models = filtered.groupby('algorithm').apply(lambda x: x.loc[x['model_size'].idxmin(), ['symbolic_model', 'r2_test', 'model_size']])


ValueError: cannot insert algorithm, already exists