## Fico

In [77]:
import os
import re

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils import load_pickle

# Threshold sweep for the 'fico_3' results: plot the chosen metric and the
# number of edges against the threshold, then optionally save the figure.
save = True

# `version` is interpolated unescaped into the filename regex below.
version = 'fico_3'
metric = 'auc'

dset = '100000'
# NOTE: this pattern is not interpolated into the filename regex below (which
# captures the threshold with `(.*?)`); the name is rebound inside the loop.
theta = r'(([+-])?\d(\.\d+)?)'

folder = "./results/adjmats/"

# Raw f-string: `\d` is passed to the regex engine untouched instead of relying
# on Python leaving an invalid string escape alone (SyntaxWarning on 3.12+).
# The dots are deliberately left unescaped to keep matching the same files.
edge_file_re = re.compile(rf'W_est.{dset}.(\d).(\d).(.*?).{version}.pkl$')

# One row per matching pickle: (theta, out_f, in_f, number of entries > 0).
list_edges = []
for filename in os.listdir(folder):
    match = edge_file_re.search(filename)
    if match:
        out_f, in_f, theta = match.groups()
        weights = load_pickle(os.path.join(folder, filename), verbose=False)
        # Vectorized count of strictly positive entries.
        edges = np.count_nonzero(weights.flatten() > 0)
        list_edges.append((float(theta), out_f, in_f, edges))

if not list_edges:
    # Without this guard the groupby below fails with an opaque `KeyError: 0`.
    raise FileNotFoundError(
        f"no 'W_est' pickles matching version {version!r} found in {folder!r}"
    )

# Mean / std of the edge count per theta.
n_edges = pd.DataFrame(list_edges).groupby(0, as_index=False).agg({3: ['mean', 'std']})
n_edges.columns = n_edges.columns.droplevel()
n_edges.columns = ['theta', 'N_edges', 'Std']


folder = "./results/"
filename = f"Nested5FoldCASTLE.Reg.Synth.100000.{version}.pkl"
describe = load_pickle(os.path.join(folder, filename), verbose=False)

# One row per entry of `describe`: (theta, c[9], fold, metric value).
# Column 1 (c[9]) is carried along but not aggregated.
runs = [
    (describe[c]['theta'], c[9], describe[c]['fold'], describe[c][metric])
    for c in describe
]
agg_stats = (
    pd.DataFrame(runs)
    .groupby(0, as_index=False)
    .agg({2: ['count', 'max'], 3: ['mean', 'std']})
)
# The 'accuracy_*' columns hold the mean / std of whichever `metric` was chosen.
agg_stats.columns = ['theta', 'count', 'folds', 'accuracy_mean', 'accuracy_std']
agg_stats = agg_stats.merge(n_edges, on='theta')
# Keep only the first row for each distinct mean edge count.
agg_stats = agg_stats.drop_duplicates(subset=['N_edges'])

if version == 'fico2':
    # Add a hard-coded "normal CASTLE" row (theta == -1) at the top of both tables.
    agg_stats.loc[-1] = [-1, 5, 5, 0.723377, 0.009611, 405.12, 10.401602]
    agg_stats.index = agg_stats.index + 1  # shift so the new row gets index 0
    agg_stats = agg_stats.sort_index()

    n_edges.loc[-1] = [-1, 405.12, 10.401602]
    n_edges.index = n_edges.index + 1
    n_edges = n_edges.sort_index()

# Move the theta == -1 row to a small negative x position so it plots just
# left of zero (it sits above the "CASTLE" annotation added further down).
sub = -0.005
agg_stats.loc[agg_stats['theta'] == -1, 'theta'] = sub

# Restrict the plotted range and drop thresholds that should not be shown.
filter_theta = 0.03
filter_list = [0.005, 0.007, 0.009, 0.011]
agg_stats = agg_stats[
    (agg_stats['theta'] <= filter_theta) & (~agg_stats['theta'].isin(filter_list))
]
display(agg_stats)

# Figure with two y-axes: edges on the primary axis, metric on the secondary.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# From here on `metric` is only used as a display label.
if metric == 'accuracy':
    metric = metric.capitalize()
else:
    metric = metric.upper()

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['accuracy_mean'],
        name=metric,
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['accuracy_std'],
            visible=True,
        ),
    ),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['N_edges'],
        name="Edges",
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['Std'],
            visible=True,
        ),
    ),
    secondary_y=False,
)

fig.update_layout(
    title='',
    legend={
        'y': -0.35,
        'x': 0.85,
        'orientation': "h",
        'xanchor': 'center',
        'yanchor': 'bottom',
    },
    template='plotly_white',
    autosize=True,
    width=600,
    height=250,
    margin=dict(l=10, r=10, b=0, t=10, pad=0),
    font=dict(family='Serif', size=18),
)

# X axis: explicit ticks at 0, the chosen threshold, then every 0.01.
tick_values = [0, 0.004] + list(
    np.round(np.arange(0.01, max(agg_stats['theta']), 0.01), 2)
)
fig.update_xaxes(
    showgrid=True,
    title={'text': r'$\text{Threshold } \tau$', 'font': {'size': 18}},
    tickvals=tick_values,
)

# Y axes titles.
fig.update_yaxes(
    showgrid=False,
    nticks=6,
    zeroline=False,
    title={'text': metric},
    secondary_y=True,
)
fig.update_yaxes(title={'text': "Number of Edges"}, secondary_y=False)

# Label placed in paper coordinates near the left edge, below the axis.
fig.add_annotation(
    x=0.02,
    y=-0.14,
    text="CASTLE",
    showarrow=False,
    yshift=0,
    xref='paper',
    yref='paper',
)

chosen_tau = 0.004
# Metric at the chosen threshold, kept for inspection; the annotations below
# use hard-coded coordinates rather than this value.
y_co = agg_stats[agg_stats['theta'] == chosen_tau]['accuracy_mean']

# Arrow labelled with the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=0.7929,
    text=str(chosen_tau),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=0,
    yref='y2',
    ay=170,
)
# Arrow labelled with the metric value at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=0.7929,
    text=str(0.79),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=300,
    yref='y2',
    ay=0,
)
# Arrow labelled with the edge count at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=84.6,
    text=str(85),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=-130,
    yref='y',
    ay=0,
)

fig.show()

output_folder = "figures"
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(output_folder, exist_ok=True)

out_path = os.path.join(output_folder, f"plot_tauoptim_{version}.png")

if save:
    # Not referenced directly; presumably imported so a missing kaleido
    # install fails here rather than inside write_image.
    import kaleido
    fig.write_image(out_path)



Unnamed: 0,theta,count,folds,accuracy_mean,accuracy_std,N_edges,Std
0,-0.005,25,5,0.795521,0.005878,552.0,0.0
1,0.0,25,5,0.795158,0.003524,276.0,0.0
2,0.004,25,5,0.792953,0.003936,84.6,6.144103
4,0.006,25,5,0.790614,0.002682,60.8,6.812978
6,0.008,25,5,0.780123,0.012351,47.0,4.232808
8,0.01,25,5,0.773345,0.009844,36.4,2.857738
10,0.012,25,5,0.714603,0.1099,29.8,1.979057
11,0.016,25,5,0.55032,0.102716,20.8,2.362908
12,0.02,25,5,0.5,0.0,15.2,1.979057
13,0.024,25,5,0.5,0.0,11.2,0.763763


## Adult

In [75]:
import os
import re

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils import load_pickle

# Threshold sweep for the 'adult_fulltest' results: plot the chosen metric and
# the number of edges against the threshold, then optionally save the figure.
save = True

# `version` is interpolated unescaped into the filename regex below.
version = 'adult_fulltest'
metric = 'auc'

dset = '100000'
# NOTE: this pattern is not interpolated into the filename regex below (which
# captures the threshold with `(.*?)`); the name is rebound inside the loop.
theta = r'(([+-])?\d(\.\d+)?)'

folder = "./results/adjmats/"

# Raw f-string: `\d` is passed to the regex engine untouched instead of relying
# on Python leaving an invalid string escape alone (SyntaxWarning on 3.12+).
# The dots are deliberately left unescaped to keep matching the same files.
edge_file_re = re.compile(rf'W_est.{dset}.(\d).(\d).(.*?).{version}.pkl$')

# One row per matching pickle: (theta, out_f, in_f, number of entries > 0).
list_edges = []
for filename in os.listdir(folder):
    match = edge_file_re.search(filename)
    if match:
        out_f, in_f, theta = match.groups()
        weights = load_pickle(os.path.join(folder, filename), verbose=False)
        # Vectorized count of strictly positive entries.
        edges = np.count_nonzero(weights.flatten() > 0)
        list_edges.append((float(theta), out_f, in_f, edges))

if not list_edges:
    # Without this guard the groupby below fails with an opaque `KeyError: 0`.
    raise FileNotFoundError(
        f"no 'W_est' pickles matching version {version!r} found in {folder!r}"
    )

# Mean / std of the edge count per theta.
n_edges = pd.DataFrame(list_edges).groupby(0, as_index=False).agg({3: ['mean', 'std']})
n_edges.columns = n_edges.columns.droplevel()
n_edges.columns = ['theta', 'N_edges', 'Std']


folder = "./results/"
filename = f"Nested5FoldCASTLE.Reg.Synth.100000.{version}.pkl"
describe = load_pickle(os.path.join(folder, filename), verbose=False)

# One row per entry of `describe`: (theta, c[9], fold, metric value).
# Column 1 (c[9]) is carried along but not aggregated.
runs = [
    (describe[c]['theta'], c[9], describe[c]['fold'], describe[c][metric])
    for c in describe
]
agg_stats = (
    pd.DataFrame(runs)
    .groupby(0, as_index=False)
    .agg({2: ['count', 'max'], 3: ['mean', 'std']})
)
# The 'accuracy_*' columns hold the mean / std of whichever `metric` was chosen.
agg_stats.columns = ['theta', 'count', 'folds', 'accuracy_mean', 'accuracy_std']
agg_stats = agg_stats.merge(n_edges, on='theta')
# Keep only the first row for each distinct mean edge count.
agg_stats = agg_stats.drop_duplicates(subset=['N_edges'])

if version == 'fico2':
    # Add a hard-coded "normal CASTLE" row (theta == -1) at the top of both tables.
    agg_stats.loc[-1] = [-1, 5, 5, 0.723377, 0.009611, 405.12, 10.401602]
    agg_stats.index = agg_stats.index + 1  # shift so the new row gets index 0
    agg_stats = agg_stats.sort_index()

    n_edges.loc[-1] = [-1, 405.12, 10.401602]
    n_edges.index = n_edges.index + 1
    n_edges = n_edges.sort_index()

# Move the theta == -1 row to a small negative x position so it plots just
# left of zero (it sits above the "CASTLE" annotation added further down).
sub = -0.09
agg_stats.loc[agg_stats['theta'] == -1, 'theta'] = sub

# Restrict the plotted range and drop thresholds that should not be shown.
filter_theta = 0.6
filter_list = [0.2, 0.24]
agg_stats = agg_stats[
    (agg_stats['theta'] <= filter_theta) & (~agg_stats['theta'].isin(filter_list))
]
display(agg_stats)

# Figure with two y-axes: edges on the primary axis, metric on the secondary.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# From here on `metric` is only used as a display label.
if metric == 'accuracy':
    metric = metric.capitalize()
else:
    metric = metric.upper()

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['accuracy_mean'],
        name=metric,
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['accuracy_std'],
            visible=True,
        ),
    ),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['N_edges'],
        name="Edges",
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['Std'],
            visible=True,
        ),
    ),
    secondary_y=False,
)

fig.update_layout(
    title='',
    legend={
        'y': -0.35,
        'x': 0.85,
        'orientation': "h",
        'xanchor': 'center',
        'yanchor': 'bottom',
    },
    template='plotly_white',
    autosize=True,
    width=600,
    height=250,
    margin=dict(l=10, r=10, b=10, t=10, pad=0),
    font=dict(family='Serif', size=18),
)

# X axis: explicit ticks at 0, the chosen threshold, then every 0.15.
tick_values = [0, 0.08] + list(
    np.round(np.arange(0.15, max(agg_stats['theta']), 0.15), 1)
)
fig.update_xaxes(
    showgrid=True,
    title={'text': r'$\text{Threshold } \tau$', 'font': {'size': 18}},
    tickvals=tick_values,
)

# Y axes titles.
fig.update_yaxes(
    showgrid=False,
    nticks=6,
    zeroline=False,
    title={'text': metric},
    secondary_y=True,
)
fig.update_yaxes(
    showgrid=True,
    nticks=6,
    zeroline=True,
    title={'text': "Number of Edges"},
    secondary_y=False,
)

# Label placed in paper coordinates near the left edge, below the axis.
fig.add_annotation(
    x=0.02,
    y=-0.15,
    text="CASTLE",
    showarrow=False,
    yshift=0,
    xref='paper',
    yref='paper',
)

chosen_tau = 0.08
# Metric at the chosen threshold, displayed at the end of the cell; the
# annotations below use hard-coded coordinates rather than this value.
y_co = agg_stats[agg_stats['theta'] == chosen_tau]['accuracy_mean']

# Arrow labelled with the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=0.8556,
    text=str(chosen_tau),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=0,
    yref='y2',
    ay=160,
)
# Arrow labelled with the metric value at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=0.8556,
    text=str(0.86),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=280,
    yref='y2',
    ay=0,
)
# Arrow labelled with the edge count at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=46.2,
    text=str(46),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=-160,
    yref='y',
    ay=0,
)

fig.show()

output_folder = "figures"
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(output_folder, exist_ok=True)

out_path = os.path.join(output_folder, f"plot_tauoptim_{version}.png")

if save:
    # Not referenced directly; presumably imported so a missing kaleido
    # install fails here rather than inside write_image.
    import kaleido
    fig.write_image(out_path)

# Show the metric value found at the chosen threshold as the cell output.
y_co

Unnamed: 0,theta,count,folds,accuracy_mean,accuracy_std,N_edges,Std
0,-0.09,25,5,0.747038,0.027878,210.0,0.0
1,0.0,25,5,0.863468,0.00599,105.0,0.0
3,0.08,25,5,0.855648,0.010548,46.2,1.979057
4,0.1,25,5,0.851808,0.006541,40.8,2.614065
5,0.12,25,5,0.850752,0.006104,36.6,2.783882
6,0.14,25,5,0.820374,0.059331,32.2,1.354006
7,0.16,25,5,0.783488,0.06342,30.8,1.190238
8,0.18,20,5,0.65379,0.161708,29.75,1.332785
11,0.28,5,5,0.5,0.0,27.0,0.0
12,0.32,5,5,0.5,0.0,25.0,0.0


3    0.855648
Name: accuracy_mean, dtype: float64

## Boston

In [73]:
# `os` is used throughout this cell (listdir, path.join, makedirs); import it
# here so the cell does not depend on an earlier cell having been run.
import os
import re

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils import load_pickle

# Threshold sweep for the 'boston' results: plot the chosen metric and the
# number of edges against the threshold, then optionally save the figure.
save = True
dset = '100000'
# `version` is interpolated unescaped into the filename regex below, so it may
# also be a regex alternation such as '(adult2|adult)'.
version = 'boston'
metric = 'MSE'
# NOTE: this pattern is not interpolated into the filename regex below (which
# captures the threshold with `(.*?)`); the name is rebound inside the loop.
theta = r'(([+-])?\d(\.\d+)?)'
folder = "./results/adjmats/"

# Raw f-string: `\d` is passed to the regex engine untouched instead of relying
# on Python leaving an invalid string escape alone (SyntaxWarning on 3.12+).
# The dots are deliberately left unescaped to keep matching the same files.
edge_file_re = re.compile(rf'W_est.{dset}.(\d).(\d).(.*?).{version}.pkl$')

# One row per matching pickle: (theta, out_f, in_f, number of entries > 0).
list_edges = []
for filename in os.listdir(folder):
    match = edge_file_re.search(filename)
    if match:
        out_f, in_f, theta = match.groups()
        weights = load_pickle(os.path.join(folder, filename), verbose=False)
        # Vectorized count of strictly positive entries.
        edges = np.count_nonzero(weights.flatten() > 0)
        list_edges.append((float(theta), out_f, in_f, edges))

if not list_edges:
    # Without this guard the groupby below fails with an opaque `KeyError: 0`.
    raise FileNotFoundError(
        f"no 'W_est' pickles matching version {version!r} found in {folder!r}"
    )

# Mean / std of the edge count per theta.
n_edges = pd.DataFrame(list_edges).groupby(0, as_index=False).agg({3: ['mean', 'std']})
n_edges.columns = n_edges.columns.droplevel()
n_edges.columns = ['theta', 'N_edges', 'Std']

folder = "./results/"
filename = f"Nested5FoldCASTLE.Reg.Synth.100000.{version}.pkl"
describe = load_pickle(os.path.join(folder, filename), verbose=False)

# One row per entry of `describe`: (theta, c[9], fold, metric value).
# Column 1 (c[9]) is carried along but not aggregated.
runs = [
    (describe[c]['theta'], c[9], describe[c]['fold'], describe[c][metric])
    for c in describe
]
agg_stats = (
    pd.DataFrame(runs)
    .groupby(0, as_index=False)
    .agg({2: ['count', 'max'], 3: ['mean', 'std']})
)
# The 'accuracy_*' columns hold the mean / std of whichever `metric` was chosen
# (MSE in this cell).
agg_stats.columns = ['theta', 'count', 'folds', 'accuracy_mean', 'accuracy_std']
agg_stats = agg_stats.merge(n_edges, on='theta')
# Keep only the first row for each distinct mean edge count.
agg_stats = agg_stats.drop_duplicates(subset=['N_edges'])

# Move the theta == -1 row to a small negative x position so it plots just
# left of zero (it sits above the "CASTLE" annotation added further down).
sub = -0.17
agg_stats.loc[agg_stats['theta'] == -1, 'theta'] = sub

# Restrict the plotted range; theta is rounded to 3 decimals before being
# compared against the exclusion list.
filter_theta = 0.8
filter_list = []
agg_stats = agg_stats[
    (agg_stats['theta'] <= filter_theta)
    & (~agg_stats['theta'].round(3).isin(filter_list))
]
display(agg_stats)

# Figure with two y-axes: edges on the primary axis, metric on the secondary.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# From here on `metric` is only used as a display label.
if metric == 'accuracy':
    metric = metric.capitalize()
else:
    metric = metric.upper()

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['accuracy_mean'],
        name=metric,
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['accuracy_std'],
            visible=True,
        ),
    ),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['N_edges'],
        name="Edges",
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['Std'],
            visible=True,
        ),
    ),
    secondary_y=False,
)

fig.update_layout(
    title='',
    legend={
        'y': -0.35,
        'x': 0.85,
        'orientation': "h",
        'xanchor': 'center',
        'yanchor': 'bottom',
    },
    template='plotly_white',
    autosize=True,
    width=600,
    height=250,
    margin=dict(l=10, r=10, b=10, t=10, pad=0),
    font=dict(family='Serif', size=18),
)

# X axis: explicit ticks at 0, the chosen threshold, then every 0.2 from 0.3.
tick_values = [0, 0.13] + list(
    np.round(np.arange(0.3, max(agg_stats['theta']), 0.2), 1)
)
fig.update_xaxes(
    showgrid=True,
    title={'text': r'$\text{Threshold } \tau$', 'font': {'size': 18}},
    tickvals=tick_values,
)

# Y axes titles.
fig.update_yaxes(
    showgrid=False,
    nticks=6,
    zeroline=False,
    title={'text': metric},
    secondary_y=True,
)
fig.update_yaxes(
    showgrid=True,
    nticks=6,
    zeroline=True,
    title={'text': "Number of Edges"},
    secondary_y=False,
)

# Label placed in paper coordinates near the left edge, below the axis.
fig.add_annotation(
    x=0.02,
    y=-0.15,
    text="CASTLE",
    showarrow=False,
    yshift=0,
    xref='paper',
    yref='paper',
)

chosen_tau = 0.13
# NOTE(review): exact float comparison; this selection is empty whenever no
# theta equals chosen_tau exactly. It is not used by the annotations below,
# which rely on hard-coded coordinates.
y_co = agg_stats[agg_stats['theta'] == chosen_tau]['accuracy_mean']

# Arrow labelled with the chosen threshold (y in paper coordinates, shifted
# down by a fixed pixel offset).
fig.add_annotation(
    x=chosen_tau,
    y=20.363,
    text=str(chosen_tau),
    showarrow=True,
    yshift=-193,
    xref='x',
    ax=0,
    yref='paper',
    ay=0,
)
# Arrow labelled with the metric value at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=20.363,
    text=str(20.4),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=280,
    yref='y2',
    ay=0,
)
# Arrow labelled with the edge count at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=47.8,
    text=str(48),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=-130,
    yref='y',
    ay=0,
)

fig.show()

output_folder = "figures"
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(output_folder, exist_ok=True)

out_path = os.path.join(output_folder, f"plot_tauoptim_{version}.png")

if save:
    # Not referenced directly; presumably imported so a missing kaleido
    # install fails here rather than inside write_image.
    import kaleido
    fig.write_image(out_path)



Unnamed: 0,theta,count,folds,accuracy_mean,accuracy_std,N_edges,Std
0,-0.17,25,5,22.231392,7.387012,182.0,0.0
1,0.0,25,5,18.50769,7.04278,91.0,0.0
15,0.037,10,5,16.818329,6.78389,90.5,0.527046
16,0.066,25,5,18.437362,6.868943,86.0,1.290994
17,0.132,25,5,20.363651,5.233823,47.8,7.942502
18,0.198,25,5,40.765085,27.205065,32.8,8.098354
19,0.264,25,5,41.8767,27.307922,23.0,6.390097
20,0.33,25,5,62.497449,29.58318,17.0,5.91608
21,0.396,25,5,84.998406,12.277859,12.2,5.416026
22,0.462,25,5,84.606197,12.862441,9.8,4.804512


## Cali

In [74]:
# `os` is used throughout this cell (listdir, path.join, makedirs); import it
# here so the cell does not depend on an earlier cell having been run.
import os
import re

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils import load_pickle

# Threshold sweep for the 'cali' results: plot the chosen metric and the
# number of edges against the threshold, then optionally save the figure.
save = True
dset = '100000'
# `version` is interpolated unescaped into the filename regex below, so it may
# also be a regex alternation such as '(adult2|adult)'.
version = 'cali'
metric = 'MSE'
# NOTE: this pattern is not interpolated into the filename regex below (which
# captures the threshold with `(.*?)`); the name is rebound inside the loop.
theta = r'(([+-])?\d(\.\d+)?)'
folder = "./results/adjmats/"

# Raw f-string: `\d` is passed to the regex engine untouched instead of relying
# on Python leaving an invalid string escape alone (SyntaxWarning on 3.12+).
# The dots are deliberately left unescaped to keep matching the same files.
edge_file_re = re.compile(rf'W_est.{dset}.(\d).(\d).(.*?).{version}.pkl$')

# One row per matching pickle: (theta, out_f, in_f, number of entries > 0).
list_edges = []
for filename in os.listdir(folder):
    match = edge_file_re.search(filename)
    if match:
        out_f, in_f, theta = match.groups()
        weights = load_pickle(os.path.join(folder, filename), verbose=False)
        # Vectorized count of strictly positive entries.
        edges = np.count_nonzero(weights.flatten() > 0)
        list_edges.append((float(theta), out_f, in_f, edges))

if not list_edges:
    # Without this guard the groupby below fails with an opaque `KeyError: 0`.
    raise FileNotFoundError(
        f"no 'W_est' pickles matching version {version!r} found in {folder!r}"
    )

# Mean / std of the edge count per theta.
n_edges = pd.DataFrame(list_edges).groupby(0, as_index=False).agg({3: ['mean', 'std']})
n_edges.columns = n_edges.columns.droplevel()
n_edges.columns = ['theta', 'N_edges', 'Std']

folder = "./results/"
filename = f"Nested5FoldCASTLE.Reg.Synth.100000.{version}.pkl"
describe = load_pickle(os.path.join(folder, filename), verbose=False)

# One row per entry of `describe`: (theta, c[9], fold, metric value).
# Column 1 (c[9]) is carried along but not aggregated.
runs = [
    (describe[c]['theta'], c[9], describe[c]['fold'], describe[c][metric])
    for c in describe
]
agg_stats = (
    pd.DataFrame(runs)
    .groupby(0, as_index=False)
    .agg({2: ['count', 'max'], 3: ['mean', 'std']})
)
# The 'accuracy_*' columns hold the mean / std of whichever `metric` was chosen
# (MSE in this cell).
agg_stats.columns = ['theta', 'count', 'folds', 'accuracy_mean', 'accuracy_std']
agg_stats = agg_stats.merge(n_edges, on='theta')
# Keep only the first row for each distinct mean edge count.
agg_stats = agg_stats.drop_duplicates(subset=['N_edges'])

# Move the theta == -1 row to a negative x position so it plots left of zero
# (it sits above the "CASTLE" annotation added further down).
sub = -0.5
agg_stats.loc[agg_stats['theta'] == -1, 'theta'] = sub

# Restrict the plotted range; theta is rounded to 3 decimals before being
# compared against the exclusion list.
filter_theta = 1.8
filter_list = [
    0.1, 0.188, 0.2, 0.25, 0.3, 0.4, 0.45, 0.376, 0.55, 0.65,
    0.7, 0.75, 0.8, 0.85, 0.9, 0.94, 1.504, 1.88, 2.256, 2.632,
]
agg_stats = agg_stats[
    (agg_stats['theta'] <= filter_theta)
    & (~agg_stats['theta'].round(3).isin(filter_list))
]
display(agg_stats)

# Figure with two y-axes: edges on the primary axis, metric on the secondary.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['accuracy_mean'],
        name=metric,
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['accuracy_std'],
            visible=True,
        ),
    ),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
        x=agg_stats['theta'],
        y=agg_stats['N_edges'],
        name="Edges",
        error_y=dict(
            type='data',  # error bar values are given in data coordinates
            array=agg_stats['Std'],
            visible=True,
        ),
    ),
    secondary_y=False,
)

fig.update_layout(
    title='',
    legend={
        'y': -0.35,
        'x': 0.85,
        'orientation': "h",
        'xanchor': 'center',
        'yanchor': 'bottom',
    },
    template='plotly_white',
    autosize=True,
    width=600,
    height=250,
    margin=dict(l=10, r=10, b=10, t=10, pad=0),
    font=dict(family='Serif', size=18),
)

# X axis: explicit ticks at the chosen threshold, then every 0.5.
tick_values = [0.05] + list(
    np.round(np.arange(0.5, max(agg_stats['theta']), 0.5), 1)
)
fig.update_xaxes(
    showgrid=True,
    title={'text': r'$\text{Threshold } \tau$', 'font': {'size': 18}},
    tickvals=tick_values,
)

# Y axes titles.
fig.update_yaxes(title={'text': "Number of Edges"}, secondary_y=False)
fig.update_yaxes(
    showgrid=False,
    nticks=6,
    zeroline=False,
    title={'text': metric},
    secondary_y=True,
)

# Label placed in paper coordinates near the left edge, below the axis.
fig.add_annotation(
    x=0.02,
    y=-0.14,
    text="CASTLE",
    showarrow=False,
    yshift=0,
    xref='paper',
    yref='paper',
)

chosen_tau = 0.05
# Metric at the chosen threshold, kept for inspection; the annotations below
# use hard-coded coordinates rather than this value.
y_co = agg_stats[agg_stats['theta'] == chosen_tau]['accuracy_mean']

# Arrow labelled with the chosen threshold (y in paper coordinates, shifted
# down by a fixed pixel offset).
fig.add_annotation(
    x=chosen_tau,
    y=1.0185,
    text=str(chosen_tau),
    showarrow=True,
    yshift=-185,
    xref='x',
    ax=0,
    yref='paper',
    ay=0,
)
# Arrow labelled with the metric value at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=1.0185,
    text=str(1.02),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=310,
    yref='y2',
    ay=0,
)
# Arrow labelled with the edge count at the chosen threshold.
fig.add_annotation(
    x=chosen_tau,
    y=30.6,
    text=str(31),
    showarrow=True,
    yshift=0,
    xref='x',
    ax=-120,
    yref='y',
    ay=0,
)

fig.show()

output_folder = "figures"
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(output_folder, exist_ok=True)

out_path = os.path.join(output_folder, f"plot_tauoptim_{version}.png")

if save:
    # Not referenced directly; presumably imported so a missing kaleido
    # install fails here rather than inside write_image.
    import kaleido
    fig.write_image(out_path)



Unnamed: 0,theta,count,folds,accuracy_mean,accuracy_std,N_edges,Std
0,-0.5,25,5,0.662208,0.081241,72.0,0.0
1,0.0,25,5,1.062381,0.312212,36.0,0.0
2,0.05,25,5,1.018462,0.348869,30.6,2.929733
4,0.15,25,5,1.049525,0.365987,23.4,0.816497
9,0.35,25,5,1.081789,0.323505,18.4,1.527525
13,0.5,25,5,1.083272,0.321736,16.8,1.5
20,0.752,20,5,1.174837,0.294267,14.25,0.850696
25,0.95,15,5,1.339808,0.030016,11.0,2.236068
27,1.316,20,5,1.339957,0.023627,8.75,1.681947
29,1.692,25,5,1.331741,0.02689,6.2,1.0
