# Analyse CV synthetic data
- Data are generated using `AnomalyGm_debug .ipynb`. 
- Results are inside `../data/output/5-fold_cv/synthetic/edge_anomalies/`, example: `500_3_20.0_1.0_False_1_cv.csv`
- Inference performed running:
```bash
python3 main_cv_syn.py -K 3 -l 500_3_20.0_0.01_False_0 -f ../data/input/synthetic/edge_anomalies/ -o ../data/output/5-fold_cv/edge_anomalies/ -E 0  ;
python3 main_cv_syn.py -K 3 -l 500_3_20.0_0.01_False_0 -f ../data/input/synthetic/edge_anomalies/ -o ../data/output/5-fold_cv/edge_anomalies/ -E 1  ;
```

Parameters have been fixed as:
- N = 500
- K = 3
- $<k>$ = 20.0
- seed = [0,9]

In [90]:
# When True, the plotting cells write their figures to disk.
savefigs = True
# NOTE(review): NN is not referenced in any of the visible cells — confirm whether it is still needed.
NN = 10

In [91]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [92]:
%matplotlib notebook

In [93]:
def flt(x,d=1):
    """Return ``x`` rounded to ``d`` decimal places (default 1) with the built-in ``round``."""
    rounded = round(x, d)
    return rounded

In [102]:
# Directory holding the cross-validation CSV files; the loading cells read `indir + <file name>`.
indir0 = '../data/output/5-fold_cv/synthetic/edge_anomalies_eta1/'
indir  = indir0

Fixed parameters

In [103]:
# File-name components of the result files (kept as strings because they are joined with '_').
# NOTE(review): the next cell reassigns N and k ('200', '30'), so these values are not the ones used.
N = '500'
K = '3'
k = '60'
eta = 1

In [104]:
# File-name components actually used when building the result file names
# ('<N>_<K>_<k>_<rho digits>_<eta>_<seed>_cv.csv').
N = '200'
K = '3'
k = '30'
eta = 1

In [105]:
# Display name of each algorithm, keyed by the boolean `flag_anomaly` value of a run.
algo_name = {
    True: 'AnomalyCD',
    False: 'StandardCD',
}
# Line/marker colour per algorithm.
colors = {
    'AnomalyCD': 'dodgerblue',
    'StandardCD': 'salmon',
}
# Colour of the shaded bands; same values as `colors`, kept as a separate dict.
colors_s = dict(colors)
# Scatter marker per algorithm.
markers = {
    'AnomalyCD': 'o',
    'StandardCD': 'D',
}
# Axis label per metric column.
lbl_metric = {
    'F1Q_test': r'F1$(Z_{GT},Z_{inf})$',
}

In [106]:
import seaborn as sns
# Global plot styling for every figure below. Each call updates shared
# seaborn/matplotlib settings, so the order of the calls matters.
sns.set_style("white", {'axes.grid' : False})
sns.set_context("paper", font_scale=1.75, rc={"lines.linewidth": 2.5} )
sns.set_palette("tab20", 8, .75) 
# NOTE(review): this second set_style call runs after the "white" one above — confirm which style is intended.
sns.set_style('ticks') 
# palette = plt.get_cmap('tab20b')

Tuned parameters

In [107]:
# NOTE(review): flag_node_anomaly is only referenced from commented-out code in the visible cells.
flag_node_anomaly = False
# indir = indir0 + 'edge_anomalies/' if flag_node_anomaly == False else indir0 + 'node_anomalies/'

# Alternative rho grids kept for reference (inactive):
# rhos_float = list(np.linspace(0.,0.2,21))
# rhos_float = [0.0,0.02,0.05,0.1,0.15,0.2]
# rhos_float.extend(list(np.linspace(0.3,1.0,8)))
# rhos_float = list(np.linspace(0.0,1.0,11))
# rhos_float = [0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ]
# Active grid of rho values: the loading cells read ten result files (seeds 0-9) per value.
rhos_float = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# rhos_float = [0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
#        0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
#        0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41,
#        0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52,
#        0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71,
#        0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 , 0.9, 1. ]

# rhos_float = [0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.07, 0.08, 0.09, 0.1 ,
#        0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
#        0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8 , 0.9, 1. ]
# Only used to tag the figure file names via str(pi).split('.')[0].
# NOTE(review): for 0.3 that expression yields '0' (the integer part) — confirm this is intended.
pi = 0.3

### F1 plot

In [108]:
# Collect the `F1Q_test` column of every result file, keyed by rho and algorithm.
#   y[rho][algo] -> values pooled over all seeds and folds
#   x[rho][algo] -> one-element array holding the x-position used when plotting
y = {}
x = {}
deltaX = 0.001  # small horizontal offset separating the x-positions of the two algorithms


metric = 'F1Q_test'

for rho_float in rhos_float:
    rho = str(flt(rho_float, d=2))
    y[rho] = {algo_name[a]: [] for a in [True, False]}
    # The flag_anomaly=True algorithm is shifted left, the other one right.
    x[rho] = {algo_name[True]: [rho_float - deltaX],
              algo_name[False]: [rho_float + deltaX]}

    for seed in range(10):
        # File names carry only the decimal digits of rho (e.g. '0.9' -> '9').
        network = ('_').join([N, K, k, rho.split('.')[1], str(eta), str(seed), 'cv']) + '.csv'

        df = pd.read_csv(indir + network)

        # Group on the bare column name: with a one-element list, pandas >= 2.0
        # yields keys as tuples such as (True,), which are not keys of algo_name.
        for n, g in df.groupby('flag_anomaly'):
            g1 = g.sort_values(by=['fold'])
            y[rho][algo_name[bool(n)]].extend(g1[metric].values)

    # Store plain float arrays so that the later mean/std run on numeric data.
    for a in [True, False]:
        y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]], dtype=float)
        x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]], dtype=float)

In [109]:
# Display the path of the last result file that was read (sanity check).
indir+network

'../data/output/5-fold_cv/synthetic/edge_anomalies_eta1/200_3_30_9_1_9_cv.csv'

### Calculate mean and std over seeds and folds

In [110]:
# String keys of `y` (one per rho value), in insertion order.
rhos = list(y.keys())
# rhos

In [111]:
# Algorithm names, read from whichever rho entry was stored first, so that the
# cell does not depend on a particular key (such as '0.1') being in the grid.
algos = list(next(iter(y.values())).keys())
algos

['AnomalyCD', 'StandardCD']

In [112]:
# rho = '0.0'
# a = algo_name[True]
# plt.figure(figsize=(4,4))
# plt.plot(np.arange(len(y[rho][a])),y[rho][a])

In [113]:
# Mean and standard deviation of `metric` for each algorithm at each rho value.
y_avg, y_std = {}, {}
x_avg, x_std = {}, {}
n_rho = len(rhos_float)  # one slot per rho value iterated below
for a in algos:
    y_avg[a] = np.empty(n_rho)
    x_avg[a] = np.empty(n_rho)
    y_std[a] = np.empty(n_rho)
    for idx, rho_float in enumerate(rhos_float):
        rho = str(flt(rho_float, d=2))  # same key format as used when filling y and x
        tmp = np.asarray(y[rho][a], dtype=float)
        y_avg[a][idx] = np.mean(tmp)
        y_std[a][idx] = np.std(tmp)

        x_avg[a][idx] = x[rho][a][0]
    print(a, y_avg[a])


AnomalyCD [0. 0. 0. 0. 0. 0. 0. 0. 0.]
StandardCD [0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [114]:
# Display the algorithm names.
algos

['AnomalyCD', 'StandardCD']

In [115]:
# Display the tag inserted in the figure file names (the part of str(pi) before the dot).
str(pi).split('.')[0]

'0'

In [116]:
# Scatter plot (mean with std error bars) of the F1 metric against rho.
fs = 20
ms = 100
# The '_a' suffix keeps this scatter version from being overwritten by the line
# plot, which is saved as F1_synt_pi<tag>.png. The CS figures already use this
# convention.
outfig = '../figures/F1_synt_pi'+str(pi).split('.')[0]+'_a.png'
print(outfig)
plt.figure()
# Only AnomalyCD is drawn here.
for a in ['AnomalyCD']:
    plt.scatter(x_avg[a], y_avg[a], label=a, c=colors[a], s=ms, marker=markers[a], edgecolor='dimgrey')
    plt.errorbar(x_avg[a], y_avg[a], yerr=y_std[a], ecolor=colors[a], fmt='', capsize=5, ls='', alpha=0.8)

plt.ylabel(lbl_metric[metric], fontsize=fs)
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)
else:
    plt.show()

../figures/F1_synt_pi0.png


<IPython.core.display.Javascript object>

In [117]:
# NOTE(review): this manually overwrites the first computed mean for AnomalyCD with 0
# before the next plot — confirm that this is intentional.
a = 'AnomalyCD'
y_avg[a][0] = 0

In [118]:
# Line plot of the mean F1 metric with a shaded band of half a std on either side.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/F1_synt_pi' + pi_tag + '.png'
print(outfig)

plt.figure()
# Only AnomalyCD is drawn here.
for a in ['AnomalyCD']:
    half_std = y_std[a] / 2
    plt.plot(x_avg[a], y_avg[a], label=a, c=colors[a])
    plt.fill_between(x_avg[a], y_avg[a] + half_std, y_avg[a] - half_std,
                     color=colors_s[a], alpha=0.5)

plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel(lbl_metric[metric], fontsize=fs)
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/F1_synt_pi0.png


<IPython.core.display.Javascript object>

In [119]:
# Display the means of the algorithm last bound to `a` (the loop variable of the plot cell above).
y_avg[a]

array([0., 0., 0., 0., 0., 0., 0., 0., 0.])

### Pre

In [120]:
# y = {}
# x = {}
# deltaX = 0.001

# metric = 'pre_test'

# # for rho_float in np.linspace(0.,0.05,6):
# for rho_float in rhos_float:
# #     rho = str(flt(rho_float,d=2)) if np.allclose(rho_float,0) == False else str(flt(rho_float,d=1))
#     rho = str(flt(rho_float,d=2))
#     y[rho] = {}
#     x[rho] = {}
#     for a in [True,False]:
#         y[rho][algo_name[a]] , x[rho][algo_name[a]] = [],[]
    
#         if a == True:
#             x[rho][algo_name[a]].append(rho_float - deltaX )  
#         else:
#             x[rho][algo_name[a]].append(rho_float + deltaX )  

#     for seed in range(0,20):
# #         network = ('_').join([N,K,k,rho,str(pi),str(flag_node_anomaly),str(seed),'cv']) + '.csv'
#         network = ('_').join([N,K,k,rho,str(seed),'cv']) + '.csv'
#         df = pd.read_csv(indir+network)
        
#         for i,(n,g) in enumerate(df.groupby(by=['flag_anomaly'])):
#             g1 = g.sort_values(by=['fold']) 
#             print(g1)
#             y[rho][algo_name[n]].extend(g1[metric].values) 

#     for a in [True,False]:
#         y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]],dtype=object)    
#         x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]],dtype=object) 
# #         print(rho,algo_name[a],y[rho][algo_name[a]].shape[0])

In [121]:
# rhos = list(y.keys()) 

In [122]:
# algos = list(y['0.0'].keys())
# algos

In [123]:
# y_avg , y_std= {}, {}
# x_avg , x_std= {}, {}
# for a in algos: # algorithms
#     y_avg[a] = np.empty(len(rhos))
#     x_avg[a] = np.empty(len(rhos))
#     y_std[a] = np.empty(len(rhos))
#     for idx, rho_float in enumerate(rhos_float):
#         rho = str(flt(rho_float,d=2)) if np.allclose(rho_float,0) == False else str(flt(rho_float,d=1))
#         tmp = y[rho][a]
#         y_avg[a][idx] = np.mean(tmp)
#         y_std[a][idx] = np.std(tmp)
        
#         x_avg[a][idx] = x[rho][a][0]
#     print(a,y_avg[a])


In [124]:
# fs = 20
# ms = 100
# outfig = '../figures/pre_synt_pi'+str(pi).split('.')[0]+'_mod.png' 
# print(outfig)
# plt.figure()
# # for a in algos:
# for a in ['AnomalyCD']:
#     # a = 'AnomalyCD'
#     plt.scatter(x_avg[a],y_avg[a],label=a,c=colors[a],s=ms,marker=markers[a],edgecolor='dimgrey')
#     plt.errorbar(x_avg[a],y_avg[a],yerr=y_std[a],ecolor=colors[a],fmt='',capsize=5,ls='',alpha=0.8)

# # if rho == '0.0':
# # plt.legend(fontsize=fs,loc= 'best')
# plt.ylabel('Precision',fontsize=fs)
    
# plt.xlabel(r'$\rho_a$',fontsize=fs)
# # plt.xticks(np.arange(7),np.arange(7))
# # plt.text(4.5,0.87,r'$\eta=$'+eta,fontsize=fs, bbox=dict(facecolor='w', alpha=0.5))
# # plt.ylim([-0.2,0.7])
# plt.tight_layout()
# if savefigs:
#     plt.savefig(outfig, dpi=300)

In [125]:
# fs = 20
# ms = 100
# outfig = '../figures/pre_synt_pi'+str(pi).split('.')[0]+'_mod.png' 
# print(outfig)
# plt.figure()
# # for a in algos:
# for a in ['AnomalyCD']:
#     plt.plot(x_avg[a],y_avg[a],label=a,c=colors[a])
#     plt.fill_between(x_avg[a], y_avg[a]+(y_std[a]/2), y_avg[a]-(y_std[a]/2), color=colors_s[a], alpha=0.5)
# #     plt.errorbar(x_avg[a],y_avg[a],yerr=y_std[a],ecolor=colors[a],fmt='',capsize=5,ls='',alpha=0.8)

# # if rho == '0.0':
# # plt.legend(fontsize=fs,loc= 'best')
# plt.ylabel('Precision',fontsize=fs)
    
# plt.xlabel(r'$\rho_a$',fontsize=fs)
# # plt.xticks(np.arange(7),np.arange(7))
# # plt.text(4.5,0.87,r'$\eta=$'+eta,fontsize=fs, bbox=dict(facecolor='w', alpha=0.5))
# # plt.ylim([-0.2,0.7])
# plt.tight_layout()
# if savefigs:
#     plt.savefig(outfig, dpi=300)

### AUC Q

In [126]:
# Preview the columns of the last result file that was loaded.
df.head()

Unnamed: 0,K,fold,rseed,flag_anomaly,eta,mu,pi,aucA_train,aucA_test,aucZ_train,aucZ_test,ELBO,CS_U,CS_V,F1Q_train,F1Q_test,auc_marg_train,auc_marg_test,auc_cond_train,auc_cond_test
0,3,0,265,False,8.796436,0.0,1.0,0.829749,0.508985,0.5,0.5,-3446.749903,0.533985,0.555057,0.0,0.0,0.830516,0.511518,0.850201,0.593899
1,3,0,265,True,9.116324,0.086418,0.025515,0.809477,0.481437,0.385439,0.356586,-3214.742325,0.548267,0.559826,0.0,0.0,0.701633,0.504707,0.741892,0.592036
2,3,1,265,False,8.425251,0.0,1.0,0.83191,0.494527,0.5,0.5,-3384.795453,0.56277,0.587111,0.0,0.0,0.833041,0.497129,0.85127,0.582632
3,3,1,265,True,8.75365,0.086418,0.024894,0.804888,0.483527,0.367236,0.41815,-3120.429927,0.591003,0.600974,0.0,0.0,0.785377,0.500037,0.806365,0.583653
4,3,2,265,False,11.452338,0.0,1.0,0.828355,0.512855,0.5,0.5,-3310.689999,0.529935,0.533582,0.0,0.0,0.644043,0.504739,0.710189,0.569648


In [127]:
# Collect the `aucA_test` column of every result file, keyed by rho and algorithm.
#   y[rho][algo] -> values pooled over all seeds and folds
#   x[rho][algo] -> one-element array holding the x-position used when plotting
y = {}
x = {}
deltaX = 0.001  # small horizontal offset separating the x-positions of the two algorithms

metric = 'aucA_test'

for rho_float in rhos_float:
    rho = str(flt(rho_float, d=2))
    y[rho] = {algo_name[a]: [] for a in [True, False]}
    # The flag_anomaly=True algorithm is shifted left, the other one right.
    x[rho] = {algo_name[True]: [rho_float - deltaX],
              algo_name[False]: [rho_float + deltaX]}

    for seed in range(10):
        # File names carry only the decimal digits of rho (e.g. '0.9' -> '9').
        network = ('_').join([N, K, k, rho.split('.')[1], str(eta), str(seed), 'cv']) + '.csv'
        df = pd.read_csv(indir + network)

        # Group on the bare column name: with a one-element list, pandas >= 2.0
        # yields keys as tuples such as (True,), which are not keys of algo_name.
        for n, g in df.groupby('flag_anomaly'):
            g1 = g.sort_values(by=['fold'])
            y[rho][algo_name[bool(n)]].extend(g1[metric].values)

    # Store plain float arrays so that the later mean/std run on numeric data.
    for a in [True, False]:
        y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]], dtype=float)
        x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]], dtype=float)

In [128]:
# Mean and standard deviation of `metric` for each algorithm at each rho value.
y_avg, y_std = {}, {}
x_avg, x_std = {}, {}
n_rho = len(rhos_float)  # one slot per rho value iterated below
for a in algos:
    y_avg[a] = np.empty(n_rho)
    x_avg[a] = np.empty(n_rho)
    y_std[a] = np.empty(n_rho)
    for idx, rho_float in enumerate(rhos_float):
        rho = str(flt(rho_float, d=2))  # same key format as used when filling y and x
        tmp = np.asarray(y[rho][a], dtype=float)
        y_avg[a][idx] = np.mean(tmp)
        y_std[a][idx] = np.std(tmp)

        x_avg[a][idx] = x[rho][a][0]

In [129]:
# Scatter plot (mean with std error bars) of the AUC against rho, for both algorithms.
fs = 20
ms = 100
# The '_a' suffix keeps this scatter version from being overwritten by the line
# plot, which is saved as AUC_synt_pi<tag>.png. The CS figures already use this
# convention.
outfig = '../figures/AUC_synt_pi'+str(pi).split('.')[0]+'_a.png'
print(outfig)
plt.figure()
for a in algos:
    plt.scatter(x_avg[a], y_avg[a], label=a, c=colors[a], s=ms, marker=markers[a], edgecolor='dimgrey')
    plt.errorbar(x_avg[a], y_avg[a], yerr=y_std[a], ecolor=colors[a], fmt='', capsize=5, ls='', alpha=0.8)

plt.legend(fontsize=18, loc='best')
plt.ylabel('AUC', fontsize=22)
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/AUC_synt_pi0.png


<IPython.core.display.Javascript object>

In [130]:
# Line plot of the mean AUC per algorithm with a shaded band of half a std on either side.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/AUC_synt_pi' + pi_tag + '.png'
print(outfig)

plt.figure()
for a in algos:
    half_std = y_std[a] / 2
    plt.plot(x_avg[a], y_avg[a], label=a, c=colors[a])
    plt.fill_between(x_avg[a], y_avg[a] + half_std, y_avg[a] - half_std,
                     color=colors_s[a], alpha=0.5)

plt.legend(fontsize=17, loc='best')
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel('AUC', fontsize=fs)
plt.ylim([0.4, 1.0])
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/AUC_synt_pi0.png


<IPython.core.display.Javascript object>

### Communities similarity

In [131]:
# Collect the `CS_U` column of every result file, keyed by rho and algorithm.
#   y[rho][algo] -> values pooled over all seeds and folds
#   x[rho][algo] -> one-element array holding the x-position used when plotting
y = {}
x = {}
deltaX = 0.001  # small horizontal offset separating the x-positions of the two algorithms

metric = 'CS_U'

for rho_float in rhos_float:
    rho = str(flt(rho_float, d=2))
    y[rho] = {algo_name[a]: [] for a in [True, False]}
    # The flag_anomaly=True algorithm is shifted left, the other one right.
    x[rho] = {algo_name[True]: [rho_float - deltaX],
              algo_name[False]: [rho_float + deltaX]}

    for seed in range(10):
        # File names carry only the decimal digits of rho (e.g. '0.9' -> '9').
        network = ('_').join([N, K, k, rho.split('.')[1], str(eta), str(seed), 'cv']) + '.csv'
        df = pd.read_csv(indir + network)

        # Group on the bare column name: with a one-element list, pandas >= 2.0
        # yields keys as tuples such as (True,), which are not keys of algo_name.
        for n, g in df.groupby('flag_anomaly'):
            g1 = g.sort_values(by=['fold'])
            y[rho][algo_name[bool(n)]].extend(g1[metric].values)

    # Store plain float arrays so that the later mean/std run on numeric data.
    for a in [True, False]:
        y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]], dtype=float)
        x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]], dtype=float)

Calculate percentage of wins

In [132]:
# y_prc = {}
# for a in y[1].keys(): y_prc[a] = np.zeros(int(x[seed]['CRepDyn_static0'].max())  )
# for seed in y.keys():
#     tmp1 = y[seed]['CRepDyn_static'].values > y[seed]['CRepDyn_static0'].values
# #     tmp2 = y[seed]['CRepDyn_static'].values > y[seed]['aggr'].values
#     tmp2 = True
#     y_prc['CRepDyn_static'] += np.logical_and(tmp1,tmp2)
    
#     tmp1 = y[seed]['CRepDyn_static0'].values > y[seed]['CRepDyn_static'].values
# #     tmp2 = y[seed]['CRepDyn_static0'].values > y[seed]['aggr'].values
#     tmp2 = True
#     y_prc['CRepDyn_static0'] += np.logical_and(tmp1,tmp2)

# #     tmp1 = y[seed]['aggr'].values > y[seed]['CRepDyn_static'].values
# #     tmp2 = y[seed]['aggr'].values > y[seed]['CRepDyn_static'].values
# #     y_prc['aggr'] += np.logical_and(tmp1,tmp2)

# tot = y_prc['CRepDyn_static0'] + y_prc['CRepDyn_static']
# tot[tot==0] = 1
# y_prc['CRepDyn_static'] /= tot
# y_prc['CRepDyn_static0'] /= tot
# print(len(y.keys()))

In [133]:
# df_prc = pd.DataFrame(y_prc)
# df_prc

Calculate mean and std over seeds

In [134]:
# Mean and standard deviation of `metric` for each algorithm at each rho value.
y_avg, y_std = {}, {}
x_avg, x_std = {}, {}
n_rho = len(rhos_float)  # one slot per rho value iterated below
for a in algos:
    y_avg[a] = np.empty(n_rho)
    x_avg[a] = np.empty(n_rho)
    y_std[a] = np.empty(n_rho)
    for idx, rho_float in enumerate(rhos_float):
        rho = str(flt(rho_float, d=2))  # same key format as used when filling y and x
        tmp = np.asarray(y[rho][a], dtype=float)
        y_avg[a][idx] = np.mean(tmp)
        y_std[a][idx] = np.std(tmp)

        x_avg[a][idx] = x[rho][a][0]


In [135]:
# Scatter plot (mean with std error bars) of the community-similarity values against rho.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/CS_synt_pi' + pi_tag + '_a.png'
print(outfig)

plt.figure()
for a in algos:
    pts_x, pts_y = x_avg[a], y_avg[a]
    plt.scatter(pts_x, pts_y, label=a, c=colors[a], s=ms,
                marker=markers[a], edgecolor='dimgrey')
    plt.errorbar(pts_x, pts_y, yerr=y_std[a], ecolor=colors[a],
                 fmt='', capsize=5, ls='', alpha=0.8)

plt.legend(fontsize=18, loc='best')
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel('CS', fontsize=fs)
plt.ylim([0.5, 1.1])
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/CS_synt_pi0_a.png


<IPython.core.display.Javascript object>

In [136]:
# for a in algos: 
#     y_avg[a][-1] = [] 
#     y_std[a][-1] = []
    

In [137]:
# x_avg['AnomalyCD']

In [138]:
# x_avg['StandardCD']= np.array([-0.001,  0.099,  0.199,  0.299,  0.399,  0.499,  0.599,  0.699,
#         0.799,  0.899])

In [139]:
# y_avg['AnomalyCD']=np.array([0.95375932, 0.93584611, 0.93037937, 0.9193741 , 0.89371461,
#        0.85021016, 0.79443625, 0.70986187, 0.61390789, 0.51690801])

In [140]:
# y_avg['StandardCD']= np.array([0.9584331 , 0.92784245, 0.89496878, 0.85119903, 0.79375537,
#        0.73584972, 0.68320131, 0.62777788, 0.58143344, 0.53868667])

In [141]:
# y_std['AnomalyCD']= np.array([0.01585516, 0.01615706, 0.01792772, 0.01791092, 0.02228022,
#        0.02685334, 0.02328494, 0.02645034, 0.02572904, 0.02550574])

In [142]:
# y_std['StandardCD']= np.array([0.01257393, 0.01821112, 0.01952351, 0.02059004, 0.02277764,
#        0.01910548, 0.02115511, 0.02050553, 0.01774245, 0.02017839])

In [143]:
# y_std['StandardCD']

In [144]:
# Line plot of the mean community similarity per algorithm with a shaded band of half a std on either side.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/CS_synt_pi' + pi_tag + '.png'
print(outfig)

plt.figure()
for a in algos:
    half_std = y_std[a] / 2
    plt.plot(x_avg[a], y_avg[a], label=a, c=colors[a])
    plt.fill_between(x_avg[a], y_avg[a] + half_std, y_avg[a] - half_std,
                     color=colors_s[a], alpha=0.5)

plt.legend(fontsize=17, loc='best')
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel('CS', fontsize=fs)
plt.ylim([0.5, 1.1])
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/CS_synt_pi0.png


<IPython.core.display.Javascript object>

Calculate percentage of wins

In [145]:
# y_prc = {}
# for a in y[1].keys(): y_prc[a] = np.zeros(int(x[seed]['CRepDyn_static0'].max())  )
# for seed in y.keys():
#     tmp1 = y[seed]['CRepDyn_static'].values > y[seed]['CRepDyn_static0'].values
# #     tmp2 = y[seed]['CRepDyn_static'].values > y[seed]['aggr'].values
#     tmp2 = True
#     y_prc['CRepDyn_static'] += np.logical_and(tmp1,tmp2)
    
#     tmp1 = y[seed]['CRepDyn_static0'].values > y[seed]['CRepDyn_static'].values
# #     tmp2 = y[seed]['CRepDyn_static0'].values > y[seed]['aggr'].values
#     tmp2 = True
#     y_prc['CRepDyn_static0'] += np.logical_and(tmp1,tmp2)

# #     tmp1 = y[seed]['aggr'].values > y[seed]['CRepDyn_static'].values
# #     tmp2 = y[seed]['aggr'].values > y[seed]['CRepDyn_static'].values
# #     y_prc['aggr'] += np.logical_and(tmp1,tmp2)

# tot = y_prc['CRepDyn_static0'] + y_prc['CRepDyn_static']
# tot[tot==0] = 1
# tot = 1
# y_prc['CRepDyn_static'] /= tot
# y_prc['CRepDyn_static0'] /= tot
# print(len(y.keys()))

In [146]:
# df_prc = pd.DataFrame(y_prc)
# df_prc

## auc_marg_test

In [147]:
# Collect the `auc_marg_test` column of every result file, keyed by rho and algorithm.
#   y[rho][algo] -> values pooled over all seeds and folds
#   x[rho][algo] -> one-element array holding the x-position used when plotting
y = {}
x = {}
deltaX = 0.001  # small horizontal offset separating the x-positions of the two algorithms

metric = 'auc_marg_test'

for rho_float in rhos_float:
    rho = str(flt(rho_float, d=2))
    y[rho] = {algo_name[a]: [] for a in [True, False]}
    # The flag_anomaly=True algorithm is shifted left, the other one right.
    x[rho] = {algo_name[True]: [rho_float - deltaX],
              algo_name[False]: [rho_float + deltaX]}

    for seed in range(10):
        # File names carry only the decimal digits of rho (e.g. '0.9' -> '9').
        network = ('_').join([N, K, k, rho.split('.')[1], str(eta), str(seed), 'cv']) + '.csv'
        df = pd.read_csv(indir + network)

        # Group on the bare column name: with a one-element list, pandas >= 2.0
        # yields keys as tuples such as (True,), which are not keys of algo_name.
        for n, g in df.groupby('flag_anomaly'):
            g1 = g.sort_values(by=['fold'])
            y[rho][algo_name[bool(n)]].extend(g1[metric].values)

    # Store plain float arrays so that the later mean/std run on numeric data.
    for a in [True, False]:
        y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]], dtype=float)
        x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]], dtype=float)

In [148]:
# Mean and standard deviation of `metric` for each algorithm at each rho value.
y_avg, y_std = {}, {}
x_avg, x_std = {}, {}
n_rho = len(rhos_float)  # one slot per rho value iterated below
for a in algos:
    y_avg[a] = np.empty(n_rho)
    x_avg[a] = np.empty(n_rho)
    y_std[a] = np.empty(n_rho)
    for idx, rho_float in enumerate(rhos_float):
        rho = str(flt(rho_float, d=2))  # same key format as used when filling y and x
        tmp = np.asarray(y[rho][a], dtype=float)
        y_avg[a][idx] = np.mean(tmp)
        y_std[a][idx] = np.std(tmp)

        x_avg[a][idx] = x[rho][a][0]

In [149]:
# Scatter plot (mean with std error bars) of the `auc_marg_test` values against rho.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/AUC_marg_synt_pi' + pi_tag + '.png'
print(outfig)

plt.figure()
for a in algos:
    pts_x, pts_y = x_avg[a], y_avg[a]
    plt.scatter(pts_x, pts_y, label=a, c=colors[a], s=ms,
                marker=markers[a], edgecolor='dimgrey')
    plt.errorbar(pts_x, pts_y, yerr=y_std[a], ecolor=colors[a],
                 fmt='', capsize=5, ls='', alpha=0.8)

plt.legend(fontsize=17, loc='best')
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel('AUC', fontsize=fs)
plt.ylim([0.4, 1.0])
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/AUC_marg_synt_pi0.png


<IPython.core.display.Javascript object>

## auc_cond_test

In [150]:
# Collect the `auc_cond_test` column of every result file, keyed by rho and algorithm.
#   y[rho][algo] -> values pooled over all seeds and folds
#   x[rho][algo] -> one-element array holding the x-position used when plotting
y = {}
x = {}
deltaX = 0.001  # small horizontal offset separating the x-positions of the two algorithms

metric = 'auc_cond_test'

for rho_float in rhos_float:
    rho = str(flt(rho_float, d=2))
    y[rho] = {algo_name[a]: [] for a in [True, False]}
    # The flag_anomaly=True algorithm is shifted left, the other one right.
    x[rho] = {algo_name[True]: [rho_float - deltaX],
              algo_name[False]: [rho_float + deltaX]}

    for seed in range(10):
        # File names carry only the decimal digits of rho (e.g. '0.9' -> '9').
        network = ('_').join([N, K, k, rho.split('.')[1], str(eta), str(seed), 'cv']) + '.csv'
        df = pd.read_csv(indir + network)

        # Group on the bare column name: with a one-element list, pandas >= 2.0
        # yields keys as tuples such as (True,), which are not keys of algo_name.
        for n, g in df.groupby('flag_anomaly'):
            g1 = g.sort_values(by=['fold'])
            y[rho][algo_name[bool(n)]].extend(g1[metric].values)

    # Store plain float arrays so that the later mean/std run on numeric data.
    for a in [True, False]:
        y[rho][algo_name[a]] = np.array(y[rho][algo_name[a]], dtype=float)
        x[rho][algo_name[a]] = np.array(x[rho][algo_name[a]], dtype=float)

In [151]:
# Mean and standard deviation of `metric` for each algorithm at each rho value.
y_avg, y_std = {}, {}
x_avg, x_std = {}, {}
n_rho = len(rhos_float)  # one slot per rho value iterated below
for a in algos:
    y_avg[a] = np.empty(n_rho)
    x_avg[a] = np.empty(n_rho)
    y_std[a] = np.empty(n_rho)
    for idx, rho_float in enumerate(rhos_float):
        rho = str(flt(rho_float, d=2))  # same key format as used when filling y and x
        tmp = np.asarray(y[rho][a], dtype=float)
        y_avg[a][idx] = np.mean(tmp)
        y_std[a][idx] = np.std(tmp)

        x_avg[a][idx] = x[rho][a][0]

In [152]:
# Scatter plot (mean with std error bars) of the `auc_cond_test` values against rho.
fs, ms = 20, 100
pi_tag = str(pi).split('.')[0]
outfig = '../figures/AUC_cond_synt_pi' + pi_tag + '.png'
print(outfig)

plt.figure()
for a in algos:
    pts_x, pts_y = x_avg[a], y_avg[a]
    plt.scatter(pts_x, pts_y, label=a, c=colors[a], s=ms,
                marker=markers[a], edgecolor='dimgrey')
    plt.errorbar(pts_x, pts_y, yerr=y_std[a], ecolor=colors[a],
                 fmt='', capsize=5, ls='', alpha=0.8)

plt.legend(fontsize=17, loc='best')
plt.xlabel(r'$\rho_a$', fontsize=fs)
plt.ylabel('AUC', fontsize=fs)
plt.ylim([0.4, 1.0])
plt.tight_layout()
if savefigs:
    plt.savefig(outfig, dpi=300)

../figures/AUC_cond_synt_pi0.png


<IPython.core.display.Javascript object>