In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the local helper modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
import json
import logging
import multiprocessing
from functools import partial

# Data processing
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid

# Networks
import networkx as nx

# Modules
sys.path.insert(0, '../scripts')
from evaluate import Evaluate
from gsd import GSD

### Directory setup

In [3]:
# Root of the evaluation run; both JSON spec files are resolved directly under it.
home_dir = "../evaluation_clean/"
data_param_file, model_param_file = (
    os.path.join(home_dir, spec_name)
    for spec_name in ("data_specs.json", "model_specs.json")
)

### Instantiate the Evaluate object

In [4]:
# Build the evaluation driver from the run directory and the data/model spec
# files. The dataset count shown below is presumably reported by the
# constructor -- confirm in scripts/evaluate.py.
e = Evaluate(home_dir, data_param_file, model_param_file)

Number of datasets: 6


### Generate synthetic data, or load it if the files already exist

In [7]:
# Prepare the datasets described by the data spec. In the logged run below each
# data matrix was written to <home_dir>/data/data_matrices/ as a .npy file;
# whether existing files are reused is decided inside Evaluate (not visible here).
e.initialize_data()

04:16:15 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.npy.
04:16:16 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.npy.
04:16:16 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.2.npy.
04:16:16 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.2.npy.
04:16:16 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.4.npy.
04:16:16 - Evaluate: INFO - Wrote data matrix: ../evaluation_clean/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.4.npy.


### Run models on each dataset and save results

In [8]:
# Run each model configuration from the model spec. Per the log below, results
# are written to <home_dir>/results/<model>/<dataset>.npy; the out-of-order
# completion messages suggest datasets are processed in parallel (TODO confirm).
for model_params in e.model_paramlist: 
    e.run_grid(model_params)

04:16:24 - Evaluate: INFO - Running ICA1 with parameters: {'cutoff': 0.01, 'n_components': 10, 'method': 'ICA1'}
04:17:21 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.npy
04:17:30 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.npy
04:17:33 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.4.npy
04:17:35 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.2.npy
04:17:36 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.4.npy
04:17:36 - Models: INFO - Run complete, results written to: ../evaluation_clean/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.2.npy
04:17:36 - Evaluate: INFO - Running GSD with parameters: {'a': 0.1, 'edge_file': '/nfs/

### Merge all results

In [9]:
# Flatten the per-model score records into one list, then build a single
# DataFrame from it (one row per scored result).
scores = [
    record
    for model_params in e.model_paramlist
    for record in e.score_results(model_params)
]

scores_df = pd.DataFrame(scores)
scores_df.head()

Unnamed: 0,F1rr,model,n_samples,n_sources,noise,recovery,relevance,rep,size
0,0.770005,ICA1_10_0.01,20,5,0.0,0.981506,0.633496,1,100
1,0.774671,ICA1_10_0.01,20,5,0.0,0.984502,0.638569,2,100
2,0.118007,ICA1_10_0.01,20,5,0.2,0.222795,0.080259,1,100
3,0.104959,ICA1_10_0.01,20,5,0.2,0.211338,0.069816,2,100
4,0.065736,ICA1_10_0.01,20,5,0.4,0.166867,0.04093,1,100


### Average over replicates

In [10]:
# Mean and standard deviation of every score across replicates, per
# (model, dataset configuration) group.
#
# * `rep` only identifies the replicate, so it is dropped before aggregating
#   rather than computing (and then discarding) statistics for it.
# * Aggregations are given by name: passing np.mean / np.std to `agg` is
#   deprecated in pandas >= 2.1, and once callables are invoked directly np.std
#   would silently switch from the sample std (ddof=1) shown here to the
#   population std (ddof=0).
tmp = (
    scores_df
    .drop(columns=["rep"])
    .groupby(by=["model", "size", "n_sources", "n_samples", "noise"])
    .agg(["mean", "std"])
)
tmp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,F1rr,F1rr,recovery,recovery,relevance,relevance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std,mean,std
model,size,n_sources,n_samples,noise,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
GSD_0.1_3_ica_5,100,5,20,0.0,0.010304,0.00073,0.00902,0.00027,0.012206,0.002495
GSD_0.1_3_ica_5,100,5,20,0.2,0.008867,0.000634,0.00875,0.000452,0.00899,0.000826
GSD_0.1_3_ica_5,100,5,20,0.4,0.008688,0.000563,0.008359,0.000862,0.00906,0.000209
ICA1_10_0.01,100,5,20,0.0,0.772338,0.003299,0.983004,0.002119,0.636033,0.003588
ICA1_10_0.01,100,5,20,0.2,0.111483,0.009227,0.217066,0.008101,0.075037,0.007384


In [11]:
# Show only the ICA1 rows; `model` is an index level of the grouped frame, and
# DataFrame.query can filter on index level names.
tmp.query('model=="ICA1_10_0.01"')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,F1rr,F1rr,recovery,recovery,relevance,relevance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std,mean,std
model,size,n_sources,n_samples,noise,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ICA1_10_0.01,100,5,20,0.0,0.772338,0.003299,0.983004,0.002119,0.636033,0.003588
ICA1_10_0.01,100,5,20,0.2,0.111483,0.009227,0.217066,0.008101,0.075037,0.007384
ICA1_10_0.01,100,5,20,0.4,0.065479,0.000363,0.17238,0.007797,0.040431,0.000706
