In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, so changes to the project scripts apply without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
import json
import logging
import multiprocessing
from functools import partial

# Data processing
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid

# Networks
import networkx as nx

# Modules
# Project-local modules are imported from ../scripts. The path is relative to
# the kernel's working directory, so the notebook must be started from its own
# folder for these imports to resolve.
sys.path.insert(0, '../scripts')
from evaluate import Evaluate
from gsd import GSD

### Directory setup

The contents of `../evaluation/` (the `home_dir` used below) are copied from `/nfs/latdata/iamjli/projects/GSD/evaluation`.

In [5]:
# Root of the evaluation workspace; the two JSON spec files are addressed
# directly inside it.
home_dir = "../evaluation/"
data_param_file, model_param_file = (
    os.path.join(home_dir, spec_name)
    for spec_name in ("data_specs.json", "model_specs.json")
)

### Instantiate the `Evaluate` object

In [6]:
# Build the evaluation driver from the working directory and the two JSON spec
# file paths defined in the previous cell.
# NOTE(review): what each argument controls is defined in ../scripts/evaluate.py
# — confirm there before changing the argument order.
e = Evaluate(home_dir, data_param_file, model_param_file)

### Generate synthetic data, or load if file exists already

In [7]:
# Per the logged output, this writes one data matrix (.npy) per dataset under
# <home_dir>/data/data_matrices/.
# NOTE(review): presumably existing files are loaded rather than regenerated —
# confirm in Evaluate.initialize_data.
e.initialize_data()

04:29:07 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.npy.
04:29:07 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.npy.
04:29:07 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.2.npy.
04:29:07 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.2.npy.
04:29:08 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_1_Z_20_5_1_noise_0.4.npy.
04:29:08 - Evaluate: INFO - Wrote data matrix: ../evaluation/data/data_matrices/D_5_100_2_Z_20_5_2_noise_0.4.npy.


### Run models on each dataset and save results

In [8]:
# Run every model configuration listed in e.model_paramlist. Per the logged
# output, each run writes its results under <home_dir>/results/<model>/ as one
# .npy file per dataset.
for model_params in e.model_paramlist: 
    e.run_grid(model_params)

04:29:09 - Evaluate: INFO - Running ICA1 with parameters: {'cutoff': 0.01, 'n_components': 10, 'method': 'ICA1'}
04:29:14 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.2.npy
04:29:58 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.npy
04:30:03 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.npy
04:30:07 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_1_Z_20_5_1_noise_0.4.npy
04:30:10 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.4.npy
04:30:10 - Models: INFO - Run complete, results written to: ../evaluation/results/ICA1_10_0.01/D_5_100_2_Z_20_5_2_noise_0.2.npy
04:30:10 - Evaluate: INFO - Running GSD with parameters: {'a': 0.1, 'edge_file': '/nfs/latdata/iamjli/projects/GSD/evaluati

### Merge all results

In [9]:
# Flatten the score records returned for each model configuration into a
# single list, then tabulate them (one row per record).
scores = [
    record
    for model_params in e.model_paramlist
    for record in e.score_results(model_params)
]

scores_df = pd.DataFrame(scores)
scores_df.head()

Unnamed: 0,F1rr,model,n_samples,n_sources,noise,recovery,relevance,rep,size
0,0.799788,ICA1_10_0.01,20,5,0.0,1.0,0.666373,1,100
1,0.78945,ICA1_10_0.01,20,5,0.0,0.996078,0.653821,2,100
2,0.13382,ICA1_10_0.01,20,5,0.2,0.227493,0.094789,1,100
3,0.105004,ICA1_10_0.01,20,5,0.2,0.200567,0.071119,2,100
4,0.073471,ICA1_10_0.01,20,5,0.4,0.192356,0.045408,1,100


### Average over replicates

In [10]:
# Summarise each (model, dataset configuration, noise level) group across its
# replicates: mean and standard deviation of every score column.
# - `rep` only labels the replicate, so it is dropped *before* aggregating
#   instead of computing its mean/std and discarding them afterwards.
# - The aggregations are named by string: pandas >= 2.1 deprecates passing
#   np.mean / np.std to .agg(), and a future version would call np.std directly
#   (population std, ddof=0) instead of the sample std (ddof=1) reported here.
group_cols = ["model", "size", "n_sources", "n_samples", "noise"]
tmp = (
    scores_df
    .drop(columns=["rep"])
    .groupby(by=group_cols)
    .agg(["mean", "std"])
)
tmp.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,F1rr,F1rr,recovery,recovery,relevance,relevance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std,mean,std
model,size,n_sources,n_samples,noise,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
GSD_0.1_3_ica_5,100,5,20,0.0,0.011141,0.002536,0.009597,0.002052,0.013282,0.003275
GSD_0.1_3_ica_5,100,5,20,0.2,0.008982,0.001226,0.008678,0.001191,0.009307,0.001264
GSD_0.1_3_ica_5,100,5,20,0.4,0.008161,0.00067,0.007966,0.000553,0.008367,0.000799
ICA1_10_0.01,100,5,20,0.0,0.794619,0.00731,0.998039,0.002773,0.660097,0.008875
ICA1_10_0.01,100,5,20,0.2,0.119412,0.020375,0.21403,0.019039,0.082954,0.016737


In [11]:
# Restrict the summary to a single model. `model` is an index level of the
# grouped frame (it is one of the groupby keys), and DataFrame.query can refer
# to index levels by name.
tmp.query('model=="ICA1_10_0.01"')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,F1rr,F1rr,recovery,recovery,relevance,relevance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,mean,std,mean,std
model,size,n_sources,n_samples,noise,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ICA1_10_0.01,100,5,20,0.0,0.794619,0.00731,0.998039,0.002773,0.660097,0.008875
ICA1_10_0.01,100,5,20,0.2,0.119412,0.020375,0.21403,0.019039,0.082954,0.016737
ICA1_10_0.01,100,5,20,0.4,0.067092,0.009022,0.183931,0.011915,0.041056,0.006154
