In [2]:
import numpy as np
import pandas as pd
import os

# Load Ground Truth connections for real data.
# Layout read here: data/connections/<data_size>/<file>; every file is parsed as a
# CSV whose index column is named 'Unnamed: 0' and stored as an int64 matrix under
# the key '<data_size>_<file name up to its first dot>'.
connections_root = 'data/connections'

# os.listdir returns entries in arbitrary order, so both levels are sorted to keep
# the key order of `connections` reproducible across machines. Non-directory
# entries at the top level, and hidden or non-file entries below it (for example
# .DS_Store or .ipynb_checkpoints), are skipped instead of crashing the load.
connection_files = [
    f'{connections_root}/{data_size}/{file}'
    for data_size in sorted(os.listdir(connections_root))
    if os.path.isdir(os.path.join(connections_root, data_size))
    for file in sorted(os.listdir(os.path.join(connections_root, data_size)))
    if not file.startswith('.')
    and os.path.isfile(os.path.join(connections_root, data_size, file))
]

connections = {
    file.split('/')[-2] + '_' + file.split('/')[-1].split('.')[0]:
        np.array(pd.read_csv(file, index_col='Unnamed: 0'), dtype=np.int64)
    for file in connection_files
}

In [3]:
def accuracy(GC_est, GC_gt):
    """Return the fraction of entries of ``GC_est`` that equal the ground truth.

    ``GC_gt`` may be larger than ``GC_est``: it is cropped to its top-left block
    with as many rows and columns as ``GC_est`` before the element-wise
    comparison.

    Parameters
    ----------
    GC_est : array-like
        Estimated connectivity matrix (expected to be 2-D).
    GC_gt : array-like
        Ground-truth connectivity matrix (2-D), at least as large as ``GC_est``
        along both axes.

    Returns
    -------
    numpy.float64
        Number of matching entries divided by the number of entries.

    Raises
    ------
    ValueError
        If ``GC_est`` is empty, or if the cropped ground truth does not have the
        same number of entries as ``GC_est`` (ground truth too small).
    """
    GC_est = np.asarray(GC_est)
    GC_gt = np.asarray(GC_gt)

    if GC_est.size == 0:
        raise ValueError("GC_est is empty; accuracy is undefined.")

    # Crop rows and columns independently so that non-square estimates are
    # compared against a block of the same shape (the row count used to be
    # applied to both axes).
    n_rows = GC_est.shape[0]
    n_cols = GC_est.shape[1] if GC_est.ndim > 1 else n_rows
    GC_gt = GC_gt[:n_rows, :n_cols]

    if GC_gt.size != GC_est.size:
        raise ValueError(
            f"Ground truth block of shape {GC_gt.shape} cannot be compared with "
            f"an estimate of shape {GC_est.shape}."
        )

    # Mean of the boolean match mask == matches / total.
    return np.mean(GC_est.ravel() == GC_gt.ravel())

def calculate_real_data_accuracy(results, experiment_name):
    """Score one run's estimated graph against the ground-truth graphs of its dataset.

    Parameters
    ----------
    results : dict
        Model results of a single run. The estimate is read from ``'GC'`` when
        that key is present, otherwise from ``'GC_est'``.
    experiment_name : str
        Experiment identifier. Names starting with '6' or '7' are scored against
        the ``connections`` entries whose key starts with 'full_dataset'; names
        starting with '8' against those starting with 'part_dataset'.

    Returns
    -------
    dict
        ``'<connection key>_acc'`` -> value returned by ``accuracy`` for that
        ground-truth matrix.

    Raises
    ------
    FileNotFoundError
        If ``experiment_name`` starts with none of '6', '7', '8'.
    """
    # Reads the module-level `connections` dict (never rebinds it).
    estimate_key = 'GC' if 'GC' in results else 'GC_est'
    GC_est_np = np.array(results[estimate_key])

    # Pick the family of ground-truth matrices that belongs to this experiment.
    if experiment_name.startswith(('6', '7')):
        wanted_prefix = 'full_dataset'
    elif experiment_name.startswith('8'):
        wanted_prefix = 'part_dataset'
    else:
        raise FileNotFoundError("There is no connectivity matrix for this experiment.")

    accs = {}
    for connection_name, connection in connections.items():
        if connection_name.startswith(wanted_prefix):
            accs[connection_name + '_acc'] = accuracy(GC_est_np, connection)
    return accs

In [4]:
import json
import os

def get_results(experiment: str, attrs: list, result_files: list = None):
    """Gather selected fields from every run stored in an experiment's result files.

    Each result file is a JSON object whose values are run records; a field is
    read as ``record[config_part][attr]``.

    Parameters
    ----------
    experiment : str
        Experiment identifier such as '0', '1c' or '6'. It selects the default
        ``results/{experiment}`` directory and the special cases listed below.
    attrs : list
        ``(config_part, attr)`` pairs to extract from each run record.
    result_files : list, optional
        Paths of the JSON files to read. When ``None``, every sub-directory of
        ``results/{experiment}`` contributes its ``results.json`` (visited in
        sorted order).

    Returns
    -------
    dict
        Column name -> list of values, concatenated over the files in order.
        Columns are named after ``attr``, except real-data accuracies, which use
        the keys returned by ``calculate_real_data_accuracy``.

    Notes
    -----
    * Experiments starting with '6', '7' or '8': the 'accuracy' attribute is
      computed by ``calculate_real_data_accuracy`` from each record's
      ``'model_results'`` instead of being read from the file.
    * Experiments starting with '1': only records whose ``c11`` equals ``c22``
      in ``'experiment_config'`` are kept.
    * Experiments starting with '1a': 'prior' is read from
      ``record[config_part]['nri']['prior']``.
    """
    if result_files is None:
        path = f'results/{experiment}'
        # os.listdir order is arbitrary; sort so the row order of the collected
        # results is reproducible.
        result_files = [
            f'results/{experiment}/{model_name}/results.json'
            for model_name in sorted(os.listdir(path))
            if os.path.isdir(os.path.join(path, model_name))
        ]

    is_real_data = experiment.startswith(('6', '7', '8'))
    diagonal_only = experiment.startswith('1')
    nested_prior = experiment.startswith('1a')

    results = {}
    for result_file in result_files:
        # Parse first, then process: the file handle is not needed afterwards.
        with open(result_file) as f:
            runs = list(json.load(f).values())

        for config_part, attr in attrs:
            if is_real_data and attr == 'accuracy':
                # One accuracy column per ground-truth graph. An empty results
                # file simply contributes no rows (it used to raise IndexError).
                for run in runs:
                    run_accs = calculate_real_data_accuracy(run['model_results'], experiment)
                    for acc_type, acc_value in run_accs.items():
                        results.setdefault(acc_type, []).append(acc_value)
                continue

            if diagonal_only:
                kept_runs = [
                    run for run in runs
                    if run['experiment_config']['c11'] == run['experiment_config']['c22']
                ]
            else:
                kept_runs = runs

            if nested_prior and attr == 'prior':
                values = [run[config_part]['nri'][attr] for run in kept_runs]
            else:
                values = [run[config_part][attr] for run in kept_runs]

            results.setdefault(attr, []).extend(values)

    return results

# Experiment 0

In [11]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = False

In [12]:
# Get data for experiment 0.
experiment = '0'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'trainset_size', 'tvt_split')]

results = get_results(experiment, attrs)

In [13]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,trainset_size,tvt_split
0,ngc,0.75,1,500,0.1
1,ngc,1.00,101,500,0.1
2,ngc,1.00,33,500,0.1
3,ngc,0.75,12,500,0.1
4,ngc,1.00,0,500,0.1
...,...,...,...,...,...
595,tvar,1.00,1,5000,1.0
596,tvar,1.00,101,5000,1.0
597,tvar,1.00,33,5000,1.0
598,tvar,1.00,12,5000,1.0


# Experiment 1a / 1b / 1c / 1d

In [58]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = True

In [74]:
# Get data for the selected experiment-1 variant.
experiment = '1c'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'c11', 'c12', 'c21', 'c22')]
# Only variant 1a additionally requests the 'prior' field.
if experiment == '1a':
    attrs += [('experiment_config', 'prior')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22'])

In [75]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22
0,ngc,1.0,1,0.8,0.0,0.0,0.8
1,ngc,1.0,101,0.8,0.0,0.0,0.8
2,ngc,1.0,33,0.8,0.0,0.0,0.8
3,ngc,1.0,12,0.8,0.0,0.0,0.8
4,ngc,1.0,0,0.8,0.0,0.0,0.8
...,...,...,...,...,...,...,...
195,tvar,1.0,1,0.8,0.9,0.0,0.8
196,tvar,1.0,101,0.8,0.9,0.0,0.8
197,tvar,1.0,33,0.8,0.9,0.0,0.8
198,tvar,1.0,12,0.8,0.9,0.0,0.8


# Experiment 2

In [65]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = True

In [66]:
# Get data for experiment 2.
experiment = '2'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [
    ('experiment_config', field)
    for field in ('seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag')
]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag'])

In [67]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,sigma_eta_diag,sigma_eta_off_diag
0,ngc,1.0,1,0.8,0.0,0,0.8,0.01,0.00
1,ngc,1.0,101,0.8,0.0,0,0.8,0.01,0.00
2,ngc,1.0,33,0.8,0.0,0,0.8,0.01,0.00
3,ngc,1.0,12,0.8,0.0,0,0.8,0.01,0.00
4,ngc,1.0,0,0.8,0.0,0,0.8,0.01,0.00
...,...,...,...,...,...,...,...,...,...
155,tvar,1.0,1,0.8,0.8,0,0.8,0.02,0.02
156,tvar,1.0,101,0.8,0.8,0,0.8,0.02,0.02
157,tvar,1.0,33,0.8,0.8,0,0.8,0.02,0.02
158,tvar,1.0,12,0.8,0.8,0,0.8,0.02,0.02


# Experiment 3

In [86]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = True

In [88]:
# Get data for experiment 3.
experiment = '3'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'n_data')]
attrs += [('model_results', 'time')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'n_data', 'time'])

In [89]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,n_data,time
0,ngc,1.000000,1,2,150.149770
1,ngc,1.000000,101,2,150.534178
2,ngc,1.000000,33,2,151.334934
3,ngc,1.000000,12,2,151.996004
4,ngc,1.000000,0,2,150.816408
...,...,...,...,...,...
155,tvar,1.000000,1,8,683.885257
156,tvar,1.000000,101,8,697.559011
157,tvar,0.968750,33,8,580.896820
158,tvar,0.968750,12,8,654.793177


# Experiment 4

In [6]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = False

In [11]:
# Get data for experiment 4.
experiment = '4'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [
    ('experiment_config', field)
    for field in ('seed', 'c11', 'c12', 'c21', 'c22', 'sparsity')
]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'sparsity'])

In [12]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,sparsity
0,ngc,1.0,1,0.8,0.0,0.0,0.8,0.125
1,ngc,1.0,101,0.8,0.0,0.0,0.8,0.125
2,ngc,1.0,33,0.8,0.0,0.0,0.8,0.125
3,ngc,1.0,12,0.8,0.0,0.0,0.8,0.125
4,ngc,1.0,0,0.8,0.0,0.0,0.8,0.125
...,...,...,...,...,...,...,...,...
155,tvar,1.0,1,0.8,0.0,0.0,0.8,1.000
156,tvar,1.0,101,0.8,0.0,0.0,0.8,1.000
157,tvar,1.0,33,0.8,0.0,0.0,0.8,1.000
158,tvar,1.0,12,0.8,0.0,0.0,0.8,1.000


# Experiment 5

In [82]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/5/processed_data_<experiment>.csv and read back from it.
save_loaded_results = True

In [83]:
# Get data for experiment 5.
experiment = '5'
path = f'results/{experiment}'

# Experiment 5 has one more directory level than get_results' default lookup:
# results/5/<model>/<sub-directory>/results.json, so the file list is built here.
model_dirs = [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]
result_files = [
    f'results/{experiment}/{model_name}/{ex}/results.json'
    for model_name in model_dirs
    for ex in os.listdir(os.path.join(path, model_name))
    if os.path.isdir(os.path.join(path, model_name, ex))
]

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'c11', 'c12', 'c21', 'c22')]

results = get_results(experiment, attrs, result_files)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22'])

In [84]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(data=results)
if save_loaded_results:
    # The directory follows `experiment` (as in the other experiment sections)
    # instead of a hard-coded 'results/5/'; with experiment == '5' the path is
    # unchanged.
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22
0,ngc,0.75,1,0.9,0.4,0.0,0.9
1,ngc,0.75,101,0.9,0.4,0.0,0.9
2,ngc,0.75,33,0.9,0.4,0.0,0.9
3,ngc,0.75,12,0.9,0.4,0.0,0.9
4,ngc,0.75,0,0.9,0.4,0.0,0.9
...,...,...,...,...,...,...,...
75,tvar,0.50,1,0.8,0.1,0.1,0.8
76,tvar,0.50,101,0.8,0.1,0.1,0.8
77,tvar,0.00,33,0.8,0.1,0.1,0.8
78,tvar,0.50,12,0.8,0.1,0.1,0.8


# Experiment 6

In [29]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = False

In [30]:
# Get data for experiment 6.
experiment = '6'

# Each entry is (section of a run record, field inside that section). For this
# experiment get_results replaces 'accuracy' with the per-graph accuracy columns
# computed by calculate_real_data_accuracy.
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'tvt_split')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'full_dataset_bi_hvar_acc', 'full_dataset_full_hvar_acc', 'full_dataset_pds_lm_hvar_acc', 'seed', 'tvt_split'])

In [31]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,full_dataset_bi_hvar_acc,full_dataset_full_hvar_acc,full_dataset_pds_lm_hvar_acc,seed,tvt_split
0,ngc,1.0,0.125556,0.136667,1,0.1
1,ngc,1.0,0.125556,0.136667,101,0.1
2,ngc,1.0,0.125556,0.136667,33,0.1
3,ngc,1.0,0.125556,0.136667,12,0.1
4,ngc,1.0,0.125556,0.136667,0,0.1
...,...,...,...,...,...,...
95,tvar,0.0,0.874444,0.863333,1,1.0
96,tvar,0.0,0.874444,0.863333,101,1.0
97,tvar,0.0,0.874444,0.863333,33,1.0
98,tvar,0.0,0.874444,0.863333,12,1.0


# Experiment 7

In [32]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = False

In [33]:
# Get data for experiment 7.
experiment = '7'

# Each entry is (section of a run record, field inside that section). For this
# experiment get_results replaces 'accuracy' with the per-graph accuracy columns
# computed by calculate_real_data_accuracy.
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'tvt_split')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'full_dataset_bi_hvar_acc', 'full_dataset_full_hvar_acc', 'full_dataset_pds_lm_hvar_acc', 'seed', 'tvt_split'])

In [34]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,full_dataset_bi_hvar_acc,full_dataset_full_hvar_acc,full_dataset_pds_lm_hvar_acc,seed,tvt_split
0,ngc,1.0,0.125556,0.136667,1,0.1
1,ngc,1.0,0.125556,0.136667,101,0.1
2,ngc,1.0,0.125556,0.136667,33,0.1
3,ngc,1.0,0.125556,0.136667,12,0.1
4,ngc,1.0,0.125556,0.136667,0,0.1
5,ngc0,1.0,0.125556,0.136667,1,0.1
6,ngc0,1.0,0.125556,0.136667,101,0.1
7,ngc0,1.0,0.125556,0.136667,33,0.1
8,ngc0,1.0,0.125556,0.136667,12,0.1
9,ngc0,1.0,0.125556,0.136667,0,0.1


# Experiment 8

In [35]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = False

In [36]:
# Get data for experiment 8.
experiment = '8'

# Each entry is (section of a run record, field inside that section). For this
# experiment get_results replaces 'accuracy' with the per-graph accuracy columns
# computed by calculate_real_data_accuracy.
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', field) for field in ('seed', 'tvt_split')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'part_dataset_bi_hvar_acc', 'part_dataset_full_hvar_acc', 'part_dataset_pds_lm_hvar_acc', 'seed', 'tvt_split'])

In [37]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,part_dataset_bi_hvar_acc,part_dataset_full_hvar_acc,part_dataset_pds_lm_hvar_acc,seed,tvt_split
0,ngc,0.132222,0.038889,0.128889,1,0.1
1,ngc,0.132222,0.038889,0.128889,101,0.1
2,ngc,0.132222,0.038889,0.128889,33,0.1
3,ngc,0.132222,0.038889,0.128889,12,0.1
4,ngc,0.132222,0.038889,0.128889,0,0.1
5,ngc0,0.132222,0.038889,0.128889,1,0.1
6,ngc0,0.132222,0.038889,0.128889,101,0.1
7,ngc0,0.132222,0.038889,0.128889,33,0.1
8,ngc0,0.132222,0.038889,0.128889,12,0.1
9,ngc0,0.132222,0.038889,0.128889,0,0.1


# Experiment 9

In [9]:
import pandas as pd

# Switch for the cell that builds `df`: when True, the frame is also written to
# results/<experiment>/processed_data_<experiment>.csv and read back from it.
save_loaded_results = True

In [10]:
# Get data for experiment 9.
experiment = '9'

# Each entry is (section of a run record, field inside that section).
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [
    ('experiment_config', field)
    for field in ('seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag')
]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag'])

In [11]:
# Build the table; when saving is enabled, write it to CSV and continue with the
# copy read back from that file.
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,sigma_eta_diag,sigma_eta_off_diag
0,ngc_noise,1.0,1,0.8,0,0,0.8,0.01,0.0
1,ngc_noise,1.0,101,0.8,0,0,0.8,0.01,0.0
2,ngc_noise,1.0,33,0.8,0,0,0.8,0.01,0.0
3,ngc_noise,1.0,12,0.8,0,0,0.8,0.01,0.0
4,ngc_noise,1.0,0,0.8,0,0,0.8,0.01,0.0
5,ngc_noise,0.5,1,0.8,0,0,0.8,0.01,0.01
6,ngc_noise,0.5,101,0.8,0,0,0.8,0.01,0.01
7,ngc_noise,0.5,33,0.8,0,0,0.8,0.01,0.01
8,ngc_noise,0.5,12,0.8,0,0,0.8,0.01,0.01
9,ngc_noise,0.5,0,0.8,0,0,0.8,0.01,0.01
