In [206]:
import numpy as np
import pandas as pd
import os

# Load the ground-truth connection matrices used for the real-data experiments.
# Files are looked up as data/connections/<data_size>/<file> and read with the
# 'Unnamed: 0' column as the index.
connection_files = [
    f'data/connections/{data_size}/{file}'
    # Sorted so the order of the resulting dict does not depend on the filesystem.
    for data_size in sorted(os.listdir('data/connections'))
    # Skip stray files next to the size directories; os.listdir on a plain
    # file would raise NotADirectoryError.
    if os.path.isdir(os.path.join('data/connections', data_size))
    for file in sorted(os.listdir(os.path.join('data/connections', data_size)))
]

# Keys have the form '<data_size>_<file name up to its first dot>'.
connections = {
    file.split('/')[-2] + '_' + file.split('/')[-1].split('.')[0]:
        np.array(pd.read_csv(file, index_col='Unnamed: 0'), dtype=np.int64)
    for file in connection_files
}

In [207]:
def accuracy(GC_est, GC_gt):
    """Return the fraction of entries on which two arrays agree.

    Both inputs are flattened before the element-wise comparison, so only the
    number of entries has to match, not the exact shape.

    Args:
        GC_est: Estimated values (array-like).
        GC_gt: Ground-truth values (array-like) with the same number of entries.

    Returns:
        float: Number of equal entries divided by the total number of entries.

    Raises:
        ValueError: If the inputs hold a different number of entries.
        ZeroDivisionError: If both inputs are empty.
    """
    est = np.asarray(GC_est).ravel()
    gt = np.asarray(GC_gt).ravel()

    # Without this check a size-1 input would silently broadcast against the
    # other one and produce a meaningless score.
    if est.size != gt.size:
        raise ValueError(
            f"Cannot compare arrays with {est.size} and {gt.size} entries."
        )

    # count_nonzero runs in C; the builtin sum() iterates element by element.
    return np.count_nonzero(est == gt) / est.size

def calculate_real_data_accuracy(GC_est, experiment_name):
    """Score an estimate against every ground-truth matrix of a real-data experiment.

    Experiments whose name starts with '6' or '7' are compared with the
    entries of the module-level ``connections`` dict whose key starts with
    'full_dataset'; experiments starting with '8' use the 'part_dataset'
    entries.

    Args:
        GC_est: Estimated matrix (array-like, converted with ``np.array``).
        experiment_name: Name of the experiment, e.g. '6'.

    Returns:
        dict: Maps '<connection key>_acc' to the value returned by
        ``accuracy`` for that ground-truth matrix.

    Raises:
        FileNotFoundError: If the experiment name starts with none of
            '6', '7' or '8'.
    """
    if experiment_name.startswith(('6', '7')):
        prefix = 'full_dataset'
    elif experiment_name.startswith('8'):
        prefix = 'part_dataset'
    else:
        raise FileNotFoundError("There is no connectivity matrix for this experiment.")

    # Filter into a local view only. Rebinding the global `connections` here
    # would permanently drop the other dataset's matrices, so a later call for
    # a different experiment would silently return an empty dict.
    GC_est_np = np.array(GC_est)
    return {
        connection_name + '_acc': accuracy(GC_est_np, connection)
        for connection_name, connection in connections.items()
        if connection_name.startswith(prefix)
    }

In [208]:
import json
import os

def get_results(experiment: str, attrs: list, result_files: list = None):
    """Collect selected attributes from the results.json files of an experiment.

    Each results file is expected to hold a JSON object whose values are the
    individual runs; a run is indexed as ``run[config_part][attr]``.

    Args:
        experiment: Experiment name, e.g. '0', '1a' or '6'.
        attrs: Iterable of ``(config_part, attr)`` pairs to extract, e.g.
            ``('experiment_config', 'seed')``.
        result_files: Paths of the results files to read. When None, every
            ``results/<experiment>/<model>/results.json`` of the model
            directories found under ``results/<experiment>`` is used
            (in sorted order, so the row order is reproducible).

    Returns:
        dict: Maps each attribute name to the list of its values over all
        runs of all files. For experiments starting with '6', '7' or '8' the
        'accuracy' attribute is replaced by one key per entry returned by
        ``calculate_real_data_accuracy``. For experiments starting with '1'
        only runs whose 'c11' equals 'c22' are kept.
    """
    if result_files is None:
        path = f'results/{experiment}'
        result_files = [
            f'results/{experiment}/{model_name}/results.json'
            for model_name in sorted(os.listdir(path))
            if os.path.isdir(os.path.join(path, model_name))
        ]

    is_real_data = experiment.startswith(('6', '7', '8'))
    keep_symmetric_only = experiment.startswith('1')

    results = {}
    for result_file in result_files:
        with open(result_file) as f:
            runs = list(json.load(f).values())

        for config_part, attr in attrs:
            if is_real_data and attr == 'accuracy':
                # Real data: accuracy is recomputed from the estimated matrix
                # against each ground-truth graph. Appending per run (instead
                # of peeking at the first run for the keys) also copes with a
                # results file that contains no runs.
                for run in runs:
                    run_accs = calculate_real_data_accuracy(run['model_results']['GC_est'], experiment)
                    for acc_type, acc_value in run_accs.items():
                        results.setdefault(acc_type, []).append(acc_value)
                continue

            if keep_symmetric_only:
                values = [
                    run[config_part][attr]
                    for run in runs
                    if run['experiment_config']['c11'] == run['experiment_config']['c22']
                ]
            else:
                values = [run[config_part][attr] for run in runs]
            results.setdefault(attr, []).extend(values)

    return results

# Experiment 0

In [219]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [217]:
# Experiment 0: collect model, accuracy, seed and the train-set settings of every run
experiment = '0'
attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'trainset_size', 'tvt_split')]

results = get_results(experiment, attrs)

In [218]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_results_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,trainset_size,tvt_split
0,ngc,0.75,1,500,0.1
1,ngc,1.00,101,500,0.1
2,ngc,1.00,33,500,0.1
3,ngc,0.75,12,500,0.1
4,ngc,1.00,0,500,0.1
...,...,...,...,...,...
595,tvar,1.00,1,5000,1.0
596,tvar,1.00,101,5000,1.0
597,tvar,1.00,33,5000,1.0
598,tvar,1.00,12,5000,1.0


# Experiment 1a / 1b / 1c / 1d

In [230]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [228]:
# Experiment 1a: collect model, accuracy, seed and the four c-coefficients of every run
experiment = '1a'

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'c11', 'c12', 'c21', 'c22')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22'])

In [229]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_results_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22
0,ngc,1.00,1,0.4,0.0,0.0,0.4
1,ngc,1.00,101,0.4,0.0,0.0,0.4
2,ngc,0.50,33,0.4,0.0,0.0,0.4
3,ngc,0.75,12,0.4,0.0,0.0,0.4
4,ngc,0.50,0,0.4,0.0,0.0,0.4
...,...,...,...,...,...,...,...
115,tvar,1.00,1,0.4,0.5,0.5,0.4
116,tvar,1.00,101,0.4,0.5,0.5,0.4
117,tvar,1.00,33,0.4,0.5,0.5,0.4
118,tvar,1.00,12,0.4,0.5,0.5,0.4


# Experiment 2

In [234]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [232]:
# Experiment 2: collect model, accuracy, seed, the c-coefficients and both sigma_eta settings
experiment = '2'

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [
    ('experiment_config', key)
    for key in ('seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag')
]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'sigma_eta_diag', 'sigma_eta_off_diag'])

In [233]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,sigma_eta_diag,sigma_eta_off_diag
0,ngc,1.0,1,0.8,0.0,0,0.8,0.01,0.00
1,ngc,1.0,101,0.8,0.0,0,0.8,0.01,0.00
2,ngc,1.0,33,0.8,0.0,0,0.8,0.01,0.00
3,ngc,1.0,12,0.8,0.0,0,0.8,0.01,0.00
4,ngc,1.0,0,0.8,0.0,0,0.8,0.01,0.00
...,...,...,...,...,...,...,...,...,...
155,tvar,1.0,1,0.8,0.8,0,0.8,0.02,0.02
156,tvar,1.0,101,0.8,0.8,0,0.8,0.02,0.02
157,tvar,1.0,33,0.8,0.8,0,0.8,0.02,0.02
158,tvar,1.0,12,0.8,0.8,0,0.8,0.02,0.02


# Experiment 3

In [240]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [238]:
# Experiment 3: collect model, accuracy, seed, the c-coefficients, n_data and the run date
experiment = '3'

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'c11', 'c12', 'c21', 'c22', 'n_data')]
# 'date' is read from the model results, not from the experiment config
attrs.append(('model_results', 'date'))

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'n_data', 'date'])

In [239]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,n_data,date
0,ngc,1.000000,1,0.0,0.0,0.0,0.0,2,23-11-2022 00:40:45
1,ngc,1.000000,101,0.0,0.0,0.0,0.0,2,23-11-2022 00:43:16
2,ngc,1.000000,33,0.0,0.0,0.0,0.0,2,23-11-2022 00:45:47
3,ngc,1.000000,12,0.0,0.0,0.0,0.0,2,23-11-2022 00:48:20
4,ngc,1.000000,0,0.0,0.0,0.0,0.0,2,23-11-2022 00:50:51
...,...,...,...,...,...,...,...,...,...
155,tvar,1.000000,1,0.8,0.0,0.0,0.8,8,28-11-2022 18:28:16
156,tvar,1.000000,101,0.8,0.0,0.0,0.8,8,28-11-2022 18:39:55
157,tvar,0.968750,33,0.8,0.0,0.0,0.8,8,28-11-2022 18:49:37
158,tvar,0.968750,12,0.8,0.0,0.0,0.8,8,28-11-2022 19:00:35


# Experiment 4

In [244]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [245]:
# Experiment 4: collect model, accuracy, seed, the c-coefficients and the sparsity setting
experiment = '4'

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'c11', 'c12', 'c21', 'c22', 'sparsity')]

results = get_results(experiment, attrs)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22', 'sparsity'])

In [246]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_data_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22,sparsity
0,ngc,1.0,1,0.8,0.0,0.0,0.8,0.125
1,ngc,1.0,101,0.8,0.0,0.0,0.8,0.125
2,ngc,1.0,33,0.8,0.0,0.0,0.8,0.125
3,ngc,1.0,12,0.8,0.0,0.0,0.8,0.125
4,ngc,1.0,0,0.8,0.0,0.0,0.8,0.125
...,...,...,...,...,...,...,...,...
155,tvar,1.0,1,0.8,0.0,0.0,0.8,1.000
156,tvar,1.0,101,0.8,0.0,0.0,0.8,1.000
157,tvar,1.0,33,0.8,0.0,0.0,0.8,1.000
158,tvar,1.0,12,0.8,0.0,0.0,0.8,1.000


# Experiment 5

In [250]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [251]:
# Experiment 5: results sit one directory deeper than in the other experiments
# (results/5/<model>/<sub-directory>/results.json), so the file list is built here
experiment = '5'
path = f'results/{experiment}'
result_files = [
    f'results/{experiment}/{model_name}/{ex}/results.json'
    for model_name in os.listdir(path)
    if os.path.isdir(os.path.join(path, model_name))
    for ex in os.listdir(os.path.join(path, model_name))
    if os.path.isdir(os.path.join(path, model_name, ex))
]

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'c11', 'c12', 'c21', 'c22')]

results = get_results(experiment, attrs, result_files)
results.keys()

dict_keys(['model', 'accuracy', 'seed', 'c11', 'c12', 'c21', 'c22'])

In [252]:
# Build the results table; optionally round-trip it through a CSV file.
# The directory is derived from `experiment` (as in the other experiment
# sections) instead of a hard-coded 'results/5', so the path cannot drift
# from the experiment that was actually loaded.
df = pd.DataFrame(data=results)
if save_loaded_results:
    df.to_csv(f'results/{experiment}/processed_results_{experiment}.csv')
    df = pd.read_csv(f'results/{experiment}/processed_results_{experiment}.csv', index_col='Unnamed: 0')
df

Unnamed: 0,model,accuracy,seed,c11,c12,c21,c22
0,ngc,0.75,1,0.9,0.4,0.0,0.9
1,ngc,0.75,101,0.9,0.4,0.0,0.9
2,ngc,0.75,33,0.9,0.4,0.0,0.9
3,ngc,0.75,12,0.9,0.4,0.0,0.9
4,ngc,0.75,0,0.9,0.4,0.0,0.9
...,...,...,...,...,...,...,...
75,tvar,0.50,1,0.9,0.0,0.0,0.9
76,tvar,0.50,101,0.9,0.0,0.0,0.9
77,tvar,0.25,33,0.9,0.0,0.0,0.9
78,tvar,0.50,12,0.9,0.0,0.0,0.9


# Experiment 6

In [187]:
import pandas as pd

# When True, the processed table is written to CSV and read back from that file.
save_loaded_results = False

In [195]:
# Experiment 6 (real data): only the two listed model directories are read;
# 'accuracy' is expanded by get_results into one column per ground-truth graph
experiment = '6'

attrs = [('experiment_config', 'model'), ('model_results', 'accuracy')]
attrs += [('experiment_config', key) for key in ('seed', 'tvt_split')]

result_files = [f'results/6/{model_dir}/results.json' for model_dir in ('ngc', 'ngc0')]
results = get_results(experiment, attrs, result_files)
results.keys()

dict_keys(['model', 'full_dataset_bi_hvar_acc', 'full_dataset_full_hvar_acc', 'full_dataset_pds_lm_hvar_acc', 'seed', 'tvt_split'])

In [196]:
# Build the results table; optionally round-trip it through a CSV file
df = pd.DataFrame(results)
if save_loaded_results:
    processed_csv = f'results/{experiment}/processed_results_{experiment}.csv'
    df.to_csv(processed_csv)
    df = pd.read_csv(processed_csv, index_col='Unnamed: 0')
df

Unnamed: 0,model,full_dataset_bi_hvar_acc,full_dataset_full_hvar_acc,full_dataset_pds_lm_hvar_acc,seed,tvt_split
0,ngc,0.0,0.874444,0.863333,1,0.1
1,ngc,0.0,0.874444,0.863333,1,0.2
2,ngc,0.0,0.874444,0.863333,1,0.5
3,ngc,0.0,0.874444,0.863333,1,0.8
4,ngc,0.0,0.874444,0.863333,1,1.0
5,ngc0,0.0,0.874444,0.863333,1,0.1
6,ngc0,0.0,0.874444,0.863333,1,0.2
7,ngc0,0.0,0.874444,0.863333,1,0.5
8,ngc0,0.0,0.874444,0.863333,1,0.8
9,ngc0,0.0,0.874444,0.863333,1,1.0
