In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# project package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import numpy as np
from math import log
from CIoTS import *
from tqdm import trange
import json
from time import time

In [None]:
# --- experiment grid -------------------------------------------------------
runs = 20               # repetitions generated for every true lag p
max_p = 20              # upper end of the lag sweep (p advances in steps of 2)

# --- synthetic data passed to CausalTSGenerator ------------------------------
dimensions = 3
data_length = 10000
incoming_edges = 3

# --- conditional-independence testing ---------------------------------------
# Passed to every pc_* call; presumably the significance level -- confirm in CIoTS.
alpha = 0.05

In [None]:
# Empty frame holding one row per (run, true p, method, iteration p).
results = pd.DataFrame(
    columns=[
        'run', 'p', 'method', 'iteration p',
        'f1', 'fpr', 'precision', 'recall',
        'bic', 'time',
    ]
)

In [None]:
def _iteration_rows(true_graph, graphs, times, bics, run, p, method):
    """Score every intermediate graph returned by one incremental-PC call.

    Parameters
    ----------
    true_graph : graph
        Ground-truth graph, passed as first argument to ``evaluate_edges``.
    graphs, times, bics : mapping
        Per-iteration outputs of the PC variant, all keyed by the iteration's p.
    run, p, method
        Identifiers copied verbatim into every produced row.

    Returns
    -------
    list of dict
        One record per entry of ``graphs`` with the result-table columns.
    """
    rows = []
    for p_iter, graph in graphs.items():
        scores = evaluate_edges(true_graph, graph)
        rows.append({'run': run, 'p': p, 'method': method, 'iteration p': p_iter,
                     'f1': scores['f1-score'], 'fpr': scores['FPR'],
                     'precision': scores['precision'], 'recall': scores['TPR'],
                     'bic': bics[p_iter], 'time': times[p_iter]})
    return rows


# Label -> algorithm; every variant is called with identical arguments below.
_incremental_methods = [('incremental', pc_incremental),
                        ('extensive', pc_incremental_extensive),
                        ('subsets', pc_incremental_subsets)]

# Alphabetical column order: this is the layout the earlier append(sort=True)
# based version wrote to result.csv, kept so existing files stay comparable.
_result_columns = ['bic', 'f1', 'fpr', 'iteration p', 'method',
                   'p', 'precision', 'recall', 'run', 'time']

# Both accumulators are reset here so re-running the cell starts from scratch.
data = []
_result_records = []

# NOTE(review): no random seed is set anywhere visible, so generated graphs
# differ between kernel sessions -- confirm whether CausalTSGenerator takes one.
for p in trange(2, max_p+2, 2):
    for run in trange(runs):
        # generate graph and data
        generator = CausalTSGenerator(dimensions=dimensions, max_p=p, data_length=data_length,
                                      incoming_edges=incoming_edges)
        ts = generator.generate()
        data.append({'graph': generator.graph, 'ts': ts})

        # run each incremental PC variant on the same series and score all iterations
        for method, pc_variant in _incremental_methods:
            _, graphs, times, bics = pc_variant(partial_corr_test, ts, alpha, 2*max_p,
                                                verbose=True, patiency=2*max_p)
            _result_records.extend(
                _iteration_rows(generator.graph, graphs, times, bics, run, p, method))

        # Checkpoint after every run. The frame is rebuilt from the record list
        # because DataFrame.append no longer exists in pandas >= 2.0.
        results = pd.DataFrame(_result_records, columns=_result_columns)
        results.to_csv('results/iterations/result.csv', index=False)

In [None]:
def dump_data(data, file):
    """Write the generated experiments to ``file`` as JSON.

    ``data`` is an iterable of dicts with a ``'graph'`` entry (converted with
    ``nx.to_dict_of_lists``) and a ``'ts'`` entry (converted with its
    ``to_dict`` method). The converted entries are stored as one JSON list.
    """
    serialisable = [
        {'graph': nx.to_dict_of_lists(entry['graph']), 'ts': entry['ts'].to_dict()}
        for entry in data
    ]
    # Conversion happens before the file is opened, so a failure above
    # leaves any existing file untouched.
    with open(file, 'w+') as handle:
        json.dump(serialisable, handle)
        
def load_data(file):
    """Read experiments written by ``dump_data`` back into memory.

    Returns a list of dicts with a ``'graph'`` entry (an ``nx.DiGraph`` built
    from the stored adjacency lists) and a ``'ts'`` entry (a DataFrame whose
    index is cast to int and sorted ascending).
    """
    with open(file, 'r') as handle:
        stored = json.load(handle)

    def _restore(entry):
        digraph = nx.from_dict_of_lists(entry['graph'], nx.DiGraph())
        frame = pd.DataFrame.from_dict(entry['ts'])
        # JSON object keys are always strings; restore the integer index
        # before ordering the rows.
        frame.index = frame.index.astype(int)
        return {'graph': digraph, 'ts': frame.sort_index()}

    return [_restore(entry) for entry in stored]

In [None]:
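# Uncomment to persist the freshly generated graphs and time series. The next
# cell reads this same file, so it must exist before the comparison can run.
# dump_data(data, 'results/iterations/data.json')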

In [65]:
# Reload the stored graphs and time series written by dump_data; the baseline
# comparison below runs on these rather than on the in-memory `data` list.
loaded_data = load_data('results/iterations/data.json')

In [None]:
_comp_columns = ['run', 'p', 'method', 'iteration p', 'f1', 'fpr', 'precision', 'recall', 'time']


def _comparison_row(true_graph, predicted_graph, run, p, p_used, method, runtime):
    """Build one comparison-table record for a single predicted graph.

    ``p`` is the lag derived from the true graph, ``p_used`` the lag that was
    actually handed to the PC algorithm (stored as ``'iteration p'``), and
    ``runtime`` the measured wall-clock seconds for this method.
    """
    scores = evaluate_edges(true_graph, predicted_graph)
    return {'run': run, 'p': p, 'iteration p': p_used, 'method': method,
            'f1': scores['f1-score'],
            'precision': scores['precision'],
            'recall': scores['TPR'],
            'fpr': scores['FPR'],
            'time': runtime}


comp_results = pd.DataFrame(columns=_comp_columns)
_comp_records = []
for i in trange(len(loaded_data)):
    graph = loaded_data[i]['graph']
    ts = loaded_data[i]['ts']
    # The generation loop stores `runs` consecutive entries per p, so the
    # position within that group is the run number (was a hard-coded 20).
    run = i % runs
    # Recover the true lag from the node count and `dimensions`.
    p = int(len(graph.nodes())/dimensions - 1)

    # baseline 1: PC with the true lag
    start_time = time()
    predicted_graph = pc_chen_modified(partial_corr_test, ts, p, alpha)
    runtime = time() - start_time
    _comp_records.append(_comparison_row(graph, predicted_graph, run, p, p, 'real', runtime))

    # baseline 2: PC with the first entry of the BIC ranking as lag;
    # the order selection is deliberately part of the timed section
    start_time = time()
    var_ranking, _ = var_order_select(ts, 2*(max_p-2), ['bic'])
    p_est = var_ranking['bic'][0]
    predicted_graph = pc_chen_modified(partial_corr_test, ts, p_est, alpha)
    runtime = time() - start_time
    _comp_records.append(_comparison_row(graph, predicted_graph, run, p, p_est, 'bic', runtime))

    # Checkpoint after every data set. The frame is rebuilt from the record
    # list because DataFrame.append no longer exists in pandas >= 2.0.
    comp_results = pd.DataFrame(_comp_records, columns=_comp_columns)
    comp_results.to_csv('results/iterations/comp_result.csv', index=False)




  0%|          | 0/200 [00:00<?, ?it/s][A[A[A


  0%|          | 1/200 [00:03<11:05,  3.34s/it][A[A[A


  1%|          | 2/200 [00:05<08:38,  2.62s/it][A[A[A


  2%|▏         | 3/200 [00:07<08:27,  2.58s/it][A[A[A