In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import numpy as np
from math import log
from CIoTS import *
import json
from time import time
import random

In [6]:
# --- Experiment grid --------------------------------------------------------
runs = 20                            # repetitions generated per setting
p_range = list(range(5, 25, 5))      # [5, 10, 15, 20]; passed to the generator as max_p
dimensions_range = [3, 5, 10]        # passed to the generator as dimensions
in_edges_range = [2, 4, 6]           # passed to the generator as incoming_edges

# --- Data split -------------------------------------------------------------
data_length = 10_000                 # leading rows used for structure/order estimation
test_length = 2_000                  # trailing rows held out for test MSE/BIC

# --- Algorithm parameters ---------------------------------------------------
alpha = 0.05                         # handed to pc_incremental_pc1 together with partial_corr_test
auto_corr = 0.75                     # passed to the generator as autocorrelation
p_max = 2 * max(p_range)             # largest lag the incremental search may reach
patience = p_max                     # same value as p_max (2 * max(p_range))

# --- Output -----------------------------------------------------------------
result_dir = 'results/p_estimation/'

In [None]:
#### PC1 incremental
#
# For every (incoming_edges, p, dimensions) combination this cell generates
# `runs` synthetic time series, runs pc_incremental_pc1 on the first
# `data_length` rows and, for every lag the algorithm reports a graph for,
# fits a VAR restricted to that graph and scores it on the held-out rows.
# The generated data is written to CSV and the accumulated results are
# re-dumped to estimates_pc1.json after each setting, so a crash loses at
# most the setting currently in progress.
# NOTE: result_dir + 'data/' must already exist; to_csv does not create it.

results_pc1 = []
for incoming_edges in in_edges_range:
    for p in p_range:
        for dimensions in dimensions_range:
            setting = {'dim': dimensions, 'p': p, 'in_edges': incoming_edges, 'auto_corr': auto_corr,
                       'patience': patience, 'p_max': p_max, 'alpha': alpha, 'experiments': []}
            for run in range(runs):
                generator = CausalTSGenerator(dimensions=dimensions, max_p=p, data_length=data_length+test_length,
                                              incoming_edges=incoming_edges, autocorrelation=auto_corr)
                ts_all = generator.generate()
                # Leading rows are used for estimation, the remainder for testing.
                ts = ts_all[:data_length]
                ts_test = ts_all[data_length:]

                # randint needs integer bounds: a float bound (previously 2e16-1) is
                # deprecated since Python 3.10 and raises TypeError from 3.12 on.
                # 2 * 10**16 - 1 is the upper bound the float expression effectively gave.
                rand_suffix = random.randint(0, 2 * 10**16 - 1)
                # The random suffix keeps file names from different notebook runs apart.
                csv_name = f'data/{p}_{dimensions}_{incoming_edges}_{run}_{rand_suffix}.csv'
                csv_test_name = f'data/test_{p}_{dimensions}_{incoming_edges}_{run}_{rand_suffix}.csv'

                experiment = {'true_graph': nx.to_dict_of_lists(generator.graph), 'data': csv_name,
                              'test_data': csv_test_name, 'iterations': []}

                # graphs, times and bics are all indexed by the lag of the iteration.
                _, graphs, times, bics, _, _ = pc_incremental_pc1(partial_corr_test, ts, alpha, p_max,
                                                                  verbose=True, patiency=patience)

                for p_iter, graph in graphs.items():
                    # Fit a VAR of order p_iter constrained to the estimated graph, then
                    # evaluate on the test rows (first p_iter rows serve as the initial lags).
                    test_var = VAR(p_iter)
                    nm, dm = transform_ts(ts, p_iter)
                    test_var.fit_from_graph(dimensions, dm, graph, nm)
                    test_mse, test_bic = test_var.evaluate_test_set(ts_test[:p_iter], ts_test[p_iter:])
                    experiment['iterations'].append({'p_iter': p_iter, 'bic': bics[p_iter], 'time': times[p_iter],
                                                     'graph': nx.to_dict_of_lists(graph), 'test_mse': test_mse,
                                                     'test_bic': test_bic})

                # Persist the data so the later cells can reload exactly this series.
                ts.to_csv(result_dir+csv_name, index=False)
                ts_test.to_csv(result_dir+csv_test_name, index=False)
                setting['experiments'].append(experiment)

            print(f'{incoming_edges} {p} {dimensions} done')

            # Checkpoint: rewrite the full result list after every finished setting.
            results_pc1.append(setting)
            with open(result_dir+'estimates_pc1.json', 'w') as f:
                json.dump(results_pc1, f)

2 5 3 done


In [None]:
### PC1 with VAR for p estimation
#
# Reuses the settings and data files recorded by the PC1 cell
# (estimates_pc1.json). For each stored series the lag order is ranked with
# var_order_select (BIC); for every ranked order p_est, PC1 is run and two
# VAR models of order p_est are scored on the test data: one fitted on the
# raw data and one restricted to the PC1 graph.

with open(result_dir+'estimates_pc1.json', 'r') as f:
    results_var = json.load(f)

# Strip the PC1 per-iteration results up front, in a separate pass, so that
# none of the per-setting checkpoints written below still contains them.
for setting in results_var:
    for experiment in setting['experiments']:
        del experiment['iterations']

for setting in results_var:
    for experiment in setting['experiments']:
        experiment['estimates'] = []
        ts = pd.read_csv(result_dir+experiment['data'])
        ts_test = pd.read_csv(result_dir+experiment['test_data'])

        # Time only the order selection; the same duration is stored with every estimate.
        start_time = time()
        var_ranking, var_scores = var_order_select(ts, setting['p_max'], ['bic'])
        end_time = time()

        for p_est, score in zip(var_ranking['bic'], var_scores['bic']):
            _, graphs, times, bics, _, _ = pc_incremental_pc1(partial_corr_test, ts, setting['alpha'], p_est+1,
                                                              verbose=True, patiency=0, steps=p_est)
            # VAR fitted directly on the data
            var_data_model = VAR(p_est)
            var_data_model.fit(ts)
            var_test_mse, var_test_bic = var_data_model.evaluate_test_set(ts_test[:p_est], ts_test[p_est:])

            # VAR restricted to the graph PC1 produced for lag p_est
            var_graph_model = VAR(p_est)
            nm, dm = transform_ts(ts, p_est)
            var_graph_model.fit_from_graph(setting['dim'], dm, graphs[p_est], nm)
            test_mse, test_bic = var_graph_model.evaluate_test_set(ts_test[:p_est], ts_test[p_est:])

            experiment['estimates'].append({'p_est': p_est, 'time_pc1': times[p_est],
                                            'time_var': end_time-start_time, 'graph': nx.to_dict_of_lists(graphs[p_est]),
                                            'bic': bics[p_est], 'var_bic': score,
                                            'test_bic': test_bic, 'var_test_bic': var_test_bic,
                                            'test_mse': test_mse, 'var_test_mse': var_test_mse})

    incoming_edges = setting['in_edges']
    p = setting['p']
    # Include the dimension: several settings share (in_edges, p), and this
    # matches the "<in_edges> <p> <dim> done" format of the PC1 cell.
    print(f"{incoming_edges} {p} {setting['dim']} done")

    # Checkpoint after every setting.
    with open(result_dir+'estimates_var.json', 'w') as f:
        json.dump(results_var, f)

In [None]:
### PC1 with cross correlation for p estimation

# Values passed to cross_corr_peaks as n_peaks; one estimate is recorded per value.
peak_range = [1, 2, 3]

with open(result_dir+'estimates_pc1.json', 'r') as f:
    results_cross_corr = json.load(f)

# Strip the PC1 per-iteration results up front, in a separate pass, so that
# none of the per-setting checkpoints written below still contains them.
for setting in results_cross_corr:
    for experiment in setting['experiments']:
        del experiment['iterations']

for setting in results_cross_corr:
    for experiment in setting['experiments']:
        experiment['estimates'] = []
        ts = pd.read_csv(result_dir+experiment['data'])

        for peaks in peak_range:
            # 'time' covers the lag estimation plus the PC1 run;
            # 'time_pc1' is the duration PC1 itself reports for lag p_est.
            start_time = time()
            p_est = int(cross_corr_peaks(ts, n_peaks=peaks))
            _, graphs, times, bics, _, _ = pc_incremental_pc1(partial_corr_test, ts, setting['alpha'], p_est+1,
                                                              verbose=True, patiency=0, steps=p_est)
            end_time = time()
            experiment['estimates'].append({'p_est': p_est, 'bic': bics[p_est], 'time_pc1': times[p_est], 'peak': peaks,
                                            'time': end_time-start_time, 'graph': nx.to_dict_of_lists(graphs[p_est])})

    incoming_edges = setting['in_edges']
    p = setting['p']
    # Include the dimension: several settings share (in_edges, p), and this
    # matches the "<in_edges> <p> <dim> done" format of the PC1 cell.
    print(f"{incoming_edges} {p} {setting['dim']} done")

    # Checkpoint after every setting.
    with open(result_dir+'estimates_cross_corr.json', 'w') as f:
        json.dump(results_cross_corr, f)

In [None]:
# NOTE(review): bare literal with no effect on the experiments above; looks
# like leftover scratch input — confirm and delete this cell.
5