In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
if not os.getcwd().endswith('CIoTS'):
    os.chdir('../..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from CIoTS import *

  return f(*args, **kwds)
  return f(*args, **kwds)


Could not import r-package RCIT
Could not import r-package acepack for GPACE, use python ACE package
Could not import python ACE package for GPACE


### Helper: check if data exists

In [3]:
import os.path

def check_setups(setups, data_path):
    """Return True when ``missing_setups`` reports no missing setup, else False."""
    absent = missing_setups(setups, data_path)
    return not absent

def missing_setups(setups, data_path):
    """List the setups whose pickle file does not exist.

    Each setup is a 6-tuple ``(dim, in_edges, tau, autocorr, <ignored>, run)``;
    the fifth entry does not appear in the file name. ``data_path`` is
    concatenated directly with the file name (no separator is inserted).

    Returns a list of ``(dim, in_edges, tau, autocorr, run)`` tuples, in the
    order of ``setups``, for which the expected file is not a regular file.
    """
    def pickle_exists(dim, in_edges, tau, autocorr, run):
        file_name = f't={tau}_d={dim}_in={in_edges}_autocorr={autocorr}_{run}.pickle'
        return os.path.isfile(data_path + file_name)

    return [(dim, in_edges, tau, autocorr, run)
            for dim, in_edges, tau, autocorr, _, run in setups
            if not pickle_exists(dim, in_edges, tau, autocorr, run)]

# Execution

In [4]:
from itertools import product

# Parameter grid of the experiments; every combination of the values below is one setup.
dimensions = [3, 5, 10]
incoming_edges = [2, 3, 4]
taus = [5, 10, 15, 20]
autocorrs = [False, True]
data_length = [10000]
runs = range(10)

# Setup tuples are ordered (dimension, incoming edges, tau, autocorr, data length, run).
setups = [
    *product(dimensions, incoming_edges, taus, autocorrs, data_length, runs)
]

## Runtime for inc vs. non-inc

PC1 iterative vs. PC1 non-iterative

Also check the non-iterative variant for maximum lags from $\tau - k$ to $\tau + k$

In [None]:
import os
import pickle
from time import perf_counter, time  # `time` stays imported in case other cells rely on it

data_path = 'notebooks/ICML/icml_data_v2/'
results_path = 'notebooks/ICML/icml_results_v2/'
k = 2  # the non-incremental variant is timed for max lags tau-k, ..., tau+k

# Look for missing data files once; report them and skip them below instead of
# crashing with FileNotFoundError in the middle of a long run.
missing = missing_setups(setups, data_path)
if missing:
    print('Missing setups:')
    print(missing)
skipped = set(missing)

# Make sure the CSV checkpoint can be written after the first (expensive) setup.
os.makedirs(results_path, exist_ok=True)

# Rows are collected as dicts and turned into a frame in one go:
# DataFrame.append was removed in pandas 2.0.
records = []
results = pd.DataFrame()

for dim, in_edges, tau, autocorr, _, run in setups:
    if (dim, in_edges, tau, autocorr, run) in skipped:
        continue

    data_file = data_path + f't={tau}_d={dim}_in={in_edges}_autocorr={autocorr}_{run}.pickle'
    # NOTE: pickle.load can execute arbitrary code; only load files you generated yourself.
    with open(data_file, 'rb') as data_handle:
        generator = pickle.load(data_handle)

    df_dict = {'dimension': dim, 'max time lag': tau, 'incoming edges': in_edges, 'run': run, 'autocorr': autocorr}

    # incremental: search up to 2*tau and let the stopper decide when to stop
    stopper = ICStopper(dim=dim, patiency=2)
    start_time = perf_counter()
    pc_incremental_pc1(tigramite_partial_corr_test, ts=generator.ts, max_p=2*tau, stopper=stopper, verbose=False)
    time_delta = perf_counter() - start_time
    df_dict['PC1 incremental - runtime'] = time_delta

    # non-incremental: a single step straight to max lag tau+offset
    for offset in range(-k, k+1):
        offset_str = f'{offset:+}' if offset != 0 else ''
        start_time = perf_counter()
        pc_incremental_pc1(tigramite_partial_corr_test, ts=generator.ts,
                           start=0, step=tau+offset, max_p=tau+offset, verbose=False)
        time_delta = perf_counter() - start_time
        df_dict[f'PC1 tau{offset_str} - runtime'] = time_delta

    # Checkpoint after every setup so partial results survive an interruption.
    records.append(df_dict)
    results = pd.DataFrame(records)
    results.to_csv(results_path + 'experiment5b.csv', index=False)

  beta_hat = numpy.linalg.lstsq(z, y)[0]


## 5. Incremental vs Non-Incremental

# Visualization

## 1. Visualize $F_1$ for known $\tau$

In [None]:
from functools import reduce
import sys
eps = sys.float_info.epsilon

def plot_f1_groupedbarchart(df_dict, title):
    """Show a grouped bar chart with one bar group per column and one bar per entry of ``df_dict``.

    Parameters
    ----------
    df_dict : dict
        Maps a legend label to a table of scores. For every entry the
        column-wise mean is drawn as bar height and the column-wise standard
        deviation as error bar. Column labels are indexed with ``[1]`` to get
        a numeric offset for the tick labels; the tick labels are taken from
        the first entry, so all entries are assumed to share its columns.
    title : str
        Title of the axes.

    Raises
    ------
    AssertionError
        If ``df_dict`` is empty.
    """
    assert len(df_dict) > 0
    # Read the labels from the argument itself. The previous version used the
    # notebook-global `grouped_result`, which only worked when a later cell had
    # already been executed with exactly that variable name.
    reference_columns = next(iter(df_dict.values())).columns
    bar_labels = [f'$\\tau + {c[1]}$' if c[1]>=0 else f'$\\tau - {abs(c[1])}$'
                  for c in reference_columns]

    fig, ax = plt.subplots(figsize=(20,10))
    ind = np.arange(len(bar_labels))
    # All bars of one group together occupy 0.7 of the unit spacing between groups.
    width = 0.7 / len(df_dict)

    plots = []
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
    for w_idx, (algo, f1s) in enumerate(df_dict.items()):
        # Bars are centred on their x position, hence the extra half width.
        p = ax.bar(ind + w_idx*width + (width / 2), np.mean(f1s, axis=0), width,
                   color=color_cycle[w_idx % len(color_cycle)],
                   bottom=0, yerr=np.std(f1s, axis=0))
        plots.append(p[0])

    ax.set_title(title)
    # Put each tick in the middle of its bar group.
    ax.set_xticks(ind + (len(df_dict) * width) / 2)
    ax.set_xticklabels(bar_labels)

    ax.legend(plots, list(df_dict.keys()))
    ax.autoscale_view()

    plt.show()

In [None]:
results_path = 'notebooks/ICML/icml_results_v2/'
# (function, label) pairs; only the label is used below, to pick the result columns.
algorithms = [
    (pc_incremental, 'PC incremental'),
    (pc_incremental_extensive, 'PC extensive'),
    (pc_incremental_pc1, 'PC1 incremental'),
    (pc_incremental_pc1mci, 'PCMCI incremental'),
]

results = pd.read_csv(results_path + 'experiment1.csv')
# One chart per (max time lag, autocorr) combination.
for group, result in results.groupby(['max time lag', 'autocorr']):
    tau, autocorr = int(group[0]), float(group[1])
    grouped_result = {}
    for algorithm, name in algorithms:
        # A column belongs to an algorithm when everything before its last '_' equals the label.
        algo_cols = list(filter(lambda col: col[:col.rfind('_')] == name, result.columns))
        # The last '_'-separated part is two characters followed by an integer -> ('f1', integer).
        df = result[algo_cols].rename(columns=lambda c: ('f1', int(c.rsplit('_', 1)[-1][2:])))
        grouped_result[name] = df[sorted(df.columns)]
    plot_f1_groupedbarchart(
        grouped_result,
        title=f'$F_1$ score for known $\\tau$ = {tau}, autocorr = {autocorr}',
    )

## 2. Visualize estimate $\hat{\tau}$ for unknown $\tau$

In [None]:
from math import floor

# (dimension, incoming edges, autocorr) combinations to draw; all other groups are skipped.
visualize = [(10, 2, False)]
names = ['PC1 incremental: BIC', 'PC1 incremental: Correlation',
         'VAR: BIC', 'PC incremental: BIC', 'PC extensive: BIC', 'PCMCI: BIC']

width = 0.4
results_path = 'notebooks/ICML/icml_results_v2/'
cols = [f'{name}_tau' for name in names]

results = pd.read_csv(results_path + 'experiment2.csv')
true_taus = np.unique(results['max time lag'])

for group, result in results.groupby(['dimension', 'incoming edges', 'autocorr']):

    if group not in visualize:
        continue

    dim, in_edges, autocorr = int(group[0]), int(group[1]), float(group[2])

    colors = plt.cm.CMRmap(np.linspace(0, 1, len(names)+2))
    plt.figure(dpi=200, figsize=(15, 8))

    # Grey reference line at y = tau across the box cluster of every true tau.
    for tau in true_taus:
        half_span = (width+0.1)*(len(names))/2
        plt.hlines(tau, tau - half_span, tau + half_span, colors='grey')

    # One series of boxes per method, shifted sideways around each true tau.
    bps = []
    for i, (name, col) in enumerate(zip(names, cols)):
        shade = colors[i+1]
        positions = [tau + (width+0.1)*(i - (len(names)-1)/2) for tau in true_taus]
        arr = [result.loc[result['max time lag']==tau, col] for tau in true_taus]
        bp = plt.boxplot(arr, positions=positions, widths=width, showfliers=False,
                         whiskerprops={'color': shade}, boxprops={'color': shade},
                         capprops={'color': shade}, medianprops={'color': 'black'})
        bps.append(bp)

    plt.xlabel('true $\\tau$')
    plt.ylabel('estimated $\\hat{\\tau}$')
    plt.xticks(true_taus, true_taus)
    plt.title(f'$\\tau$ estimation for dimensionality={dim}, incoming edges={in_edges}, autocorr={autocorr}')
    margin = (width+0.2)*len(names)/2
    plt.xlim(min(true_taus)-margin, max(true_taus)+margin)

    # The first whisker of every series serves as its legend handle.
    plt.legend([bp['whiskers'][0] for bp in bps], names)
    plt.show()

## Varying patiency

In [None]:
def simulate_stopping(df, stopping, params, col):
    """Return the 'tau estimate' at the position chosen by ``stopping.simulate``.

    The rows of ``df`` are ordered by their 'tau estimate' column, the values
    of column ``col`` (in that order) are handed to
    ``stopping.simulate(values, **params)``, and its result is used as a
    positional index into the ordered 'tau estimate' column.
    """
    ordered = df.sort_values(by='tau estimate')
    criterion_values = ordered[col].values
    stop_position = stopping.simulate(criterion_values, **params)
    return ordered['tau estimate'].iloc[stop_position]

from math import floor

# (dimension, incoming edges, autocorr) combinations to draw; all other groups are skipped.
visualize = [(10, 2, False)]

# schema: pc_version, stopper_class, params, column_suffix, name
algorithms = [
    ('PC1 incremental', ICStopper, {'patiency': 2}, 'bics', 'PC1: BIC'),
    ('PCMCI incremental', ICStopper, {'patiency': 2}, 'bics', 'PCMCI: BIC'),
    ('PC extensive', ICStopper, {'patiency': 2}, 'bics', 'PC extensive: BIC'),
    ('PC1 incremental', CorrStopper, {'patiency': 2}, 'added_edges', 'PC1: Correlation'),
    ('PCMCI incremental', CorrStopper, {'patiency': 2}, 'added_edges', 'PCMCI: Correlation'),
    ('PC extensive', CorrStopper, {'patiency': 2}, 'added_edges', 'PC extensive: Correlation'),
]

width = 0.4
results_path = 'notebooks/ICML/icml_results_v2/'

names = [entry[4] for entry in algorithms]
cols = [f'{name}_tau' for name in names]

results = pd.read_csv(results_path + 'experiment3.csv')
true_taus = np.unique(results['max time lag'])

for group, result in results.groupby(['dimension', 'incoming edges', 'autocorr']):

    if group not in visualize:
        continue

    dim, in_edges, autocorr = int(group[0]), int(group[1]), float(group[2])

    colors = plt.cm.CMRmap(np.linspace(0, 1, len(names)+2))
    plt.figure(dpi=200, figsize=(15, 8))

    # Grey reference line at y = tau across the box cluster of every true tau.
    for tau in true_taus:
        half_span = (width+0.1)*(len(names))/2
        plt.hlines(tau, tau - half_span, tau + half_span, colors='grey')

    bps = []
    for i, (pc, stopper, params, col_suffix, name) in enumerate(algorithms):
        col = pc + '_' + col_suffix
        shade = colors[i+1]
        positions = [tau + (width+0.1)*(i - (len(names)-1)/2) for tau in true_taus]
        # For every true tau: one simulated stopping result per run.
        arr = []
        for tau in true_taus:
            per_run = result[result['max time lag'] == tau].groupby(['run'])
            estimates = per_run.apply(lambda df: simulate_stopping(df, stopper, params, col))
            arr.append(estimates.values)
        bp = plt.boxplot(arr, positions=positions, widths=width, showfliers=False,
                         whiskerprops={'color': shade}, boxprops={'color': shade},
                         capprops={'color': shade}, medianprops={'color': 'black'})
        bps.append(bp)

    plt.xlabel('true $\\tau$')
    plt.ylabel('estimated $\\hat{\\tau}$')
    plt.xticks(true_taus, true_taus)
    plt.title(f'$\\tau$ estimation for dimensionality={dim}, incoming edges={in_edges}, autocorr={autocorr}')
    margin = (width+0.2)*len(names)/2
    plt.xlim(min(true_taus)-margin, max(true_taus)+margin)

    # The first whisker of every series serves as its legend handle.
    plt.legend([bp['whiskers'][0] for bp in bps], names)
    plt.show()

## 3. Visualize iterations

In [None]:
from math import floor

# (tau, dimension, incoming edges, autocorr) combinations to draw; one figure per run.
visualize = [(20, 3, 4, False)]
names = ['PC1 incremental', 'PC extensive', 'PC incremental']
prop = 'bics'

width = 0.4
results_path = 'notebooks/ICML/icml_results_v2/'

results = pd.read_csv(results_path + 'experiment3.csv')
true_taus = np.unique(results['max time lag'])

group_keys = ['max time lag', 'dimension', 'incoming edges', 'autocorr', 'run']
for group, result in results.groupby(group_keys):

    tau, dim, in_edges = int(group[0]), int(group[1]), int(group[2])
    autocorr = float(group[3])
    run = int(group[4])

    if (tau, dim, in_edges, autocorr) not in visualize:
        continue

    colors = plt.cm.CMRmap(np.linspace(0, 1, len(names)+2))
    plt.figure(dpi=200, figsize=(15, 8))

    # One curve per algorithm: column '<name>_<prop>' against the 'tau estimate' column.
    x = result['tau estimate']
    for i, name in enumerate(names):
        y = result[f'{name}_{prop}']
        plt.plot(x, y, color=colors[i+1], label=name)

    plt.xlabel('iteration $\\tau$')
    plt.ylabel(prop)
    plt.title(f'{prop} for dimensionality={dim}, incoming edges={in_edges}, autocorr={autocorr}, run={run}')

    plt.legend()
    plt.show()