In [2]:
"""
This file must contain a function called my_method that triggers all the steps 
required in order to obtain

 *val_matrix: mandatory, (N, N) matrix of scores for links
 *p_matrix: optional, (N, N) matrix of p-values for links; if not available, 
            None must be returned
 *lag_matrix: optional, (N, N) matrix of time lags for links; if not available, 
              None must be returned

Zip this file (together with other necessary files if you have further handmade 
packages) to upload as a code.zip. You do NOT need to upload files for packages 
that can be imported via pip or conda repositories. Once you upload your code, 
we are able to validate results including runtime estimates on the same machine.
These results are then marked as "Validated" and users can use filters to only 
show validated results.

Shown here is a vector-autoregressive model estimator as a simple method.
"""

import numpy as np
import statsmodels.tsa.api as tsa

# Your method must be called 'my_method'
# Describe all parameters (except for 'data') in the method registration on CauseMe
def my_method(data, maxlags=1, correct_pvalues=True):
    """Score causal links i --> j with a vector-autoregressive (VAR) model.

    The data are standardized column-wise, a VAR model with ``maxlags`` lags
    is fitted, and for every ordered pair (i, j) the lag whose coefficient has
    the smallest p-value is selected.

    Parameters
    ----------
    data : array_like, shape (time, variables)
        Input time series. The caller's array is left untouched; integer
        input is accepted and converted to float.
    maxlags : int, optional
        Number of lags passed to the VAR fit. Default 1.
    correct_pvalues : bool, optional
        If True (default), multiply the selected p-values by ``maxlags`` and
        clip them at 1, to account for taking the minimum over all lags.

    Returns
    -------
    val_matrix : numpy.ndarray, shape (N, N), dtype float32
        Entry [i, j] is the absolute VAR coefficient of the link i --> j at
        the selected lag (non-negative, higher means more confidence).
    p_matrix : numpy.ndarray, shape (N, N), dtype float32
        Entry [i, j] is the (optionally corrected) p-value at the selected lag.
    lag_matrix : numpy.ndarray, shape (N, N), dtype uint8
        Entry [i, j] is the selected lag in 1..maxlags.
    """
    # Input data is of shape (time, variables). Converting to float and using
    # out-of-place arithmetic below guarantees the caller's array is never
    # modified and that integer arrays do not raise a casting error.
    data = np.asarray(data, dtype=float)
    _, N = data.shape

    # Standardize data (center first, then scale by the std of the centered
    # series, in the same order as before)
    centered = data - data.mean(axis=0)
    standardized = centered / centered.std(axis=0)

    # Fit VAR model and get coefficients and p-values
    # NOTE(review): newer statsmodels releases appear to spell the
    # no-trend option 'n' instead of 'nc' -- confirm against the installed
    # version before changing it.
    tsamodel = tsa.var.var_model.VAR(standardized)
    results = tsamodel.fit(maxlags=maxlags, trend='nc')
    pvalues = np.asarray(results.pvalues)
    coefs = np.asarray(results.coefs)

    # CauseMe requires to upload a score matrix and optionally a matrix of
    # p-values and time lags where the links occur.
    #
    # The code indexes pvalues as [(tau-1)*N + i, j] for the link i --> j at
    # lag tau, so a C-order reshape yields a (maxlags, N, N) array indexed
    # [tau-1, i, j]. The reshape raises if the fit returned a different
    # number of rows, instead of silently mis-indexing.
    pvalues_by_lag = pvalues.reshape(maxlags, N, N)

    # coefs is indexed [tau-1, j, i]; swap the last two axes so that it is
    # indexed [tau-1, i, j] like the p-values.
    coefs_by_lag = coefs.transpose(0, 2, 1)

    # For each link pick the lag with the smallest p-value (first one on
    # ties, as np.argmin does).
    best_lag_index = np.argmin(pvalues_by_lag, axis=0)
    selector = best_lag_index[np.newaxis, :, :]

    # Matrix of p-values at the selected lag
    p_matrix = np.take_along_axis(
        pvalues_by_lag, selector, axis=0)[0].astype('float32')

    # Absolute coefficient value at the selected lag is used as score
    val_matrix = np.abs(np.take_along_axis(
        coefs_by_lag, selector, axis=0)[0]).astype('float32')

    # Matrix of time lags (1-based)
    lag_matrix = (best_lag_index + 1).astype('uint8')

    # Optionally adjust p-values since we took the minimum over all lags
    # [1..maxlags] for each i-->j; should lead to an expected false positive
    # rate of 0.05 when thresholding the (N, N) p-value matrix at alpha=0.05
    # You can, of course, use different ways or none. This will only affect
    # evaluation metrics that are based on the p-values, see Details on CauseMe
    if correct_pvalues:
        p_matrix *= float(maxlags)
        p_matrix[p_matrix > 1.] = 1.

    return val_matrix, p_matrix, lag_matrix

In [5]:
"""
This script can be used to iterate over the datasets of a particular experiment.
Below you import your function "my_method" stored in the module causeme_my_method.

Importantly, you need to first register your method on CauseMe.
Then CauseMe will return a hash code that you use below to identify which method
you used. Of course, we cannot check how you generated your results, but we can
validate a result if you upload code. Users can filter the Ranking table to only
show validated results.
"""

# Imports
import bz2
import json
import os
import time
import zipfile

import numpy as np

#from causeme_my_method import my_method
#from pcmci import my_method

# Set up a Python dictionary to store the method hash, parameter values, and
# results. Its contents are what is serialized to the results file later on.
results = {}

################################################
# Identify method and used parameters
################################################

# Method name, used only for the saved file name and the progress messages
# NOTE(review): the my_method defined in this notebook is described as a VAR
# estimator, while this label says PCMCI -- confirm which method is meant.
method_name = 'PCMCI-python'

# Insert method hash obtained from CauseMe after method registration
results['method_sha'] = "c00a723b779142d68b80669cb0949e7c"

# The only parameter here is the maximum time lag
maxlags = 3

# Parameter values: These are essential to validate your results
# provided that you also uploaded code
results['parameter_values'] = "maxlags=%d" % maxlags

#################################################
# Experiment details
#################################################
# Choose model and experiment as downloaded from CauseMe
# (other experiment names: Testlinear-VAR_N-10_T-150, TestCLIM1-1_N-20_T-5000)
results['model'] = 'Finalnonlinear-VAR'

# Here we choose the setup labelled N-20_T-600
# (following the naming scheme: N=20 variables, time series length T=600)
experimental_setup = 'N-20_T-600'
results['experiment'] = results['model'] + '_' + experimental_setup

# Adjust save name if needed: <method name>_<parameter values>_<experiment>
save_name = '{}_{}_{}'.format(method_name,
                              results['parameter_values'],
                              results['experiment'])

# Set up paths (adjust to your needs): the experiment archive to read and the
# bz2-compressed JSON results file to write, both relative to the working dir
experiment_zip = 'experiments/%s.zip' % results['experiment']
results_file = 'results/%s.json.bz2' % (save_name)

#################################################

# Start of script
scores = []
pvalues = []
lags = []
runtimes = []

# (Note that runtimes on causeme are only shown for validated results, this is more for
# your own assessment here)

# Make sure the output directory exists *before* the (potentially long) run,
# so a missing folder cannot cost us all computed results at write time
results_dir = os.path.dirname(results_file)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# Loop over all datasets within an experiment
# Important note: The datasets need to be stored in the order of their filename
# extensions, hence they are sorted here
print("Load data")
with zipfile.ZipFile(experiment_zip, "r") as zip_ref:
    for name in sorted(zip_ref.namelist()):

        print("Run {} on {}".format(method_name, name))

        # Close each archive member as soon as it has been parsed instead of
        # leaving one open handle per dataset
        with zip_ref.open(name) as member:
            data = np.loadtxt(member)

        # Runtimes for your own assessment; perf_counter is monotonic and
        # therefore unaffected by system clock adjustments
        start_time = time.perf_counter()

        # Run your method (adapt parameters if needed)
        val_matrix, p_matrix, lag_matrix = my_method(data, maxlags)
        runtimes.append(time.perf_counter() - start_time)

        # Now we convert the matrices to the required format
        # and write the results file
        scores.append(val_matrix.flatten())

        # pvalues and lags are recommended for a more comprehensive method evaluation,
        # but not required. Then you can leave the dictionary field empty
        if p_matrix is not None:
            pvalues.append(p_matrix.flatten())
        if lag_matrix is not None:
            lags.append(lag_matrix.flatten())

# Store arrays as lists for json
results['scores'] = np.array(scores).tolist()
if len(pvalues) > 0:
    results['pvalues'] = np.array(pvalues).tolist()
if len(lags) > 0:
    results['lags'] = np.array(lags).tolist()
results['runtimes'] = np.array(runtimes).tolist()

# Save data as bz2-compressed JSON
print('Writing results ...')
results_json = bytes(json.dumps(results), encoding='latin1')
with bz2.BZ2File(results_file, 'w') as mybz2:
    mybz2.write(results_json)


Load data
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0001.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0002.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0003.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0004.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0005.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0006.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0007.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0008.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0009.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0010.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0011.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0012.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0013.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0014.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0015.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0016.txt
Run VAR-python on Finalnonlinear-VAR_N-20_T-600_0017.txt
Run VAR-python on Fin