In [1]:
import os
import sys

import numpy as np
import pandas as pd
from tigramite import data_processing
from tigramite.independence_tests import CMIknn
from tigramite.pcmci import PCMCI

# Trials to analyse; each one is read from ../../data/DAPHNE/PCMCI/<trial>.csv.
good_trials = ['DAP050(1)']
# Columns kept from each trial CSV, in the variable order handed to PCMCI
# (index 0 = coughing, index 1 = pm2_5, index 2 = temperature, index 3 = humidity).
features = ["coughing", "pm2_5", "temperature", "humidity"]

# Lags to test: 1, 5, 10, 15, 30, 45, 60 and then every 10 steps from 70 to 480.
# Defined once so the run loop and the column labels of the summary files
# cannot drift apart.
lags = [1, 5, 10, 15, 30, 45, 60] + [i * 10 for i in range(7, 49)]

# Create the output folders up front: a missing folder would otherwise only
# surface at the first to_csv call, after a (slow) PCMCI run has completed.
for out_dir in ("results/p", "results/v"):
    os.makedirs(out_dir, exist_ok=True)

for trial in good_trials:
    # One p-value / test-statistic per lag, appended in the order of `lags`.
    full_p = []
    full_v = []
    print("Trial: {}".format(trial))
    try:
        data = pd.read_csv(f"../../data/DAPHNE/PCMCI/{trial}.csv",
                           parse_dates=["timestamp"], index_col="timestamp")
        data = data[features]
    except (OSError, KeyError, ValueError) as err:
        # OSError: file missing/unreadable; KeyError: a feature column is
        # absent; ValueError: malformed CSV or missing "timestamp" column.
        print("ERROR: could not load trial {}: {!r}".format(trial, err))
        # Stop the whole run, as the script always did on a load failure.
        break
    # Wrap the raw values for tigramite; 999. marks missing samples.
    dataframe = data_processing.DataFrame(data.values, missing_flag=999.)
    # Initialise the non-linear CMIknn test
    cmi_knn = CMIknn(significance='shuffle_test', knn=0.1, shuffle_neighbors=5,
                     transform='ranks', n_jobs=-1)
    # Initialise PCMCI
    pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cmi_knn, verbosity=0)
    for lag in lags:
        print("Time lag: {}".format(lag))
        # Re-seed NumPy's global RNG before every run so results can be
        # reproduced (assumes the shuffle test draws from the global RNG in
        # the installed tigramite version -- TODO confirm).
        np.random.seed(0)
        # Run PCMCI+ restricted to this single lag (tau_min == tau_max).
        results = pcmci.run_pcmciplus(tau_min=lag, tau_max=lag, pc_alpha=0.05)
        # Entry [1][0][-1]: variable 1 (pm2_5) -> variable 0 (coughing) at the
        # last lag slot. Presumably tigramite's [i, j, tau] layout for the
        # link i(t - tau) -> j(t); verify against the tigramite docs.
        pvalues = results["p_matrix"][1][0][-1]
        stats = results["val_matrix"][1][0][-1]
        print("Lag {}".format(lag))
        print(pvalues)
        print(stats)
        full_p.append(pvalues)
        full_v.append(stats)

        # Checkpoint: write everything collected so far, so a crash or an
        # interrupted run still leaves the results of the completed lags.
        pd.DataFrame(np.array(full_p)).to_csv(
            "results/p/non_linear_p50_trial_{}_{}_8h.csv".format(trial, lag), index=False)
        pd.DataFrame(np.array(full_v)).to_csv(
            "results/v/non_linear_v50_trial_{}_{}_8h.csv".format(trial, lag), index=False)

    try:
        # Save the per-trial summary: a single row with one column per lag.
        df = pd.DataFrame(np.array(full_p).reshape(1, -1), columns=lags)
        df.to_csv("results/p/non_linear_p50_{}_8h.csv".format(trial), index=False)

        df = pd.DataFrame(np.array(full_v).reshape(1, -1), columns=lags)
        df.to_csv("results/v/non_linear_v50_{}_8h.csv".format(trial), index=False)
    except (OSError, ValueError) as err:
        # Best effort, as before: move on to the next trial, but say why the
        # summary is missing instead of dropping it silently.
        print("ERROR: could not save summary for trial {}: {!r}".format(trial, err))

Trial: DAP050(1)
Time lag: 1
Lag 1
0.995
0.002820165603153235
Time lag: 5
Lag 5
0.19
0.004216895016156741
Time lag: 10
Lag 10
0.266
0.0061454644879495035
Time lag: 15
Lag 15
0.597
0.0015030477431423606
Time lag: 30
Lag 30
0.497
0.0028920581431277625
Time lag: 45
Lag 45
0.589
0.004704349263530361
Time lag: 60
Lag 60
0.651
0.00514166946689798
Time lag: 70
Lag 70
0.125
0.007439777695321226
Time lag: 80
Lag 80
0.167
0.004635249221409232
Time lag: 90
Lag 90
0.197
0.005900735023844028
Time lag: 100
Lag 100
0.522
0.005871924790414873
Time lag: 110
Lag 110
0.218
0.008790650466236727
Time lag: 120
Lag 120
0.134
0.004182283753390692
Time lag: 130
Lag 130
0.0
0.03418204330133623
Time lag: 140
Lag 140
0.0
0.030260784650213424
Time lag: 150
Lag 150
0.0
0.026887801805871803
Time lag: 160
Lag 160
0.002
0.04215918659599005
Time lag: 170
Lag 170
0.005
0.03426139777412729
Time lag: 180
Lag 180
0.033
0.023172108234049915
Time lag: 190
Lag 190
0.057
0.021966382194333
Time lag: 200
Lag 200
0.067
0.02011212