In [2]:
import numpy as np
import ols_gc
import pandas as pd
from lag_data_gen import LaggedTimeSeriesArray

In [3]:
class GrangerCausality():
    """Granger-causality estimate, in both directions, over a lagged array.

    The regressions address columns 0-3 of the array by position: column ``i``
    is the regressand, column ``i + 2`` is always a regressor and column
    ``3 - i`` is the extra regressor of the joint fit.
    NOTE(review): this presumably means the layout is
    [series_0, series_1, lagged series_0, lagged series_1] -- confirm against
    LaggedTimeSeriesArray.
    """

    def __init__(self, lts_array, endog, exog, lag = None):
        """Store the lagged array together with the labels describing it.

        Arguments:
            lts_array   -   2-D array-like that is indexed as ``[:, column]``
            endog       -   label of the dependent variable Y
            exog        -   label of the independent variable X
            lag         -   lag label; only forwarded to ``significance``
        """
        self.array = lts_array
        self.endog = endog
        self.exog = exog
        self.lag = lag

    def Granger_Caus(self, df=None, n_shuffles=0):
        """Compute Granger causality in both directions, optionally with significance.

        Arguments:
            df          -   accepted for call compatibility but ignored; the
                            computation always runs on a copy of ``self.array``
            n_shuffles  -   when > 0, ``significance`` is called with this many
                            shuffles and its outputs are stacked under the estimates
        Returns:
            ``self.results``: a single row holding the two estimates when
            ``n_shuffles`` is not positive, otherwise the rows
            [GC, p_value, z_score, shuffled mean] (columns are [XY, YX]).
        """
        # Work on a copy of the stored array; the `df` argument is not consulted.
        data = np.copy(self.array)

        ## Information transfer is compared bidirectionally: one pass per regressand
        gc_pair = [0, 0]
        for target in (0, 1):
            own_col = target + 2
            other_col = 3 - target
            regressand = data[:, target]

            ## Residuals of the joint fit (both regressors) and of the fit on one regressor
            joint_residuals = ols_gc.ols_res_cal(regressand, data[:, [own_col, other_col]])
            independent_residuals = ols_gc.ols_res_cal(regressand, data[:, own_col].reshape(-1, 1))

            ## Granger causality from the two residual sets (Geweke's formula, per ols_gc)
            gc_pair[target] = ols_gc.granger_cal(independent_residuals, joint_residuals)

        gc_row = np.array([gc_pair])

        if n_shuffles > 0:
            ## Significance of the estimates against the shuffled null
            p, z, TE_mean = significance(df=data,
                                         TE=gc_pair,
                                         endog=self.endog,
                                         exog=self.exog,
                                         lag=self.lag,
                                         n_shuffles=n_shuffles,
                                         method='granger_causality')

            # column are [XY, YX]
            # rows are [TE, p_value, z_score, shuffled_TE]
            stacked_rows = (gc_row, np.array([p]), np.array([z]), np.array([TE_mean]))
            self.results = np.concatenate(stacked_rows, axis=0)
            return self.results

        ## Only the two directional estimates are stored
        self.results = gc_row
        return self.results
    
    # def ols_res_cal(self, y, x):
    #     x = np.append(np.ones((len(x), 1)), x, axis=1)
    #     ins = np.linalg.inv(np.dot(x.T, x))
    #     out = np.dot(x.T, y)
    #     beta = np.dot(ins, out)
    #     res = y - np.dot(x, beta)
    #     return res

    # def granger_cal(self, independent_residuals, joint_residuals):
    #     ind_res_var = np.var(independent_residuals) + np.finfo(np.float64).eps
    #     jnt_res_var = np.var(joint_residuals) + np.finfo(np.float64).eps
    #     gc =  np.log(ind_res_var / jnt_res_var)
    #     return gc
    

def significance(df, TE, endog, exog, lag, n_shuffles, method, bandwidth=None):
    """
    Significance test of the causality statistic in both directions against a
    shuffled null distribution.

    Each iteration shuffles the array along axis 0 with ``shuffle_along_axis``
    (destroying temporal ordering), recomputes the statistic on the shuffled
    array and records it. The recorded values form the null distribution:

      * p-value  - proportion of shuffles whose statistic is strictly greater
                   than the observed one
      * z-score  - (observed - mean of shuffled) / std of shuffled; machine
                   epsilon is added to the std to avoid division by zero

    Arguments:
        df          -   2-D array to shuffle; the caller's array is not modified,
                        only the local name is rebound to each shuffled copy
        TE          -   (sequence) observed statistic per direction, [XY, YX]
        endog       -   forwarded unchanged to GrangerCausality
        exog        -   forwarded unchanged to GrangerCausality
        lag         -   forwarded unchanged to GrangerCausality
        n_shuffles  -   (int) number of shuffles; must be at least 1
        method      -   (string) only 'granger_causality' is supported
        bandwidth   -   unused by this function; kept for call compatibility
    Returns:
        p_values    -   (tuple) p-value per direction
        z_scores    -   (tuple) z-score per direction
        TE_mean     -   (tuple) mean shuffled statistic per direction
    Raises:
        ValueError  -   if ``method`` is unsupported or ``n_shuffles`` < 1
    """
    # Fail loudly instead of returning statistics computed from an all-zero null.
    if method != 'granger_causality':
        raise ValueError(
            "Unsupported method %r; only 'granger_causality' is implemented" % (method,))
    if n_shuffles < 1:
        raise ValueError("n_shuffles must be a positive integer, got %r" % (n_shuffles,))

    ## Prepare array for the statistic of each shuffle: one row per direction
    shuffled_TEs = np.zeros(shape=(2, n_shuffles))

    for i in range(n_shuffles):
        ## Perform Shuffle (shuffles accumulate: each one starts from the previous result)
        df = shuffle_along_axis(df, axis=0)

        ## Calculate the statistic on the shuffled data
        shuffled_causality = GrangerCausality(df, endog=endog, exog=exog, lag=lag)
        TE_shuffled = shuffled_causality.Granger_Caus(df, n_shuffles=0)

        ## Record it. This store was missing, which left the null distribution at zero.
        ## Granger_Caus returns a single row of two values, hence the flattening.
        shuffled_TEs[:, i] = np.ravel(TE_shuffled)

    ## Calculate p-values for each direction
    p_values = (np.count_nonzero(TE[0] < shuffled_TEs[0, :]) / n_shuffles,
                np.count_nonzero(TE[1] < shuffled_TEs[1, :]) / n_shuffles)

    ## Epsilon keeps the z-score finite when every shuffle gives the same value
    shuff_te_zero = np.std(shuffled_TEs[0, :]) + np.finfo(float).eps
    shuff_te_one = np.std(shuffled_TEs[1, :]) + np.finfo(float).eps

    TE_mean = (np.mean(shuffled_TEs[0, :]),
               np.mean(shuffled_TEs[1, :]))

    ## Calculate z-scores for each direction
    z_scores = ((TE[0] - TE_mean[0]) / shuff_te_zero,
                (TE[1] - TE_mean[1]) / shuff_te_one)

    return p_values, z_scores, TE_mean

def shuffle_along_axis(a, axis):
    """Return a copy of ``a`` with its entries permuted along ``axis``.

    A uniform random key is drawn for every element and the keys are
    arg-sorted along ``axis``, so each 1-D slice along that axis gets its own
    permutation. Randomness comes from NumPy's global ``np.random`` state.
    """
    random_keys = np.random.rand(*a.shape)
    permutation = np.argsort(random_keys, axis=axis)
    return np.take_along_axis(a, permutation, axis=axis)

In [4]:
# Load the test CSV shipped under PyCausality/Testing (relative path).
test_data = pd.read_csv('../PyCausality/Testing/Test_Utils/test_data.csv')

In [5]:
# Build the lagged representation of columns S2 and S3 with lag=2 and keep its
# `.df` attribute, which is what GrangerCausality is given in the next cell.
lts_df = LaggedTimeSeriesArray(test_data[['S2', 'S3']], lag=2).df

In [6]:
# Benchmark one full run on the test data: both directions plus a 20-shuffle
# significance test, with the result transposed.
%timeit GrangerCausality(lts_df, 'S2', 'S3', lag=2).Granger_Caus(n_shuffles=20).transpose()

2.93 ms ± 23.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
# Load return_pre_brexit.csv from the data directory (relative path).
brexit_data = pd.read_csv('../data/return_pre_brexit.csv')

In [9]:
# Display the loaded frame to inspect its columns.
brexit_data

Unnamed: 0,YYYY/MM/DD,USD/XAU,AUD/XAU,BRL/XAU,GBP/XAU,CAD/XAU,CLP/XAU,CNY/XAU,DKK/XAU,EGP/XAU,...,SGD/XAU,ZAR/XAU,KRW/XAU,LKR/XAU,SEK/XAU,CHF/XAU,TWD/XAU,THB/XAU,TRY/XAU,AED/XAU
0,1/4/16,0.021038,0.038147,0.047330,0.025223,0.030555,0.034264,0.027656,0.026476,0.021090,...,0.028100,0.019860,0.034931,0.018773,0.028047,0.022840,0.027608,0.024948,0.038432,0.016098
1,1/5/16,-0.004897,-0.002758,-0.017755,-0.003231,-0.003187,-0.010816,-0.006906,0.000274,-0.005050,...,-0.003781,-0.010703,-0.005732,-0.002408,0.006005,0.000483,-0.005317,-0.005997,0.000402,-0.006120
2,1/6/16,0.013370,0.024963,0.015316,0.015658,0.019961,0.014279,0.018543,0.011926,0.013319,...,0.018834,0.012317,0.021710,0.011615,0.013724,0.012216,0.016826,0.016540,0.020641,0.013703
3,1/7/16,0.013744,0.020337,0.021691,0.017169,0.012088,0.024035,0.019202,0.004183,0.013420,...,0.010073,0.005431,0.007749,0.013462,0.002904,0.007402,0.012799,0.013128,0.011213,0.020838
4,1/8/16,-0.004067,0.002858,-0.004559,-0.000089,0.002307,0.004367,-0.003297,-0.006431,-0.003227,...,0.003241,-0.019306,0.008365,-0.005658,-0.001926,-0.008926,-0.000051,-0.000795,0.001250,0.000069
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,6/24/16,0.042228,0.062890,0.055812,0.131528,0.058574,0.058022,0.049375,0.064581,0.041629,...,0.049620,0.034898,0.063351,0.041236,0.076509,0.056912,0.054341,0.047000,0.067671,0.045122
122,6/27/16,0.006918,0.019592,0.011434,0.038723,0.016713,0.008767,0.011540,0.016194,0.007587,...,0.017144,0.013131,0.019505,0.011029,0.021369,0.015874,0.012173,0.007915,0.009186,0.004430
123,6/28/16,-0.011249,-0.010812,-0.038872,-0.016946,-0.010341,-0.027591,-0.011858,-0.013901,-0.016916,...,-0.015965,-0.011455,-0.021868,-0.009104,-0.013571,-0.008703,-0.015258,-0.013794,-0.023273,-0.013159
124,6/29/16,0.009010,-0.003454,-0.012610,-0.007178,0.001379,-0.005239,0.007522,0.002923,0.014746,...,0.002611,0.001343,-0.003929,-0.000133,0.000076,0.006964,0.003874,0.005977,0.006713,0.007937


In [11]:
# Build the lagged representation of the USD/XAU and AUD/XAU columns with lag=2.
brex_lts_df = LaggedTimeSeriesArray(brexit_data[['USD/XAU', 'AUD/XAU']], lag=2).df

In [12]:
# Benchmark the same analysis (both directions, 20 shuffles) on the USD/XAU and AUD/XAU pair.
%timeit GrangerCausality(brex_lts_df, 'USD/XAU', 'AUD/XAU', lag=2).Granger_Caus(n_shuffles=20).transpose()

1.07 ms ± 2.15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
