In [None]:
#Historical Granger Tests Corrected

In [None]:
#Part 1: Functions and Libraries

In [1]:
#Granger Causality Test
#https://www.maths.usyd.edu.au/u/jchan/Consult/W10_CompareTwoTimeSeries.pdf
#https://www.machinelearningplus.com/time-series/granger-causality-test-in-python/
from statsmodels.tsa.stattools import grangercausalitytests
#?grangercausalitytests

In [2]:
#Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.stats as stats
pd.set_option('display.max_columns', 500)

In [3]:
#Granger Function Variables
# The Granger helper functions in this notebook read `maxlag` from module scope
# when they run, so it must be assigned before they are called. Later cells
# reassign it before each analysis.
maxlag = 1
# Name of the statsmodels test whose p-value is reported.
# NOTE(review): the helpers take `test` as their own parameter with this same
# default, so this module-level value does not appear to be read by them.
test = 'ssr_ftest'

In [4]:
#This function goes through all the lags possible
def grangers_causation_matrix(data, variables, test='ssr_ftest', verbose=False, maxlag=None):
    """Pairwise Granger-causality p-values, minimised over lag orders 1..maxlag.

    Rows of the result are the response series (suffix ``_y``) and columns are
    the predictor series (suffix ``_x``). Each cell holds the smallest p-value
    (each rounded to 4 decimals) found across lag orders 1..maxlag for the
    null hypothesis "the predictor does not Granger-cause the response".
    A p-value below the chosen significance level (e.g. 0.05) rejects that null.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the time series, one per column.
    variables : sequence of str
        Column names of ``data`` to test against each other.
    test : str, default 'ssr_ftest'
        Key of the statsmodels test whose p-value is reported.
    verbose : bool, default False
        When true, print the per-lag p-values of every pair.
    maxlag : int, optional
        Highest lag order to test. When omitted, the module-level ``maxlag``
        variable is used, which is how this notebook configured the lag count
        before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Square frame of p-values indexed ``<name>_y`` by ``<name>_x``.

    Raises
    ------
    NameError
        If ``maxlag`` is not passed and no module-level ``maxlag`` exists.

    Notes
    -----
    Taking the minimum over several lag orders is not adjusted for multiple
    testing. Every series is also tested against itself (the diagonal), which
    carries no information.
    """
    if maxlag is None:
        # Backward-compatible fallback to the notebook-level setting.
        try:
            maxlag = globals()['maxlag']
        except KeyError:
            raise NameError("name 'maxlag' is not defined") from None

    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            # statsmodels tests whether the SECOND column helps predict the FIRST,
            # so the response ``r`` goes first and the predictor ``c`` second.
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            # Results are keyed by lag order, starting at 1.
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, maxlag + 1)]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            df.loc[r, c] = np.min(p_values)
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [5]:
#This function just gets the single lag data
def grangers_causation_matrix2(data, variables, test='ssr_ftest', verbose=False, maxlag=None):
    """Pairwise Granger-causality p-values at one specific lag order.

    Unlike ``grangers_causation_matrix``, only the single lag order ``maxlag``
    is tested, and its p-value is stored unrounded. Rows of the result are the
    response series (suffix ``_y``) and columns are the predictor series
    (suffix ``_x``). Each cell is the p-value for the null hypothesis "the
    predictor does not Granger-cause the response"; a value below the chosen
    significance level (e.g. 0.05) rejects that null.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the time series, one per column.
    variables : sequence of str
        Column names of ``data`` to test against each other.
    test : str, default 'ssr_ftest'
        Key of the statsmodels test whose p-value is reported.
    verbose : bool, default False
        When true, print the p-value of every pair.
    maxlag : int, optional
        The one lag order to test. When omitted, the module-level ``maxlag``
        variable is used, which is how this notebook configured the lag order
        before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Square frame of p-values indexed ``<name>_y`` by ``<name>_x``.

    Raises
    ------
    NameError
        If ``maxlag`` is not passed and no module-level ``maxlag`` exists.
    """
    if maxlag is None:
        # Backward-compatible fallback to the notebook-level setting.
        try:
            maxlag = globals()['maxlag']
        except KeyError:
            raise NameError("name 'maxlag' is not defined") from None

    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            # Passing a one-element list makes statsmodels test only that lag order.
            # The response ``r`` goes first and the predictor ``c`` second.
            test_result = grangercausalitytests(data[[r, c]], maxlag=[maxlag], verbose=False)
            p_value = test_result[maxlag][0][test][1]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_value}')
            df.loc[r, c] = p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [None]:
####################################################################################################################

In [7]:
#Part 2: Datasets
#FTSE 100
# Load the FTSE 100 file (index level plus the Sent_*/Scale_*/ScSe_* columns).
# NOTE(review): absolute, machine-specific path - the notebook will not run on another machine as-is.
ft100 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Ftse_csv.csv"
)
ft100.tail()

Unnamed: 0,Date,FTSE100,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
99,4/26/22,7386.200195,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-613302.0
100,4/27/22,7425.600098,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-616905.0
101,4/28/22,7509.200195,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-620219.0
102,4/29/22,7544.600098,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-624696.0
103,,,,,,,,,,,,


In [8]:
# Remove the Date column so only the numeric series remain.
ft100_2 = ft100.drop(columns=["Date"])
ft100_2.tail()

Unnamed: 0,FTSE100,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
99,7386.200195,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-613302.0
100,7425.600098,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-616905.0
101,7509.200195,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-620219.0
102,7544.600098,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-624696.0
103,,,,,,,,,,,


In [9]:
# Drop every row containing a NaN (the file ends with an all-empty row, visible
# in the tail() output of the previous cells), then renumber the rows from 0.
ft100_2_5 = ft100_2.dropna()
ft100_3 = ft100_2_5.reset_index(drop = True)
ft100_3.tail()

Unnamed: 0,FTSE100,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
98,7380.5,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-609443.0
99,7386.200195,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-613302.0
100,7425.600098,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-616905.0
101,7509.200195,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-620219.0
102,7544.600098,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-624696.0


In [10]:
#FTSE 100 Differencing
# First-difference every column (each row minus the previous row). The first
# row has no predecessor, so it becomes all NaN.
ft100_diff = ft100_3.diff()
ft100_diff.head()

Unnamed: 0,FTSE100,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,,,,,,,,,,,
1,-39.5,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2,-6.90039,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
3,110.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
4,107.600097,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [11]:
# Discard the leading all-NaN row left by diff(), then renumber the rows from 0.
ft100_diff3 = ft100_diff.drop(index=0)
ft100_diff4 = ft100_diff3.reset_index(drop=True)
ft100_diff4.head()

Unnamed: 0,FTSE100,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,-39.5,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-6.90039,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,110.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,107.600097,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,-2.5,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [22]:
# Augmented Dickey-Fuller (ADF) test on the differenced data, to check stationarity.
# Reference: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

# Only the Sent_Cul_Sum column is tested in this cell.
ft100_afi = ft100_diff4["Sent_Cul_Sum"].values
resultft100 = adfuller(ft100_afi)

# Positions 0, 1 and 4 of the result hold the statistic, p-value and critical values.
adf_stat, adf_pvalue, adf_critical = resultft100[0], resultft100[1], resultft100[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % adf_pvalue)
print('Critical Values:')
for level, threshold in adf_critical.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -7.334397
p-value: 0.000000
Critical Values:
	1%: -3.497
	5%: -2.891
	10%: -2.582


In [37]:
#Granger Function Variables
# Lag order for the Granger call in the next cell; the helper reads it from
# module scope at call time, so this cell must run first.
maxlag = 10
# Name of the statsmodels test whose p-value is reported.
# NOTE(review): the helpers take `test` as their own parameter with this same
# default, so this module-level value does not appear to be read by them.
test = 'ssr_ftest'

In [38]:
# Granger p-values at lag order `maxlag` for every ordered pair of columns in the
# differenced FTSE frame (rows = response `_y`, columns = predictor `_x`).
grangers_causation_matrix2(ft100_diff4, variables = ft100_diff4.columns)

Unnamed: 0,FTSE100_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
FTSE100_y,1.0,0.001059,0.40871,0.00014,6.1e-05,0.120797,0.00014,0.000514,0.435651,0.00014,0.002313
Sent_Sum_y,0.806077,1.0,0.544343,0.760439,0.477459,0.313597,0.760439,0.999503,0.328647,0.760439,0.902901
Sent_Mean_y,0.739801,0.995685,1.0,0.96653,0.980922,0.702278,0.96653,0.993415,0.655207,0.96653,0.989314
Sent_Count_y,0.958409,0.87006,0.559546,1.0,0.993291,0.410397,1.0,0.755088,0.378477,1.0,0.474808
Scale_Sum_y,0.850117,0.410423,0.382784,0.987073,1.0,0.35158,0.987073,0.155945,0.18273,0.987073,0.146221
Scale_Mean_y,0.19279,0.964926,0.388348,0.938798,0.953883,1.0,0.938798,0.963691,0.37898,0.938798,0.823849
Scale_Count_y,0.958409,0.87006,0.559546,1.0,0.993291,0.410397,1.0,0.755088,0.378477,1.0,0.474808
ScSe_Sum_y,0.836062,0.999627,0.539308,0.61001,0.215166,0.31824,0.61001,1.0,0.322762,0.61001,0.850293
ScSe_Mean_y,0.780095,0.996222,0.63432,0.942554,0.980336,0.711651,0.942554,0.993821,1.0,0.942554,0.986948
ScSe_Count_y,0.958409,0.87006,0.559546,1.0,0.993291,0.410397,1.0,0.755088,0.378477,1.0,0.474808


In [39]:
#grangers_causation_matrix(ft100_diff4, variables = ft100_diff4.columns)

In [None]:
#####################################################################################################################

In [40]:
#Import Futures Time Series Data
# The Date column is parsed as dates and used as the index; any row with a
# missing value is dropped straight after loading.
# NOTE(review): absolute, machine-specific path - the notebook will not run on another machine as-is.
cd4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Futures_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
cd4.tail()

Unnamed: 0_level_0,Gold_Futures,Oil_Futures,Wheat_Futures,Nat_Gas_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-04-25,1893.199951,98.540001,1062.0,6.669,-2635.0,-0.17,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-616410.0
2022-04-26,1901.400024,101.699997,1083.25,6.85,-3859.0,-0.24,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-620269.0
2022-04-27,1885.900024,102.019997,1080.0,7.267,-3603.0,-0.24,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-623872.0
2022-04-28,1888.699951,105.360001,1074.0,6.888,-3314.0,-0.14,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-627186.0
2022-04-29,1909.300049,104.690002,1043.75,7.244,-4477.0,-0.24,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-631663.0


In [41]:
#Futures Differencing
# First-difference every column (each row minus the previous row). The first
# row has no predecessor, so it becomes all NaN.
cd4_diff = cd4.diff()
cd4_diff.head()

Unnamed: 0_level_0,Gold_Futures,Oil_Futures,Wheat_Futures,Nat_Gas_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-12-01,,,,,,,,,,,,,,
2021-12-02,-20.900025,0.93,28.0,-0.202,-2142.0,-0.329494,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,21.300049,-0.239998,-12.0,0.076,303.0,0.09,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-4.5,3.229996,2.5,-0.475,-376.0,-0.08,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,5.099976,2.560005,2.25,0.051,-4185.0,-0.06,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [44]:
# Replace the Date index with a 0-based integer index. drop=True discards the
# dates instead of turning them into a column, so no separate Date drop is needed.
cd4_diff2_5 = cd4_diff.reset_index(drop = True)
cd4_diff2_5.tail()

Unnamed: 0,Gold_Futures,Oil_Futures,Wheat_Futures,Nat_Gas_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
99,-37.800049,-3.529999,-3.5,0.135,1861.0,0.09,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
100,8.200073,3.159996,21.25,0.181,-1224.0,-0.07,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
101,-15.5,0.32,-3.25,0.417,256.0,0.0,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
102,2.799927,3.340004,-6.0,-0.379,289.0,0.1,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
103,20.600098,-0.669998,-30.25,0.356,-1163.0,-0.1,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [46]:
# Discard the leading all-NaN row left by diff(), then renumber the rows from 0.
cd4_diff3 = cd4_diff2_5.drop(index=0)
cd4_diff4 = cd4_diff3.reset_index(drop=True)
cd4_diff4.head()

Unnamed: 0,Gold_Futures,Oil_Futures,Wheat_Futures,Nat_Gas_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,-20.900025,0.93,28.0,-0.202,-2142.0,-0.329494,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,21.300049,-0.239998,-12.0,0.076,303.0,0.09,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-4.5,3.229996,2.5,-0.475,-376.0,-0.08,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,5.099976,2.560005,2.25,0.051,-4185.0,-0.06,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,0.800048,0.309998,-8.0,0.107,3505.0,0.15,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [50]:
# Augmented Dickey-Fuller (ADF) test on the differenced data, to check stationarity.
# Reference: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

# Only the Nat_Gas_Futures column is tested in this cell.
cd4_afi = cd4_diff4["Nat_Gas_Futures"].values
resultcd4 = adfuller(cd4_afi)

# Positions 0, 1 and 4 of the result hold the statistic, p-value and critical values.
adf_stat, adf_pvalue, adf_critical = resultcd4[0], resultcd4[1], resultcd4[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % adf_pvalue)
print('Critical Values:')
for level, threshold in adf_critical.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -5.348156
p-value: 0.000004
Critical Values:
	1%: -3.502
	5%: -2.893
	10%: -2.583


In [85]:
#Granger Function Variables
# Lag order for the Granger call in the next cell; the helper reads it from
# module scope at call time, so this cell must run first.
maxlag = 10

In [86]:
# Granger p-values at lag order `maxlag` for every ordered pair of columns in the
# differenced futures frame (rows = response `_y`, columns = predictor `_x`).
grangers_causation_matrix2(cd4_diff4, variables = cd4_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,Gold_Futures_x,Oil_Futures_x,Wheat_Futures_x,Nat_Gas_Futures_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Gold_Futures_y,1.0,0.102251,0.00248,0.592229,0.0001312338,0.743375,2.908728e-05,6.075517e-05,0.182746,2.908728e-05,9.469715e-05,0.597216,2.908728e-05,0.002645834
Oil_Futures_y,0.551031,1.0,0.001038,0.994032,8.570923e-08,0.008453,3.483837e-05,2.290948e-06,0.001817,3.483837e-05,1.709563e-07,0.004848,3.483837e-05,5.460873e-08
Wheat_Futures_y,0.037243,0.048986,1.0,0.984726,1.397196e-14,0.590337,2.162043e-15,2.997084e-17,0.427408,2.162043e-15,2.84975e-15,0.581443,2.162043e-15,8.207624e-15
Nat_Gas_Futures_y,0.954995,0.827207,0.980368,1.0,0.9738106,0.936812,0.9732377,0.9620057,0.92866,0.9732377,0.9710172,0.922806,0.9732377,0.9753146
Sent_Sum_y,0.969781,0.927111,0.608716,0.968774,1.0,0.379188,0.6502646,0.278613,0.333763,0.6502646,0.9935682,0.180002,0.6502646,0.8952009
Sent_Mean_y,0.556498,0.281502,0.518482,0.960578,0.9892376,1.0,0.9672615,0.9765576,0.908033,0.9672615,0.9842429,0.464595,0.9672615,0.9736616
Sent_Count_y,0.761458,0.989391,0.64987,0.971935,0.8543216,0.535283,1.0,0.9961892,0.320839,1.0,0.7519376,0.330667,1.0,0.5354173
Scale_Sum_y,0.903526,0.859866,0.579001,0.960849,0.2575167,0.42491,0.9996206,1.0,0.401036,0.9996206,0.09805323,0.24603,0.9996206,0.1077332
Scale_Mean_y,0.44874,0.034699,0.295447,0.507956,0.9418404,0.293293,0.9374736,0.9269644,1.0,0.9374736,0.9486177,0.428221,0.9374736,0.7523196
Scale_Count_y,0.761458,0.989391,0.64987,0.971935,0.8543216,0.535283,1.0,0.9961892,0.320839,1.0,0.7519376,0.330667,1.0,0.5354173


In [None]:
##################################################################################################################

In [87]:
#Import US market Time Series Data
# The Date column is parsed as dates and used as the index; any row with a
# missing value is dropped straight after loading.
# NOTE(review): absolute, machine-specific path - the notebook will not run on another machine as-is.
cd3 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/US_1.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
cd3.tail()

Unnamed: 0_level_0,S_P,MSCI,VIX,10_Year_Treasury,Defense_ETF,Metals_ETF,HY_ETF,IG_ETF,10Y_Futures,Bitcoin_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2022-04-25,4296.120117,462.299988,27.02,2.826,107.639999,57.290001,37.77,52.799999,119.609375,40210.0,-2635,-0.167653,15717,-36474.0,-2.320672,15717,-28553.8,-1.816746,15717,-616410
2022-04-26,4175.200195,429.809998,33.52,2.772,106.160004,56.290001,37.549999,52.82,120.046875,38220.0,-3859,-0.24155,15976,-27256.0,-1.706059,15976,-42725.7,-2.674368,15976,-620269
2022-04-27,4183.959961,417.779999,31.6,2.818,105.300003,57.299999,37.34,52.369999,119.796875,38900.0,-3603,-0.241149,14941,-27635.7,-1.849655,14941,-39186.1,-2.622723,14941,-623872
2022-04-28,4287.5,433.380005,29.99,2.863,105.870003,58.360001,37.52,52.560001,119.359375,39920.0,-3314,-0.139672,23727,-7103.7,-0.299393,23727,-36851.5,-1.553146,23727,-627186
2022-04-29,4131.930176,421.25,33.4,2.887,102.860001,56.209999,37.040001,52.060001,119.15625,38997.30078,-4477,-0.243938,18353,-5001.3,-0.272506,18353,-44580.1,-2.429036,18353,-631663


In [88]:
#US market Differencing
# First-difference every column (each row minus the previous row). The first
# row has no predecessor, so it becomes all NaN.
cd3_diff = cd3.diff()
cd3_diff.head()

Unnamed: 0_level_0,S_P,MSCI,VIX,10_Year_Treasury,Defense_ETF,Metals_ETF,HY_ETF,IG_ETF,10Y_Futures,Bitcoin_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-12-01,,,,,,,,,,,,,,,,,,,,
2021-12-02,64.060059,13.230041,-3.17,0.014,3.57,0.940002,0.290001,0.110001,-0.328125,185.0,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-38.669922,-26.130005,2.72,-0.105,-0.739998,-0.310001,-0.009998,0.360001,0.8125,-3630.0,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,53.239746,4.700012,-3.49,0.091,2.57,0.5,0.109997,-0.25,-0.78125,-4585.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,95.080078,28.919983,-5.29,0.046,0.800003,0.920002,0.280003,-0.010002,-0.3125,1640.0,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [89]:
# Replace the Date index with a 0-based integer index. drop=True discards the
# dates instead of turning them into a column.
cd3_diff2_5 = cd3_diff.reset_index(drop = True)
cd3_diff2_5.tail()

Unnamed: 0,S_P,MSCI,VIX,10_Year_Treasury,Defense_ETF,Metals_ETF,HY_ETF,IG_ETF,10Y_Futures,Bitcoin_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
99,24.340332,6.259979,-1.19,-0.08,-0.669998,-1.129997,0.299999,0.469997,0.671875,755.0,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
100,-120.919922,-32.48999,6.5,-0.054,-1.479996,-1.0,-0.220001,0.02,0.4375,-1990.0,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
101,8.759766,-12.029999,-1.92,0.046,-0.860001,1.009998,-0.209999,-0.450001,-0.25,680.0,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
102,103.540039,15.600006,-1.61,0.045,0.57,1.060001,0.18,0.190002,-0.4375,1020.0,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
103,-155.569824,-12.130005,3.41,0.024,-3.010002,-2.150002,-0.48,-0.5,-0.203125,-922.69922,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [90]:
# Discard the leading all-NaN row left by diff(), then renumber the rows from 0.
cd3_diff3 = cd3_diff2_5.drop(index=0)
cd3_diff4 = cd3_diff3.reset_index(drop=True)
cd3_diff4.head()

Unnamed: 0,S_P,MSCI,VIX,10_Year_Treasury,Defense_ETF,Metals_ETF,HY_ETF,IG_ETF,10Y_Futures,Bitcoin_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,64.060059,13.230041,-3.17,0.014,3.57,0.940002,0.290001,0.110001,-0.328125,185.0,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-38.669922,-26.130005,2.72,-0.105,-0.739998,-0.310001,-0.009998,0.360001,0.8125,-3630.0,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,53.239746,4.700012,-3.49,0.091,2.57,0.5,0.109997,-0.25,-0.78125,-4585.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,95.080078,28.919983,-5.29,0.046,0.800003,0.920002,0.280003,-0.010002,-0.3125,1640.0,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,14.459961,9.02002,-1.99,0.029,1.129997,0.529999,0.009998,-0.279999,-0.109375,325.0,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [100]:
# Augmented Dickey-Fuller (ADF) test on the differenced data, to check stationarity.
# Reference: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

# Only the Bitcoin_Futures column is tested in this cell.
cd3_afi = cd3_diff4["Bitcoin_Futures"].values
resultcd3 = adfuller(cd3_afi)

# Positions 0, 1 and 4 of the result hold the statistic, p-value and critical values.
adf_stat, adf_pvalue, adf_critical = resultcd3[0], resultcd3[1], resultcd3[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % adf_pvalue)
print('Critical Values:')
for level, threshold in adf_critical.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -9.346846
p-value: 0.000000
Critical Values:
	1%: -3.496
	5%: -2.890
	10%: -2.582


In [105]:
#Granger Function Variables
# Lag order for the Granger call in the next cell; the helper reads it from
# module scope at call time, so this cell must run first.
maxlag = 10

In [106]:
# Granger p-values at lag order `maxlag` for every ordered pair of columns in the
# differenced US market frame (rows = response `_y`, columns = predictor `_x`).
grangers_causation_matrix2(cd3_diff4, variables = cd3_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,S_P_x,MSCI_x,VIX_x,10_Year_Treasury_x,Defense_ETF_x,Metals_ETF_x,HY_ETF_x,IG_ETF_x,10Y_Futures_x,Bitcoin_Futures_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
S_P_y,1.0,0.879528,0.410806,0.439572,0.333405,0.191148,0.988312,0.781985,0.558613,0.115725,0.08615,0.898079,0.030463,0.073474,0.401075,0.030463,0.066397,0.716729,0.030463,0.085893
MSCI_y,0.510241,1.0,0.41283,0.513002,0.1242,0.159477,0.553309,0.25818,0.529126,0.817219,0.074236,0.259183,0.064882,0.073681,0.351081,0.064882,0.06941,0.237161,0.064882,0.07826
VIX_y,0.826762,0.944585,1.0,0.706102,0.606493,0.544461,0.948835,0.826763,0.756025,0.145597,0.060305,0.89713,0.035912,0.069326,0.444068,0.035912,0.052887,0.755148,0.035912,0.073882
10_Year_Treasury_y,0.55473,0.120525,0.482382,1.0,0.949743,0.893939,0.66117,0.962852,0.651076,0.318155,0.000575,0.006567,0.006199,0.001015,0.312994,0.006199,0.00052,0.002342,0.006199,0.000512
Defense_ETF_y,0.373115,0.874088,0.121715,0.263084,1.0,0.531768,0.688612,0.465615,0.323552,0.060666,0.005718,0.493789,0.002174,0.000366,0.495414,0.002174,0.003364,0.330621,0.002174,0.005795
Metals_ETF_y,0.901917,0.831835,0.835286,0.309841,0.40596,1.0,0.74016,0.506787,0.398479,0.318298,0.039279,0.034696,0.036106,0.054325,0.241633,0.036106,0.032792,0.00885,0.036106,0.251235
HY_ETF_y,0.261351,0.752513,0.120569,0.194944,0.620306,0.020705,1.0,0.43858,0.24213,0.18737,0.595486,0.136853,0.333506,0.363097,0.690691,0.333506,0.569512,0.091953,0.333506,0.610335
IG_ETF_y,0.370655,0.276127,0.49991,0.788471,0.865822,0.266581,0.145503,1.0,0.748501,0.183571,0.084068,0.003159,0.114707,0.077858,0.912594,0.114707,0.083527,0.002426,0.114707,0.094245
10Y_Futures_y,0.673372,0.331928,0.654355,0.456486,0.963552,0.974648,0.683861,0.992223,1.0,0.386396,0.000117,0.007187,0.001412,0.000215,0.477192,0.001412,0.000107,0.003409,0.001412,8.5e-05
Bitcoin_Futures_y,0.62558,0.568663,0.503148,0.784753,0.571468,0.816466,0.764938,0.648927,0.771695,1.0,0.010613,0.886391,0.012024,0.00462,0.710773,0.012024,0.009659,0.888607,0.012024,0.010153


In [None]:
##################################################################################################################

In [62]:
#Import MSCI Futures Time Series Data
# The Date column is parsed as dates and used as the index; any row with a
# missing value is dropped straight after loading.
# NOTE(review): absolute, machine-specific path - the notebook will not run on another machine as-is.
mf3 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/MSCI_Futures.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
mf3.tail()

Unnamed: 0_level_0,MSCI-Future,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,2893.100098,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-578386.0
2022-04-26,2814.399902,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-582245.0
2022-04-27,2821.300049,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-585848.0
2022-04-28,2879.600098,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-589162.0
2022-04-29,2797.0,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-593639.0


In [63]:
#MSCI Futures Differencing
# First-difference every column (each row minus the previous row). The first
# row has no predecessor, so it becomes all NaN.
mf3_diff = mf3.diff()
mf3_diff.head()

Unnamed: 0_level_0,MSCI-Future,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-20,,,,,,,,,,,
2021-12-21,51.800049,-1035.0,-0.000678,2362.0,-8245.2,-0.089403,2362.0,-10306.4,-0.084308,2362.0,-3648.0
2021-12-22,31.0,1823.0,0.155526,-1862.0,5394.6,-0.105085,-1862.0,17823.6,1.554405,-1862.0,-1825.0
2021-12-23,21.300049,-2080.0,-0.241676,976.0,1899.4,0.694252,976.0,-18447.3,-2.129896,976.0,-3905.0
2021-12-27,35.699951,1312.0,0.063517,-1824.0,6042.4,0.206061,-1824.0,12758.6,0.733073,-1824.0,-2593.0


In [64]:
# Replace the Date index with a 0-based integer index. drop=True discards the
# dates instead of turning them into a column.
mf3_diff2_5 = mf3_diff.reset_index(drop = True)
mf3_diff2_5.tail()

Unnamed: 0,MSCI-Future,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
86,3.5,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
87,-78.700196,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
88,6.900147,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
89,58.300049,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
90,-82.600098,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [66]:
# Discard the leading all-NaN row left by diff(), then renumber the rows from 0.
mf3_diff3 = mf3_diff2_5.drop(index=0)
mf3_diff4 = mf3_diff3.reset_index(drop=True)
mf3_diff4.head()

Unnamed: 0,MSCI-Future,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,51.800049,-1035.0,-0.000678,2362.0,-8245.2,-0.089403,2362.0,-10306.4,-0.084308,2362.0,-3648.0
1,31.0,1823.0,0.155526,-1862.0,5394.6,-0.105085,-1862.0,17823.6,1.554405,-1862.0,-1825.0
2,21.300049,-2080.0,-0.241676,976.0,1899.4,0.694252,976.0,-18447.3,-2.129896,976.0,-3905.0
3,35.699951,1312.0,0.063517,-1824.0,6042.4,0.206061,-1824.0,12758.6,0.733073,-1824.0,-2593.0
4,-2.300049,-333.0,-0.024936,397.0,7417.3,1.389137,397.0,-882.9,0.116105,397.0,-2926.0


In [68]:
# Augmented Dickey-Fuller (ADF) test on the differenced data, to check stationarity.
# Reference: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

# Only the MSCI-Future column is tested in this cell.
mf3_afi = mf3_diff4["MSCI-Future"].values
resultmf3 = adfuller(mf3_afi)

# Positions 0, 1 and 4 of the result hold the statistic, p-value and critical values.
adf_stat, adf_pvalue, adf_critical = resultmf3[0], resultmf3[1], resultmf3[4]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % adf_pvalue)
print('Critical Values:')
for level, threshold in adf_critical.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -7.275167
p-value: 0.000000
Critical Values:
	1%: -3.507
	5%: -2.895
	10%: -2.585


In [78]:
#Granger Function Variables
# Lag order for the Granger call in the next cell; the helper reads it from
# module scope at call time, so this cell must run first.
maxlag = 10

In [79]:
# Granger p-values at lag order `maxlag` for every ordered pair of columns in the
# differenced MSCI futures frame (rows = response `_y`, columns = predictor `_x`).
grangers_causation_matrix2(mf3_diff4, variables = mf3_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,MSCI-Future_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
MSCI-Future_y,1.0,0.064756,0.735975,0.0124,0.03456,0.245505,0.0124,0.047077,0.524379,0.0124,0.058383
Sent_Sum_y,0.619222,1.0,0.342778,0.777101,0.400047,0.426227,0.777101,0.996879,0.191809,0.777101,0.922601
Sent_Mean_y,0.733094,0.995807,1.0,0.985699,0.987956,0.898458,0.985699,0.993627,0.142432,0.985699,0.988036
Sent_Count_y,0.341012,0.917298,0.508951,1.0,0.998232,0.412399,1.0,0.84927,0.345553,1.0,0.660283
Scale_Sum_y,0.555479,0.354611,0.360163,0.999833,1.0,0.482508,0.999833,0.168465,0.253669,0.999833,0.180048
Scale_Mean_y,0.109335,0.957984,0.388794,0.953533,0.951526,1.0,0.953533,0.963076,0.511155,0.953533,0.805707
Scale_Count_y,0.341012,0.917298,0.508951,1.0,0.998232,0.412399,1.0,0.84927,0.345553,1.0,0.660283
ScSe_Sum_y,0.610707,0.998437,0.378215,0.690962,0.199186,0.426143,0.690962,1.0,0.222327,0.690962,0.836136
ScSe_Mean_y,0.620494,0.998105,0.088088,0.975158,0.990024,0.887759,0.975158,0.996613,1.0,0.975158,0.992549
ScSe_Count_y,0.341012,0.917298,0.508951,1.0,0.998232,0.412399,1.0,0.84927,0.345553,1.0,0.660283


In [None]:
###################################################################################################################

In [107]:
#Import 2Y Treasury Time Series Data
# The Date column is parsed as dates and used as the index; any row with a
# missing value is dropped straight after loading.
# NOTE(review): absolute, machine-specific path - the notebook will not run on another machine as-is.
y2 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/2y_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
y2.tail()

Unnamed: 0_level_0,2Y Treasury Yield,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,2.722,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-617862.0
2022-04-26,2.5,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-621721.0
2022-04-27,2.551,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-625324.0
2022-04-28,2.572,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-628638.0
2022-04-29,2.719,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-633115.0


In [108]:
#2Y Treasury Yield Differencing
# First-difference every column (each row minus the previous row). The first
# row has no predecessor, so it becomes all NaN.
y2_diff = y2.diff()
y2_diff.head()

Unnamed: 0_level_0,2Y Treasury Yield,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,0.068,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-0.028,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-0.008,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,0.108,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [109]:
# Replace the Date index with a 0-based integer index. drop=True discards the
# dates instead of turning them into a column.
y2_diff2_5 = y2_diff.reset_index(drop = True)
y2_diff2_5.tail()

Unnamed: 0,2Y Treasury Yield,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
102,0.092,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
103,-0.222,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
104,0.051,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
105,0.021,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
106,0.147,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [110]:
# Remove the first row (label 0): .diff() leaves it all-NaN because it has no predecessor.
# The index is then renumbered so the remaining rows start at 0 again.
y2_diff3 = y2_diff2_5.drop([0])
y2_diff4 = y2_diff3.reset_index(drop = True)
y2_diff4.head()

Unnamed: 0,2Y Treasury Yield,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,0.068,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-0.028,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-0.008,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,0.108,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,-0.004,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [111]:
# Augmented Dickey-Fuller stationarity check on the differenced "2Y Treasury Yield" column.
# Background: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

y2_afi = y2_diff4["2Y Treasury Yield"].to_numpy()
resulty2 = adfuller(y2_afi)  # [0] = test statistic, [1] = p-value, [4] = critical-value dict
# Statistic, p-value and the section header, one per line.
print('ADF Statistic: %f' % resulty2[0], 'p-value: %f' % resulty2[1], 'Critical Values:', sep='\n')
# One tab-indented line per significance level.
for key, value in resulty2[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -12.031491
p-value: 0.000000
Critical Values:
	1%: -3.494
	5%: -2.889
	10%: -2.582


In [116]:
#Granger Function Variables
# Lag order for the Granger tests on the 2Y dataset.
# NOTE(review): grangers_causation_matrix2 takes no maxlag argument, so it presumably reads this module-level value — confirm.
maxlag = 10

In [117]:
# Pairwise Granger-causality matrix across every column of y2_diff4, using the 'ssr_ftest' statistic.
# NOTE(review): the lag order is not passed here; it presumably comes from the module-level maxlag — confirm.
grangers_causation_matrix2(y2_diff4, variables = y2_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,2Y Treasury Yield_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
2Y Treasury Yield_y,1.0,0.028224,0.49982,0.054163,0.047334,0.809535,0.054163,0.030248,0.466488,0.054163,0.03151
Sent_Sum_y,0.025004,1.0,0.621593,0.809439,0.335129,0.206846,0.809439,0.99965,0.390239,0.809439,0.890913
Sent_Mean_y,0.196482,0.993296,1.0,0.977361,0.975324,0.821126,0.977361,0.990747,0.608189,0.977361,0.985706
Sent_Count_y,0.024548,0.884181,0.575565,1.0,0.993217,0.300988,1.0,0.7783,0.395282,1.0,0.496242
Scale_Sum_y,0.025697,0.26564,0.444949,0.98799,1.0,0.28454,0.98799,0.085393,0.257528,0.98799,0.077401
Scale_Mean_y,0.869632,0.983286,0.674261,0.937062,0.947908,1.0,0.937062,0.982152,0.707123,0.937062,0.853969
Scale_Count_y,0.024548,0.884181,0.575565,1.0,0.993217,0.300988,1.0,0.7783,0.395282,1.0,0.496242
ScSe_Sum_y,0.020883,0.999584,0.619419,0.676222,0.102821,0.20316,0.676222,1.0,0.392316,0.676222,0.829683
ScSe_Mean_y,0.135487,0.996084,0.466144,0.959456,0.98387,0.865511,0.959456,0.994572,1.0,0.959456,0.986903
ScSe_Count_y,0.024548,0.884181,0.575565,1.0,0.993217,0.300988,1.0,0.7783,0.395282,1.0,0.496242


In [None]:
###################################################################################################################

In [29]:
#Import FX Time Series Data
# "Date" is parsed as datetime and used as the index; .dropna() discards every row with a NaN in any column.
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this exact location.
fx2 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/FX_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
fx2.tail()

Unnamed: 0_level_0,EUR,JPY,GBP,AUD,MXN,EURGBP,ZAR,RUB,CNY,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,sent_cul_sum,scale_cul_sum,scse_cul_sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-25,1.081105,128.604996,1.283088,0.723694,20.232,0.8426,15.56954,77.339996,6.5003,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-633946.0,-12334999.5,-6471561.9
2022-04-26,1.071421,127.744003,1.273999,0.71785,20.2146,0.84087,15.6731,75.739998,6.5579,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-637805.0,-12362255.5,-6514287.6
2022-04-27,1.064362,127.265999,1.2584,0.7146,20.38032,0.84574,15.82465,74.214996,6.5563,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-641408.0,-12389891.2,-6553473.7
2022-04-28,1.055509,128.393997,1.254186,0.71257,20.372299,0.84151,15.86366,73.839996,6.5595,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-644722.0,-12396994.9,-6590325.2
2022-04-29,1.05042,130.811005,1.246385,0.710884,20.441299,0.8426,15.9825,72.714996,6.6253,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-649199.0,-12401996.2,-6634905.3


In [30]:
#fx Futures Differencing
# First-difference every column of fx2 (each row minus the previous row); the first row becomes NaN.
fx2_diff = fx2.diff()
fx2_diff.head()

Unnamed: 0_level_0,EUR,JPY,GBP,AUD,MXN,EURGBP,ZAR,RUB,CNY,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,sent_cul_sum,scale_cul_sum,scse_cul_sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-01,,,,,,,,,,,,,,,,,,,,,
2021-12-02,-0.001077,-0.417,-0.001593,-0.00174,0.059769,0.0002,0.15379,0.167,0.0041,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0,-17279.9,-24428.1
2021-12-03,-0.001331,0.139999,0.001999,-0.00198,-0.163029,-0.00228,-0.0989,-0.626503,0.0083,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0,-17006.9,-22993.0
2021-12-06,-0.000141,0.019997,-0.006268,-0.00699,-0.08754,0.00398,0.1101,0.248299,-0.0003,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0,-24903.7,-25370.9
2021-12-07,-0.001812,0.416,0.002704,0.00313,0.002699,-0.00331,-0.127899,0.4804,-0.0005,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0,-86243.3,-65007.8


In [31]:
# Discard the Date index and replace it with a plain 0..n-1 integer index.
fx2_diff2_5 = fx2_diff.reset_index(drop = True)
fx2_diff2_5.tail()

Unnamed: 0,EUR,JPY,GBP,AUD,MXN,EURGBP,ZAR,RUB,CNY,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,sent_cul_sum,scale_cul_sum,scse_cul_sum
103,-0.002413,0.196991,-0.019539,-0.012418,0.04735,0.01085,0.21042,-3.5,0.051,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0,-36474.0,-28553.8
104,-0.009684,-0.860992,-0.009089,-0.005844,-0.017401,-0.00173,0.10356,-1.599998,0.0576,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0,-27256.0,-42725.7
105,-0.007059,-0.478004,-0.015599,-0.00325,0.16572,0.00487,0.151549,-1.525002,-0.0016,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0,-27635.7,-39186.1
106,-0.008853,1.127998,-0.004214,-0.00203,-0.00802,-0.00423,0.03901,-0.375,0.0032,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0,-7103.7,-36851.5
107,-0.005089,2.417007,-0.0078,-0.001687,0.069,0.00109,0.11884,-1.125,0.0658,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0,-5001.3,-44580.1


In [32]:
# Remove the first row (label 0): .diff() leaves it all-NaN because it has no predecessor.
# The index is then renumbered so the remaining rows start at 0 again.
fx2_diff3 = fx2_diff2_5.drop([0])
fx2_diff4 = fx2_diff3.reset_index(drop = True)
fx2_diff4.head()

Unnamed: 0,EUR,JPY,GBP,AUD,MXN,EURGBP,ZAR,RUB,CNY,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,sent_cul_sum,scale_cul_sum,scse_cul_sum
0,-0.001077,-0.417,-0.001593,-0.00174,0.059769,0.0002,0.15379,0.167,0.0041,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0,-17279.9,-24428.1
1,-0.001331,0.139999,0.001999,-0.00198,-0.163029,-0.00228,-0.0989,-0.626503,0.0083,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0,-17006.9,-22993.0
2,-0.000141,0.019997,-0.006268,-0.00699,-0.08754,0.00398,0.1101,0.248299,-0.0003,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0,-24903.7,-25370.9
3,-0.001812,0.416,0.002704,0.00313,0.002699,-0.00331,-0.127899,0.4804,-0.0005,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0,-86243.3,-65007.8
4,-0.001018,0.067001,-0.001827,0.007339,-0.195499,0.00031,-0.060801,-0.357895,-0.0085,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0,-59565.9,-34664.6


In [34]:
# Augmented Dickey-Fuller stationarity check on the differenced "Sent_Sum" column of the FX dataset.
# Only this one column is tested in this cell.
# Background: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

fx2_afi = fx2_diff4["Sent_Sum"].to_numpy()
resultfx2 = adfuller(fx2_afi)  # [0] = test statistic, [1] = p-value, [4] = critical-value dict
# Statistic, p-value and the section header, one per line.
print('ADF Statistic: %f' % resultfx2[0], 'p-value: %f' % resultfx2[1], 'Critical Values:', sep='\n')
# One tab-indented line per significance level.
for key, value in resultfx2[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -8.324994
p-value: 0.000000
Critical Values:
	1%: -3.495
	5%: -2.890
	10%: -2.582


In [43]:
#Granger Function Variables
# Lag order for the Granger tests on the FX dataset.
# NOTE(review): grangers_causation_matrix2 takes no maxlag argument, so it presumably reads this module-level value — confirm.
# NOTE(review): 34 lags is large relative to the ~107 differenced rows in fx2_diff4; confirm this choice is intended.
maxlag = 34

In [44]:
# Pairwise Granger-causality matrix across every column of fx2_diff4, using the 'ssr_ftest' statistic.
# NOTE(review): the lag order is not passed here; it presumably comes from the module-level maxlag — confirm.
grangers_causation_matrix2(fx2_diff4, variables = fx2_diff4.columns, test = 'ssr_ftest')



Unnamed: 0,EUR_x,JPY_x,GBP_x,AUD_x,MXN_x,EURGBP_x,ZAR_x,RUB_x,CNY_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,sent_cul_sum_x,scale_cul_sum_x,scse_cul_sum_x
EUR_y,1.0,0.781413,0.080187,0.115421,0.615831,0.055167,0.126147,0.488597,0.366009,0.07158,0.028202,0.02673,0.063803,0.521518,0.02673,0.073511,0.050362,0.02673,0.06294,0.061986,0.072663
JPY_y,0.847361,1.0,0.374438,0.917975,0.470855,0.953014,0.567874,0.482949,0.976212,0.075062,0.547211,0.019467,0.018839,0.275559,0.019467,0.049436,0.423758,0.019467,0.057352,0.121441,0.046658
GBP_y,0.768744,0.128897,1.0,0.995685,0.281122,0.81545,0.155893,0.588124,0.963893,0.583677,0.907732,0.700211,0.655429,0.382057,0.700211,0.590886,0.932966,0.700211,0.829064,0.744894,0.817543
AUD_y,0.570912,0.763031,0.971699,1.0,0.64992,0.572905,0.660966,0.891158,0.827979,0.23044,0.827408,0.221424,0.183994,0.53472,0.221424,0.239382,0.928365,0.221424,0.574194,0.529711,0.594427
MXN_y,0.142153,0.239413,0.041638,0.82382,1.0,0.597636,0.578623,0.236806,0.356754,0.138704,0.468581,0.009649,0.024161,0.911089,0.009649,0.129199,0.597269,0.009649,0.04623,0.004696,0.039477
EURGBP_y,0.563814,0.668816,0.542179,0.242773,0.735395,1.0,0.29488,0.810727,0.47376,0.762226,0.905212,0.74529,0.685765,0.833188,0.74529,0.764947,0.854293,0.74529,0.672303,0.56634,0.686857
ZAR_y,0.977715,0.949927,0.826688,0.477048,0.384786,0.66414,1.0,0.182327,0.675306,0.887204,0.423863,0.919262,0.906228,0.859293,0.919262,0.841958,0.943894,0.919262,0.573077,0.929628,0.513756
RUB_y,0.80832,0.271971,0.362427,0.195905,0.009174,0.023054,0.735863,1.0,0.000491,0.001892,0.758291,0.001788,0.000291,0.01055,0.001788,0.001649,0.502764,0.001788,0.001722,0.000312,0.001615
CNY_y,0.649546,0.812215,0.572564,0.719691,0.664966,0.964631,0.360822,0.702449,1.0,0.948331,0.933103,0.852208,0.903171,0.926932,0.852208,0.944322,0.656571,0.852208,0.953697,0.897077,0.950504
Sent_Sum_y,0.792426,0.376371,0.915742,0.440091,0.650506,0.64255,0.064911,0.756529,0.301833,1.0,0.785791,0.003041,0.535108,0.211398,0.003041,0.617369,0.769901,0.003041,1.0,0.527227,0.484917


In [None]:
###################################################################################################################

In [28]:
#Import Crypto Time Series Data
# "Date" is parsed as datetime and used as the index; .dropna() discards every row with a NaN in any column.
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this exact location.
cry2 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/crypto_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
cry2.tail()

Unnamed: 0_level_0,Bitcoin,ETH,LINK,XPR,BNB,ALGO,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2022-04-26,38117.46094,2808.29834,12.354577,0.641699,385.483063,0.655285,-3859,-0.24155,15976,-27256.0,-1.706059,15976,-42725.7,-2.674368,15976,-868375
2022-04-27,39241.12109,2888.929688,12.73311,0.652318,391.445831,0.670329,-3603,-0.241149,14941,-27635.7,-1.849655,14941,-39186.1,-2.622723,14941,-871978
2022-04-28,39773.82813,2936.940918,12.662813,0.644363,406.718201,0.699503,-3314,-0.139672,23727,-7103.7,-0.299393,23727,-36851.5,-1.553146,23727,-875292
2022-04-29,38609.82422,2815.601807,11.970266,0.611407,393.062164,0.636792,-4477,-0.243938,18353,-5001.3,-0.272506,18353,-44580.1,-2.429036,18353,-879769
2022-04-30,37714.875,2730.186768,10.997047,0.587091,377.767822,0.567972,-73,-0.005441,13416,-33095.4,-2.46686,13416,-6689.3,-0.498606,13416,-879842


In [138]:
#Crypto Differencing
# First-difference every column of cry2 (each row minus the previous row); the first row becomes NaN.
cry2_diff = cry2.diff()
cry2_diff.head()

Unnamed: 0_level_0,Bitcoin,ETH,LINK,XPR,BNB,ALGO,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2021-12-01,,,,,,,,,,,,,,,,
2021-12-02,-752.01172,-75.687988,-0.819117,-0.018618,-8.501404,-0.084681,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-2879.57032,-290.596191,-1.245567,-0.050068,-24.834168,-0.139696,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-04,-4397.54296,-101.118653,-2.623798,-0.075215,-25.558716,-0.091835,-1485.0,-0.142833,981.0,-17845.5,-1.88717,981.0,-10924.5,-0.948812,981.0,-3766.0
2021-12-05,168.14453,78.735352,-1.190407,-0.041434,-11.294189,0.1068,213.0,-0.006023,-567.0,-1264.2,-0.477513,-567.0,690.6,-0.214294,-567.0,-3553.0


In [139]:
# Discard the Date index and replace it with a plain 0..n-1 integer index.
cry2_diff2_5 = cry2_diff.reset_index(drop = True)
cry2_diff2_5.tail()

Unnamed: 0,Bitcoin,ETH,LINK,XPR,BNB,ALGO,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
146,-2340.84765,-201.095215,-1.104115,-0.052727,-18.867218,-0.048029,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
147,1123.66015,80.631348,0.378533,0.010619,5.962769,0.015044,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
148,532.70704,48.01123,-0.070297,-0.007955,15.272369,0.029174,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
149,-1164.00391,-121.339111,-0.692547,-0.032956,-13.656036,-0.062711,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0
150,-894.94922,-85.415039,-0.973219,-0.024316,-15.294342,-0.06882,4404.0,0.238497,-4937.0,-28094.1,-2.194355,-4937.0,37890.8,1.93043,-4937.0,-73.0


In [140]:
# Remove the first row (label 0): .diff() leaves it all-NaN because it has no predecessor.
# The index is then renumbered so the remaining rows start at 0 again.
cry2_diff3 = cry2_diff2_5.drop([0])
cry2_diff4 = cry2_diff3.reset_index(drop = True)
cry2_diff4.head()

Unnamed: 0,Bitcoin,ETH,LINK,XPR,BNB,ALGO,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,-752.01172,-75.687988,-0.819117,-0.018618,-8.501404,-0.084681,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-2879.57032,-290.596191,-1.245567,-0.050068,-24.834168,-0.139696,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-4397.54296,-101.118653,-2.623798,-0.075215,-25.558716,-0.091835,-1485.0,-0.142833,981.0,-17845.5,-1.88717,981.0,-10924.5,-0.948812,981.0,-3766.0
3,168.14453,78.735352,-1.190407,-0.041434,-11.294189,0.1068,213.0,-0.006023,-567.0,-1264.2,-0.477513,-567.0,690.6,-0.214294,-567.0,-3553.0
4,1213.77734,160.414551,0.406946,0.022693,31.473084,0.013676,896.0,0.069893,-915.0,11212.9,1.022039,-915.0,7856.0,0.576181,-915.0,-2657.0


In [148]:
# Augmented Dickey-Fuller stationarity check on the differenced "ALGO" column of the crypto dataset.
# Only this one column is tested in this cell.
# Background: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

cry2_afi = cry2_diff4["ALGO"].to_numpy()
resultcry2 = adfuller(cry2_afi)  # [0] = test statistic, [1] = p-value, [4] = critical-value dict
# Statistic, p-value and the section header, one per line.
print('ADF Statistic: %f' % resultcry2[0], 'p-value: %f' % resultcry2[1], 'Critical Values:', sep='\n')
# One tab-indented line per significance level.
for key, value in resultcry2[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -6.479393
p-value: 0.000000
Critical Values:
	1%: -3.476
	5%: -2.881
	10%: -2.577


In [153]:
#Granger Function Variables
# Lag order for the Granger tests on the crypto dataset.
# NOTE(review): grangers_causation_matrix2 takes no maxlag argument, so it presumably reads this module-level value — confirm.
maxlag = 10

In [154]:
# Pairwise Granger-causality matrix across every column of cry2_diff4, using the 'ssr_ftest' statistic.
# NOTE(review): the lag order is not passed here; it presumably comes from the module-level maxlag — confirm.
grangers_causation_matrix2(cry2_diff4, variables = cry2_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,Bitcoin_x,ETH_x,LINK_x,XPR_x,BNB_x,ALGO_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Bitcoin_y,1.0,0.242502,0.439895,0.528338,0.376288,0.498348,0.004504,0.11171,0.004164,0.001333561,0.338679,0.004164,0.005069765,0.092562,0.004164,0.004445805
ETH_y,0.410653,1.0,0.674676,0.992021,0.013121,0.255106,0.240112,0.214208,0.15107,0.09800196,0.288642,0.15107,0.2385021,0.239487,0.15107,0.2256266
LINK_y,0.646492,0.099591,1.0,0.441468,0.014603,0.372244,0.718526,0.145666,0.637904,0.5043907,0.35334,0.637904,0.7439742,0.193749,0.637904,0.7166225
XPR_y,0.762604,0.597191,0.343756,1.0,0.060782,0.455387,0.427433,0.209088,0.546619,0.3244127,0.84571,0.546619,0.4294641,0.259603,0.546619,0.3947714
BNB_y,0.164126,0.002219,0.027022,0.62279,1.0,0.076311,0.251507,0.307754,0.341012,0.1866933,0.739375,0.341012,0.2510686,0.297422,0.341012,0.1975171
ALGO_y,0.814942,0.522006,0.001932,0.387991,0.585379,1.0,0.973117,0.112767,0.974962,0.9241618,0.624646,0.974962,0.9732938,0.089166,0.974962,0.9650005
Sent_Sum_y,0.214014,0.665002,0.865886,0.10367,0.48158,0.945634,1.0,0.000937,0.009806,1.337459e-05,0.43994,0.009806,0.9074509,0.000146,0.009806,0.7976616
Sent_Mean_y,0.983764,0.987356,0.84835,0.835201,0.927283,0.982805,0.932494,1.0,0.855557,0.7949717,0.437025,0.855557,0.9331854,0.891841,0.855557,0.9500515
Sent_Count_y,0.24821,0.549988,0.775626,0.128567,0.405756,0.926935,0.010231,0.001089,1.0,0.9171324,0.518703,1.0,0.001777603,0.000156,1.0,0.002287552
Scale_Sum_y,0.323111,0.669772,0.824465,0.125456,0.424802,0.922491,1.1e-05,0.000158,0.965498,1.0,0.551739,0.965498,9.5111e-07,2.3e-05,0.965498,6.74279e-07


In [None]:
###################################################################################################################

In [26]:
#CSI300
# Load the CSI300 time series. "Date" is parsed as datetime and used as the index;
# .dropna() discards every row with a NaN in any column.
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this exact location.
cs4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/CSI300_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
cs4.tail()

Unnamed: 0_level_0,CSI300,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,3814.909912,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-599156.0
2022-04-26,3784.120117,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-603015.0
2022-04-27,3895.540039,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-606618.0
2022-04-28,3921.110107,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-609932.0
2022-04-29,4016.23999,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-614409.0


In [27]:
#CSI300 Differencing
# First-difference every column of cs4 (each row minus the previous row); the first row becomes NaN.
cs4_diff = cs4.diff()
cs4_diff.head()

Unnamed: 0_level_0,CSI300,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,12.310058,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,44.859864,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-8.399903,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,29.479981,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [157]:
# Discard the Date index and replace it with a plain 0..n-1 integer index.
cs4_diff2_5 = cs4_diff.reset_index(drop = True)
cs4_diff2_5.tail()

Unnamed: 0,CSI300,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
95,-198.340088,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
96,-30.789795,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
97,111.419922,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
98,25.570068,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
99,95.129883,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [158]:
# Remove the first row (label 0): .diff() leaves it all-NaN because it has no predecessor.
# The index is then renumbered so the remaining rows start at 0 again.
cs4_diff3 = cs4_diff2_5.drop([0])
cs4_diff4 = cs4_diff3.reset_index(drop = True)
cs4_diff4.head()

Unnamed: 0,CSI300,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,12.310058,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,44.859864,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-8.399903,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,29.479981,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,73.830078,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [159]:
# Augmented Dickey-Fuller stationarity check on the differenced "CSI300" column.
# Background: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

cs4_afi = cs4_diff4["CSI300"].to_numpy()
resultcs4 = adfuller(cs4_afi)  # [0] = test statistic, [1] = p-value, [4] = critical-value dict
# Statistic, p-value and the section header, one per line.
print('ADF Statistic: %f' % resultcs4[0], 'p-value: %f' % resultcs4[1], 'Critical Values:', sep='\n')
# One tab-indented line per significance level.
for key, value in resultcs4[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -10.053218
p-value: 0.000000
Critical Values:
	1%: -3.499
	5%: -2.892
	10%: -2.583


In [164]:
#Granger Function Variables
# Lag order for the Granger tests on the CSI300 dataset.
# NOTE(review): grangers_causation_matrix2 takes no maxlag argument, so it presumably reads this module-level value — confirm.
maxlag = 10

In [165]:
# Pairwise Granger-causality matrix across every column of cs4_diff4, using the 'ssr_ftest' statistic.
# NOTE(review): the lag order is not passed here; it presumably comes from the module-level maxlag — confirm.
grangers_causation_matrix2(cs4_diff4, variables = cs4_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,CSI300_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
CSI300_y,1.0,0.584354,0.489596,0.22714,0.269448,0.643908,0.22714,0.511168,0.540412,0.22714,0.344928
Sent_Sum_y,0.889243,1.0,0.474567,0.779821,0.369029,0.325777,0.779821,0.999325,0.241201,0.779821,0.903528
Sent_Mean_y,0.658717,0.998147,1.0,0.978524,0.993357,0.888604,0.978524,0.996978,0.757705,0.978524,0.995722
Sent_Count_y,0.918801,0.8711,0.499491,1.0,0.992569,0.360064,1.0,0.745832,0.283987,1.0,0.505186
Scale_Sum_y,0.871106,0.263877,0.2662,0.988667,1.0,0.381035,0.988667,0.092645,0.098275,0.988667,0.092575
Scale_Mean_y,0.530345,0.987878,0.034417,0.927143,0.969356,1.0,0.927143,0.985086,0.029856,0.927143,0.884567
Scale_Count_y,0.918801,0.8711,0.499491,1.0,0.992569,0.360064,1.0,0.745832,0.283987,1.0,0.505186
ScSe_Sum_y,0.906725,0.999431,0.492774,0.653292,0.14904,0.330313,0.653292,1.0,0.25778,0.653292,0.843836
ScSe_Mean_y,0.683773,0.992915,0.653665,0.923663,0.976774,0.906143,0.923663,0.989366,1.0,0.923663,0.985669
ScSe_Count_y,0.918801,0.8711,0.499491,1.0,0.992569,0.360064,1.0,0.745832,0.283987,1.0,0.505186


In [None]:
###################################################################################################################

In [166]:
#Nikkei 225
# Load the Nikkei 225 time series. "Date" is parsed as datetime and used as the index;
# .dropna() discards every row with a NaN in any column.
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this exact location.
nk4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Nikkei225_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
nk4.tail()

Unnamed: 0_level_0,Nikkei225,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sent
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-22,27105.25977,-4496.0,-0.26135,17203.0,-57618.7,-3.34934,17203.0,-44299.9,-2.575126,17203.0,-608956.0
2022-04-25,26590.7793,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-611591.0
2022-04-26,26700.10938,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-615450.0
2022-04-27,26386.63086,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-619053.0
2022-04-28,26847.90039,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-622367.0


In [167]:
#Nikkei 225 Differencing
# First-difference every column of nk4 (each row minus the previous row); the first row becomes NaN.
nk4_diff = nk4.diff()
nk4_diff.head()

Unnamed: 0_level_0,Nikkei225,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sent
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,-182.25,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,276.20117,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-102.20117,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,528.23047,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [168]:
# Discard the Date index and replace it with a plain 0..n-1 integer index.
nk4_diff2_5 = nk4_diff.reset_index(drop = True)
nk4_diff2_5.tail()

Unnamed: 0,Nikkei225,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sent
96,-447.80078,-941.0,-0.086494,-3128.0,-18453.5,-1.422962,-3128.0,-10624.3,-0.918759,-3128.0,-4496.0
97,-514.48047,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
98,109.33008,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
99,-313.47852,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
100,461.26953,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0


In [169]:
# Remove the first row (label 0): .diff() leaves it all-NaN because it has no predecessor.
# The index is then renumbered so the remaining rows start at 0 again.
nk4_diff3 = nk4_diff2_5.drop([0])
nk4_diff4 = nk4_diff3.reset_index(drop = True)
nk4_diff4.head()

Unnamed: 0,Nikkei225,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sent
0,-182.25,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,276.20117,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-102.20117,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,528.23047,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,405.01953,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [171]:
# Augmented Dickey-Fuller stationarity check on the differenced "Nikkei225" column.
# Background: https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller

nk4_afi = nk4_diff4["Nikkei225"].to_numpy()
resultnk4 = adfuller(nk4_afi)  # [0] = test statistic, [1] = p-value, [4] = critical-value dict
# Statistic, p-value and the section header, one per line.
print('ADF Statistic: %f' % resultnk4[0], 'p-value: %f' % resultnk4[1], 'Critical Values:', sep='\n')
# One tab-indented line per significance level.
for key, value in resultnk4[4].items():
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -10.211833
p-value: 0.000000
Critical Values:
	1%: -3.498
	5%: -2.891
	10%: -2.583


In [176]:
#Granger Function Variables
# Lag order for the Granger tests on the Nikkei 225 dataset.
# NOTE(review): grangers_causation_matrix2 takes no maxlag argument, so it presumably reads this module-level value — confirm.
maxlag = 10

In [177]:
# Pairwise Granger-causality matrix across every column of nk4_diff4, using the 'ssr_ftest' statistic.
# NOTE(review): the lag order is not passed here; it presumably comes from the module-level maxlag — confirm.
grangers_causation_matrix2(nk4_diff4, variables = nk4_diff4.columns, test = 'ssr_ftest')

Unnamed: 0,Nikkei225_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sent_x
Nikkei225_y,1.0,0.032658,0.891608,0.011007,0.00502,0.169188,0.011007,0.023318,0.816714,0.011007,0.030918
Sent_Sum_y,0.680651,1.0,0.475183,0.688916,0.325619,0.464016,0.688916,0.997557,0.227233,0.688916,0.893748
Sent_Mean_y,0.209644,0.999548,1.0,0.987499,0.99753,0.689507,0.987499,0.999102,0.484143,0.987499,0.998211
Sent_Count_y,0.69443,0.773115,0.430141,1.0,0.93714,0.299683,1.0,0.588362,0.211977,1.0,0.419209
Scale_Sum_y,0.737489,0.193324,0.348741,0.994917,1.0,0.445242,0.994917,0.046632,0.165239,0.994917,0.073478
Scale_Mean_y,0.433939,0.921074,0.078317,0.950638,0.96414,1.0,0.950638,0.940272,0.159352,0.950638,0.74593
Scale_Count_y,0.69443,0.773115,0.430141,1.0,0.93714,0.299683,1.0,0.588362,0.211977,1.0,0.419209
ScSe_Sum_y,0.70402,0.998924,0.489811,0.506432,0.081532,0.442462,0.506432,1.0,0.239596,0.506432,0.773758
ScSe_Mean_y,0.231433,0.99949,0.489677,0.978124,0.996277,0.735873,0.978124,0.99888,1.0,0.978124,0.997259
ScSe_Count_y,0.69443,0.773115,0.430141,1.0,0.93714,0.299683,1.0,0.588362,0.211977,1.0,0.419209


In [None]:
###################################################################################################################

In [13]:
#Sensex
# Load the Sensex time series. "Date" is parsed as datetime and used as the index;
# .dropna() discards every row with a NaN in any column.
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this exact location.
sx4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Sensex_csv.csv",
    parse_dates=["Date"],
    index_col="Date",
).dropna()
sx4.tail()

Unnamed: 0_level_0,Sensex,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,56579.89,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-593766.0
2022-04-26,57356.61,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-597625.0
2022-04-27,56819.39,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-601228.0
2022-04-28,57521.06,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-604542.0
2022-04-29,57060.87,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-609019.0


In [14]:
#Sensex Differencing
# First-difference every column of sx4 (each row minus the previous row); the first row becomes NaN.
sx4_diff = sx4.diff()
sx4_diff.head()

Unnamed: 0_level_0,Sensex,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,776.5,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-764.83,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-949.32,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,886.51,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [15]:
# Discard the Date index and replace it with a plain 0..n-1 integer index.
sx4_diff2_5 = sx4_diff.reset_index(drop = True)
sx4_diff2_5.tail()

Unnamed: 0,Sensex,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
98,-617.26,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
99,776.72,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
100,-537.22,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
101,701.67,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
102,-460.19,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [16]:
# Discard the row labelled 0 (the all-NaN row produced by differencing),
# then renumber the remaining rows from 0.
sx4_diff3 = sx4_diff2_5.drop(index=0)
sx4_diff4 = sx4_diff3.reset_index(drop=True)
sx4_diff4.head(n=5)

Unnamed: 0,Sensex,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,776.5,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-764.83,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-949.32,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,886.51,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,1016.03,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [17]:
# Augmented Dickey-Fuller test on the first-differenced Sensex series
# (stationarity check on the transformed data).
# https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller
sx4_afi = sx4_diff4.loc[:, "Sensex"].to_numpy()
resultsx4 = adfuller(sx4_afi)
# Positions used below: [0] test statistic, [1] p-value, [4] critical-value mapping.
print('ADF Statistic: %f' % resultsx4[0])
print('p-value: %f' % resultsx4[1])
print('Critical Values:')
for key in resultsx4[4]:
    value = resultsx4[4][key]
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -9.808889
p-value: 0.000000
Critical Values:
	1%: -3.497
	5%: -2.891
	10%: -2.582


In [24]:
# Granger function variables.
# `maxlag` is a module-level name: grangers_causation_matrix reads it from the
# enclosing scope instead of taking it as a parameter; grangers_causation_matrix2
# presumably does the same -- TODO confirm against its definition.
# NOTE(review): 32 lags against the ~102 differenced Sensex rows leaves very few
# residual degrees of freedom per pairwise regression -- confirm this is intended.
maxlag = 32

In [25]:
# Pairwise Granger test over every column of the differenced Sensex frame.
# The displayed frame has response variables on rows (_y) and predictors on columns (_x).
grangers_causation_matrix2(
    sx4_diff4,
    variables=sx4_diff4.columns,
    test='ssr_ftest',
)

Unnamed: 0,Sensex_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Sensex_y,1.0,0.897078,0.628311,0.683226,0.820889,0.60619,0.683226,0.894953,0.83023,0.683226,0.971745
Sent_Sum_y,0.116256,1.0,0.398726,0.141553,0.761399,0.652878,0.141553,0.253695,0.050558,0.141553,1.0
Sent_Mean_y,0.69846,0.206214,1.0,0.165076,0.265799,0.601716,0.165076,0.20531,0.268142,0.165076,0.22424
Sent_Count_y,0.349754,0.151871,0.63973,1.0,0.094146,0.55146,1.0,0.020592,0.252391,1.0,0.080868
Scale_Sum_y,0.43271,0.733656,0.671573,0.086234,1.0,0.567983,0.086234,0.157734,0.120516,0.086234,0.772058
Scale_Mean_y,0.696543,0.040049,0.898683,0.103472,0.195784,1.0,0.103472,0.05729,0.820543,0.103472,0.020623
Scale_Count_y,0.349754,0.151871,0.63973,1.0,0.094146,0.55146,1.0,0.020592,0.252391,1.0,0.080868
ScSe_Sum_y,0.154128,0.251595,0.389247,0.028884,0.186362,0.620633,0.028884,1.0,0.049583,0.028884,0.83044
ScSe_Mean_y,0.835484,0.004846,0.191812,1.8e-05,0.007202,0.39537,1.8e-05,0.003351,1.0,1.8e-05,0.092519
ScSe_Count_y,0.349754,0.151871,0.63973,1.0,0.094146,0.55146,1.0,0.020592,0.252391,1.0,0.080868


In [None]:
###################################################################################################################

In [194]:
# German bonds: read the CSV with Date parsed and used as the index,
# then discard every row that contains a missing value.
# NOTE(review): absolute, machine-specific path -- the cell only runs on the author's machine.
gb4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/German_csv.csv",
    index_col="Date",
    parse_dates=["Date"],
)
gb4 = gb4.dropna()
gb4.tail(n=5)

Unnamed: 0_level_0,Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum,scs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-04-25,0.848,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-635234.0,-0.635234
2022-04-26,0.802,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-639093.0,-0.639093
2022-04-27,0.81,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-642696.0,-0.642696
2022-04-28,0.895,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-646010.0,-0.64601
2022-04-29,0.935,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-650487.0,-0.650487


In [195]:
# German bonds: first-difference every column (row t minus row t-1).
# The first row has no predecessor, so it comes out as all-NaN.
gb4_diff = gb4.diff(periods=1)
gb4_diff.head(n=5)

Unnamed: 0_level_0,Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum,scs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-12-01,,,,,,,,,,,,
2021-12-02,-0.042,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0,-0.002584
2021-12-03,-0.005,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0,-0.002281
2021-12-06,-0.009,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0,-0.002657
2021-12-07,0.012,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0,-0.006842


In [196]:
# Swap the Date index for a plain positional integer index (the dates are discarded).
gb4_diff2_5 = gb4_diff.reset_index(drop=True)
gb4_diff2_5.tail(n=5)

Unnamed: 0,Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum,scs
104,-0.115,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0,-0.002635
105,-0.046,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0,-0.003859
106,0.008,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0,-0.003603
107,0.085,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0,-0.003314
108,0.04,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0,-0.004477


In [197]:
# Discard the row labelled 0 (the all-NaN row produced by differencing),
# then renumber the remaining rows from 0.
gb4_diff3 = gb4_diff2_5.drop(index=0)
gb4_diff4 = gb4_diff3.reset_index(drop=True)
gb4_diff4.head(n=5)

Unnamed: 0,Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum,scs
0,-0.042,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0,-0.002584
1,-0.005,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0,-0.002281
2,-0.009,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0,-0.002657
3,0.012,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0,-0.006842
4,0.066,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0,-0.003337


In [198]:
# Augmented Dickey-Fuller test on the first-differenced German bond Price series
# (stationarity check on the transformed data).
# https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller
gb4_afi = gb4_diff4.loc[:, "Price"].to_numpy()
resultgb4 = adfuller(gb4_afi)
# Positions used below: [0] test statistic, [1] p-value, [4] critical-value mapping.
print('ADF Statistic: %f' % resultgb4[0])
print('p-value: %f' % resultgb4[1])
print('Critical Values:')
for key in resultgb4[4]:
    value = resultgb4[4][key]
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -4.735769
p-value: 0.000072
Critical Values:
	1%: -3.494
	5%: -2.889
	10%: -2.582


In [203]:
# Granger function variables.
# `maxlag` is a module-level name: grangers_causation_matrix reads it from the
# enclosing scope instead of taking it as a parameter; grangers_causation_matrix2
# presumably does the same -- TODO confirm against its definition.
# This rebinding replaces whatever value an earlier cell assigned.
maxlag = 10

In [204]:
# Pairwise Granger test over every column of the differenced German bond frame.
# The displayed frame has response variables on rows (_y) and predictors on columns (_x).
grangers_causation_matrix2(
    gb4_diff4,
    variables=gb4_diff4.columns,
    test='ssr_ftest',
)



Unnamed: 0,Price_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x,scs_x
Price_y,1.0,0.000445,0.194539,5e-05,5e-05,0.152837,5e-05,0.000315,0.208683,5e-05,0.000388,0.000388
Sent_Sum_y,0.826079,1.0,0.509835,0.698228,0.364592,0.217647,0.698228,0.998738,0.276855,0.698228,0.888,0.888
Sent_Mean_y,0.132448,0.997298,1.0,0.974508,0.989841,0.974087,0.974508,0.996065,0.400075,0.974508,0.991779,0.991779
Sent_Count_y,0.871809,0.825034,0.541776,1.0,0.992002,0.285563,1.0,0.681961,0.334827,1.0,0.410901,0.410901
Scale_Sum_y,0.87793,0.337554,0.389829,0.985378,1.0,0.258465,0.985378,0.112147,0.181675,0.985378,0.099906,0.099906
Scale_Mean_y,0.726839,0.976447,0.735554,0.943635,0.945394,1.0,0.943635,0.974808,0.761869,0.943635,0.827137,0.827137
Scale_Count_y,0.871809,0.825034,0.541776,1.0,0.992002,0.285563,1.0,0.681961,0.334827,1.0,0.410901,0.410901
ScSe_Sum_y,0.829702,0.99921,0.520064,0.55059,0.131093,0.216859,0.55059,1.0,0.285873,0.55059,0.811962,0.811962
ScSe_Mean_y,0.08335,0.993822,0.41167,0.938942,0.981297,0.988254,0.938942,0.99163,1.0,0.938942,0.98121,0.98121
ScSe_Count_y,0.871809,0.825034,0.541776,1.0,0.992002,0.285563,1.0,0.681961,0.334827,1.0,0.410901,0.410901


In [None]:
###################################################################################################################

In [205]:
# Gold price: read the CSV with Date parsed and used as the index,
# then discard every row that contains a missing value.
# NOTE(review): absolute, machine-specific path -- the cell only runs on the author's machine.
gp4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Gold_Price_csv.csv",
    index_col="Date",
    parse_dates=["Date"],
)
gp4 = gp4.dropna()
gp4.tail(n=5)

Unnamed: 0_level_0,Gold_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,1895.0,-2635,-0.167653,15717,-36474.0,-2.320672,15717,-28553.8,-1.816746,15717,-633946
2022-04-26,1904.6,-3859,-0.24155,15976,-27256.0,-1.706059,15976,-42725.7,-2.674368,15976,-637805
2022-04-27,1885.8,-3603,-0.241149,14941,-27635.7,-1.849655,14941,-39186.1,-2.622723,14941,-641408
2022-04-28,1888.5,-3314,-0.139672,23727,-7103.7,-0.299393,23727,-36851.5,-1.553146,23727,-644722
2022-04-29,1911.3,-4477,-0.243938,18353,-5001.3,-0.272506,18353,-44580.1,-2.429036,18353,-649199


In [206]:
# Gold price: first-difference every column (row t minus row t-1).
# The first row has no predecessor, so it comes out as all-NaN.
gp4_diff = gp4.diff(periods=1)
gp4_diff.head(n=5)

Unnamed: 0_level_0,Gold_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,-24.3,-2142.0,-0.329494,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,2.6,303.0,0.085683,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,11.1,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,2.7,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [207]:
# Swap the Date index for a plain positional integer index (the dates are discarded).
gp4_diff2_5 = gp4_diff.reset_index(drop=True)
gp4_diff2_5.tail(n=5)

Unnamed: 0,Gold_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
103,-46.6,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
104,9.6,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
105,-18.8,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
106,2.7,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
107,22.8,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [208]:
# Discard the row labelled 0 (the all-NaN row produced by differencing),
# then renumber the remaining rows from 0.
gp4_diff3 = gp4_diff2_5.drop(index=0)
gp4_diff4 = gp4_diff3.reset_index(drop=True)
gp4_diff4.head(n=5)

Unnamed: 0,Gold_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,-24.3,-2142.0,-0.329494,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,2.6,303.0,0.085683,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,11.1,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,2.7,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,2.4,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [210]:
# Augmented Dickey-Fuller test on the first-differenced Gold_Price series
# (stationarity check on the transformed data).
# https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller
gp4_afi = gp4_diff4.loc[:, "Gold_Price"].to_numpy()
resultgp4 = adfuller(gp4_afi)
# Positions used below: [0] test statistic, [1] p-value, [4] critical-value mapping.
print('ADF Statistic: %f' % resultgp4[0])
print('p-value: %f' % resultgp4[1])
print('Critical Values:')
for key in resultgp4[4]:
    value = resultgp4[4][key]
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -11.312028
p-value: 0.000000
Critical Values:
	1%: -3.494
	5%: -2.889
	10%: -2.582


In [215]:
# Granger function variables.
# `maxlag` is a module-level name: grangers_causation_matrix reads it from the
# enclosing scope instead of taking it as a parameter; grangers_causation_matrix2
# presumably does the same -- TODO confirm against its definition.
# This rebinding replaces whatever value an earlier cell assigned.
maxlag = 10

In [216]:
# Pairwise Granger test over every column of the differenced gold price frame.
# The displayed frame has response variables on rows (_y) and predictors on columns (_x).
grangers_causation_matrix2(
    gp4_diff4,
    variables=gp4_diff4.columns,
    test='ssr_ftest',
)

Unnamed: 0,Gold_Price_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Gold_Price_y,1.0,0.003813,0.653071,7.8e-05,0.000629,0.212686,7.8e-05,0.002058,0.470841,7.8e-05,0.030187
Sent_Sum_y,0.489212,1.0,0.54518,0.700098,0.381139,0.216321,0.700098,0.998721,0.309025,0.700098,0.889705
Sent_Mean_y,0.199952,0.996167,1.0,0.967413,0.986226,0.955984,0.967413,0.993736,0.34514,0.967413,0.987147
Sent_Count_y,0.388975,0.834972,0.583035,1.0,0.992277,0.294069,1.0,0.696219,0.379341,1.0,0.422431
Scale_Sum_y,0.564767,0.347206,0.39897,0.986788,1.0,0.285282,0.986788,0.117866,0.184507,0.986788,0.104958
Scale_Mean_y,0.275721,0.980691,0.63287,0.942177,0.95137,1.0,0.942177,0.979573,0.614283,0.942177,0.837652
Scale_Count_y,0.388975,0.834972,0.583035,1.0,0.992277,0.294069,1.0,0.696219,0.379341,1.0,0.422431
ScSe_Sum_y,0.508362,0.999148,0.55588,0.556078,0.144862,0.217433,0.556078,1.0,0.318583,0.556078,0.811723
ScSe_Mean_y,0.251786,0.994113,0.30781,0.934139,0.979258,0.970877,0.934139,0.991048,1.0,0.934139,0.979611
ScSe_Count_y,0.388975,0.834972,0.583035,1.0,0.992277,0.294069,1.0,0.696219,0.379341,1.0,0.422431


In [None]:
###################################################################################################################

In [217]:
# Oil price: read the CSV with Date parsed and used as the index,
# then discard every row that contains a missing value.
# NOTE(review): absolute, machine-specific path -- the cell only runs on the author's machine.
op4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Oil_Price_csv.csv",
    index_col="Date",
    parse_dates=["Date"],
)
op4 = op4.dropna()
op4.tail(n=5)

Unnamed: 0_level_0,Oil_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,99.6,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-616410.0
2022-04-26,102.62,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-620269.0
2022-04-27,101.96,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-623872.0
2022-04-28,105.47,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-627186.0
2022-04-29,104.59,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-631663.0


In [218]:
# Oil price: first-difference every column (row t minus row t-1).
# The first row has no predecessor, so it comes out as all-NaN.
op4_diff = op4.diff(periods=1)
op4_diff.head(n=5)

Unnamed: 0_level_0,Oil_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,1.16,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-0.21,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,3.23,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,2.32,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [219]:
# Swap the Date index for a plain positional integer index (the dates are discarded).
op4_diff2_5 = op4_diff.reset_index(drop=True)
op4_diff2_5.tail(n=5)

Unnamed: 0,Oil_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
99,-3.26,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
100,3.02,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
101,-0.66,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
102,3.51,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
103,-0.88,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [220]:
# Discard the row labelled 0 (the all-NaN row produced by differencing),
# then renumber the remaining rows from 0.
op4_diff3 = op4_diff2_5.drop(index=0)
op4_diff4 = op4_diff3.reset_index(drop=True)
op4_diff4.head(n=5)

Unnamed: 0,Oil_Price,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,1.16,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-0.21,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,3.23,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,2.32,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,0.49,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [222]:
# Augmented Dickey-Fuller test on the first-differenced Oil_Price series
# (stationarity check on the transformed data).
# https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller
op4_afi = op4_diff4.loc[:, "Oil_Price"].to_numpy()
resultop4 = adfuller(op4_afi)
# Positions used below: [0] test statistic, [1] p-value, [4] critical-value mapping.
print('ADF Statistic: %f' % resultop4[0])
print('p-value: %f' % resultop4[1])
print('Critical Values:')
for key in resultop4[4]:
    value = resultop4[4][key]
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -4.275879
p-value: 0.000489
Critical Values:
	1%: -3.502
	5%: -2.893
	10%: -2.583


In [227]:
# Granger function variables.
# `maxlag` is a module-level name: grangers_causation_matrix reads it from the
# enclosing scope instead of taking it as a parameter; grangers_causation_matrix2
# presumably does the same -- TODO confirm against its definition.
# This rebinding replaces whatever value an earlier cell assigned.
maxlag = 10

In [228]:
# Pairwise Granger test over every column of the differenced oil price frame.
# The displayed frame has response variables on rows (_y) and predictors on columns (_x).
grangers_causation_matrix2(
    op4_diff4,
    variables=op4_diff4.columns,
    test='ssr_ftest',
)

Unnamed: 0,Oil_Price_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Oil_Price_y,1.0,3.098261e-07,0.02262,4.7e-05,4e-06,0.001254,4.7e-05,5.357347e-07,0.014387,4.7e-05,1.971709e-07
Sent_Sum_y,0.936389,1.0,0.38741,0.650265,0.278613,0.333763,0.650265,0.9935682,0.180002,0.650265,0.8952009
Sent_Mean_y,0.380466,0.9901858,1.0,0.971367,0.978711,0.904882,0.971367,0.9857265,0.359822,0.971367,0.9756347
Sent_Count_y,0.989713,0.8543216,0.54209,1.0,0.996189,0.320839,1.0,0.7519376,0.330667,1.0,0.5354173
Scale_Sum_y,0.866861,0.2575167,0.423782,0.999621,1.0,0.401036,0.999621,0.09805323,0.24603,0.999621,0.1077332
Scale_Mean_y,0.049893,0.9418404,0.30244,0.937474,0.926964,1.0,0.937474,0.9486177,0.428221,0.937474,0.7523196
Scale_Count_y,0.989713,0.8543216,0.54209,1.0,0.996189,0.320839,1.0,0.7519376,0.330667,1.0,0.5354173
ScSe_Sum_y,0.945894,0.9967896,0.428287,0.543555,0.114468,0.335969,0.543555,1.0,0.211985,0.543555,0.7646796
ScSe_Mean_y,0.434745,0.9957346,0.259913,0.954613,0.980534,0.880722,0.954613,0.9924727,1.0,0.954613,0.9847133
ScSe_Count_y,0.989713,0.8543216,0.54209,1.0,0.996189,0.320839,1.0,0.7519376,0.330667,1.0,0.5354173


In [None]:
###################################################################################################################

In [6]:
# Steel futures: read the CSV with Date parsed and used as the index,
# then discard every row that contains a missing value.
# NOTE(review): absolute, machine-specific path -- the cell only runs on the author's machine.
sf4 = pd.read_csv(
    "/Users/johnc.burns/Documents/Documents/PhD Year Two/My Paper 4/Asset_Charts/Granger_Files/Steel_Futures_csv.csv",
    index_col="Date",
    parse_dates=["Date"],
)
sf4 = sf4.dropna()
sf4.tail(n=5)

Unnamed: 0_level_0,Steel_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-04-25,56350.0,-2635.0,-0.167653,15717.0,-36474.0,-2.320672,15717.0,-28553.8,-1.816746,15717.0,-624371.0
2022-04-26,55360.0,-3859.0,-0.24155,15976.0,-27256.0,-1.706059,15976.0,-42725.7,-2.674368,15976.0,-628230.0
2022-04-27,55310.0,-3603.0,-0.241149,14941.0,-27635.7,-1.849655,14941.0,-39186.1,-2.622723,14941.0,-631833.0
2022-04-28,56530.0,-3314.0,-0.139672,23727.0,-7103.7,-0.299393,23727.0,-36851.5,-1.553146,23727.0,-635147.0
2022-04-29,57720.0,-4477.0,-0.243938,18353.0,-5001.3,-0.272506,18353.0,-44580.1,-2.429036,18353.0,-639624.0


In [7]:
# Steel futures: first-difference every column (row t minus row t-1).
# The first row has no predecessor, so it comes out as all-NaN.
sf4_diff = sf4.diff(periods=1)
sf4_diff.head(n=5)

Unnamed: 0_level_0,Steel_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-12-01,,,,,,,,,,,
2021-12-02,-140.0,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
2021-12-03,-370.0,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2021-12-06,-390.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
2021-12-07,400.0,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0


In [8]:
# Swap the Date index for a plain positional integer index (the dates are discarded).
sf4_diff2_5 = sf4_diff.reset_index(drop=True)
sf4_diff2_5.tail(n=5)

Unnamed: 0,Steel_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
101,-1340.0,1861.0,0.093697,-1486.0,21144.7,1.028668,-1486.0,15746.1,0.75838,-1486.0,-2635.0
102,-990.0,-1224.0,-0.073897,259.0,9218.0,0.614613,259.0,-14171.9,-0.857622,259.0,-3859.0
103,-50.0,256.0,0.000401,-1035.0,-379.7,-0.143596,-1035.0,3539.6,0.051645,-1035.0,-3603.0
104,1220.0,289.0,0.101476,8786.0,20532.0,1.550262,8786.0,2334.6,1.069577,8786.0,-3314.0
105,1190.0,-1163.0,-0.104266,-5374.0,2102.4,0.026887,-5374.0,-7728.6,-0.87589,-5374.0,-4477.0


In [9]:
# Discard the row labelled 0 (the all-NaN row produced by differencing),
# then renumber the remaining rows from 0.
sf4_diff3 = sf4_diff2_5.drop(index=0)
sf4_diff4 = sf4_diff3.reset_index(drop=True)
sf4_diff4.head(n=5)

Unnamed: 0,Steel_Futures,Sent_Sum,Sent_Mean,Sent_Count,Scale_Sum,Scale_Mean,Scale_Count,ScSe_Sum,ScSe_Mean,ScSe_Count,Sent_Cul_Sum
0,-140.0,-2142.0,-0.329123,197.0,-6547.7,-0.960477,197.0,-17876.8,-2.7329,197.0,-2584.0
1,-370.0,303.0,0.085312,791.0,273.0,0.328908,791.0,1435.1,0.609542,791.0,-2281.0
2,-390.0,-376.0,-0.078963,-501.0,-7896.8,-1.342644,-501.0,-2377.9,-0.586925,-501.0,-2657.0
3,400.0,-4185.0,-0.052656,8587.0,-61339.6,-1.934859,8587.0,-39636.9,-0.481654,8587.0,-6842.0
4,0.0,3505.0,0.149735,-4077.0,26677.4,0.333792,-4077.0,30343.2,1.160046,-4077.0,-3337.0


In [10]:
# Augmented Dickey-Fuller test on the first-differenced Steel_Futures series
# (stationarity check on the transformed data).
# https://machinelearningmastery.com/time-series-data-stationary-python/
from statsmodels.tsa.stattools import adfuller
sf4_afi = sf4_diff4.loc[:, "Steel_Futures"].to_numpy()
resultsf4 = adfuller(sf4_afi)
# Positions used below: [0] test statistic, [1] p-value, [4] critical-value mapping.
print('ADF Statistic: %f' % resultsf4[0])
print('p-value: %f' % resultsf4[1])
print('Critical Values:')
for key in resultsf4[4]:
    value = resultsf4[4][key]
    print('\t%s: %.3f' % (key, value))

ADF Statistic: -5.761230
p-value: 0.000001
Critical Values:
	1%: -3.496
	5%: -2.890
	10%: -2.582


In [11]:
# Granger function variables.
# `maxlag` is a module-level name: grangers_causation_matrix reads it from the
# enclosing scope instead of taking it as a parameter; grangers_causation_matrix2
# presumably does the same -- TODO confirm against its definition.
# NOTE(review): 20 lags against the ~105 differenced steel rows is a heavy
# parameterisation for each pairwise regression -- confirm this is intended.
maxlag = 20

In [12]:
# Pairwise Granger test over every column of the differenced steel futures frame.
# The displayed frame has response variables on rows (_y) and predictors on columns (_x).
grangers_causation_matrix2(
    sf4_diff4,
    variables=sf4_diff4.columns,
    test='ssr_ftest',
)

Unnamed: 0,Steel_Futures_x,Sent_Sum_x,Sent_Mean_x,Sent_Count_x,Scale_Sum_x,Scale_Mean_x,Scale_Count_x,ScSe_Sum_x,ScSe_Mean_x,ScSe_Count_x,Sent_Cul_Sum_x
Steel_Futures_y,1.0,9.1e-05,0.064346,0.005409,0.006829,0.55012,0.005409,0.00026,0.069081,0.005409,0.000137
Sent_Sum_y,0.999898,1.0,0.799112,0.699529,0.754505,0.480097,0.699529,0.999996,0.537879,0.699529,0.999952
Sent_Mean_y,0.226441,0.106773,1.0,0.058842,0.069034,0.841719,0.058842,0.087584,0.761032,0.058842,0.393517
Sent_Count_y,0.999994,0.75264,0.815077,1.0,0.980562,0.367945,1.0,0.542968,0.616007,1.0,0.469323
Scale_Sum_y,0.999973,0.713174,0.709429,0.977771,1.0,0.392598,0.977771,0.360129,0.487319,0.977771,0.589168
Scale_Mean_y,0.631529,0.350569,0.237968,0.377904,0.382681,1.0,0.377904,0.341024,0.212642,0.377904,0.270576
Scale_Count_y,0.999994,0.75264,0.815077,1.0,0.980562,0.367945,1.0,0.542968,0.616007,1.0,0.469323
ScSe_Sum_y,0.999952,0.999998,0.810486,0.472677,0.442623,0.463512,0.472677,1.0,0.55974,0.472677,0.999738
ScSe_Mean_y,0.352753,0.02011,0.861244,0.013401,0.011813,0.889068,0.013401,0.014438,1.0,0.013401,0.237624
ScSe_Count_y,0.999994,0.75264,0.815077,1.0,0.980562,0.367945,1.0,0.542968,0.616007,1.0,0.469323
