# Preprocess

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
ohlc = pd.read_csv('ohlc.csv', index_col='timestamp', parse_dates=True)
tweets = pd.read_csv('tweets_per_day.csv', index_col='date', parse_dates=True)
ohlc = ohlc[ohlc.index <= datetime.strptime('2018/08/31', '%Y/%m/%d')]
tweets = tweets[tweets.index <= datetime.strptime('2018/08/31', '%Y/%m/%d')]

In [3]:
def log_return(close: pd.Series):
    prev_close = close.shift(1)
    return np.log(close) - np.log(prev_close)

def RV(close: pd.Series):
    rt = log_return(close)
    rt = rt[~np.isnan(rt)] #remove nan value
    rt_2 = np.power(rt, 2)
    return np.sqrt(rt_2.sum())

In [4]:
ohlc['rv'] = ohlc['close']

ohlc1D = ohlc.resample('1D').agg({
    'open': 'first',
    'high': max,
    'low': min,
    'close': 'last',
    'volume': lambda x: np.log(x.sum()),
    'rv': lambda x: RV(x)
})

ohlc1D['rt'] = log_return(ohlc1D['close'])

tweets['tweets_per_day'] = np.log(tweets['tweets_per_day'])

combined_df = ohlc1D.join(tweets, how='inner')[['tweets_per_day', 'rv', 'volume', 'rt']]

combined_df = combined_df.replace([np.inf, -np.inf], np.nan)
combined_df.dropna(inplace=True)

  ohlc1D = ohlc.resample('1D').agg({
  ohlc1D = ohlc.resample('1D').agg({
  'volume': lambda x: np.log(x.sum()),


In [5]:
description = combined_df.describe()

skewness = combined_df.skew()
kurtosis = combined_df.kurtosis()

extra_stats = pd.DataFrame([skewness, kurtosis], index=['skew', 'kurt'])

extended_description = pd.concat([description, extra_stats])

extended_description

Unnamed: 0,tweets_per_day,rv,volume,rt
count,1548.0,1548.0,1548.0,1548.0
mean,10.325038,0.042297,9.051199,0.002283
std,0.470132,0.027147,0.768731,0.039141
min,8.89563,0.0,3.046648,-0.280892
25%,10.002846,0.025027,8.55743,-0.011928
50%,10.20985,0.035077,9.070463,0.001976
75%,10.571528,0.050414,9.58667,0.017718
max,11.955044,0.336667,11.729559,0.238396
skew,0.774798,3.016309,-0.370167,-0.28365
kurt,0.238856,16.95805,2.218066,6.237375


# VAR model

In [6]:
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

In [7]:
subsamples_1 = combined_df[combined_df.index <= datetime.strptime('2017/10/08', '%Y/%m/%d')]
subsamples_2 = combined_df[combined_df.index > datetime.strptime('2017/10/08', '%Y/%m/%d')]

## Full samples (Table 2)

### Tweets & RV

In [8]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(combined_df[['rv','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(combined_df[['rv','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -11.68140133646638.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     10:41:31
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -11.6814
Nobs:                     1541.00    HQIC:                  -11.7467
Log likelihood:           4737.45    FPE:                7.61522e-06
AIC:                     -11.7854    Det(Omega_mle):     7.46910e-06
--------------------------------------------------------------------
Results for equation rv
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.027487         0.011525           -2.385           0.017
L1.rv                     0.559641         0.026086           21.453  

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [10]:
granger_results = final_results.test_causality('rv', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rv', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rv. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         3.793          2.013   0.000 (7, 3052)
-----------------------------------------------
Granger causality F-test. H_0: rv does not Granger-cause tweets_per_day. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         1.801          2.013   0.083 (7, 3052)
-----------------------------------------------


### Tweets & Volume

In [11]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(combined_df[['volume','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(combined_df[['volume','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -5.132004015533787.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:24:00
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -5.13200
Nobs:                     1541.00    HQIC:                  -5.19729
Log likelihood:          -308.857    FPE:                 0.00532169
AIC:                     -5.23597    Det(Omega_mle):      0.00521958
--------------------------------------------------------------------
Results for equation volume
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                     0.374412         0.298373            1.255           0.210
L1.volume                 0.490393         0.027696           17.7

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [12]:
granger_results = final_results.test_causality('volume', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'volume', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause volume. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         5.058          2.013   0.000 (7, 3052)
-----------------------------------------------
Granger causality F-test. H_0: volume does not Granger-cause tweets_per_day. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         4.271          2.013   0.000 (7, 3052)
-----------------------------------------------


### Tweets & rt

In [13]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(combined_df[['rt','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(combined_df[['rt','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -10.169268390775917.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:35:17
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -10.1693
Nobs:                     1541.00    HQIC:                  -10.2346
Log likelihood:           3572.36    FPE:                3.45456e-05
AIC:                     -10.2732    Det(Omega_mle):     3.38828e-05
--------------------------------------------------------------------
Results for equation rt
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.040120         0.022731           -1.765           0.078
L1.rt                    -0.008782         0.025609           -0.343 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [14]:
granger_results = final_results.test_causality('rt', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rt', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rt. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         2.031          2.013   0.048 (7, 3052)
-----------------------------------------------
Granger causality F-test. H_0: rt does not Granger-cause tweets_per_day. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
        0.7340          2.013   0.643 (7, 3052)
-----------------------------------------------


## Subsamples 1 (Table 3)

### Tweets & RV

In [15]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_1[['rv','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_1[['rv','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -11.649439796596127.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:11
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -11.6494
Nobs:                     1249.00    HQIC:                  -11.7263
Log likelihood:           3837.52    FPE:                7.71258e-06
AIC:                     -11.7727    Det(Omega_mle):     7.53061e-06
--------------------------------------------------------------------
Results for equation rv
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                     0.000830         0.017014            0.049           0.961
L1.rv                     0.583851         0.028849           20.238 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [16]:
granger_results = final_results.test_causality('rv', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rv', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rv. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         1.628          2.013   0.123 (7, 2468)
-----------------------------------------------
Granger causality F-test. H_0: rv does not Granger-cause tweets_per_day. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
        0.9090          2.013   0.498 (7, 2468)
-----------------------------------------------


### Tweets & Volume

In [17]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_1[['volume','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_1[['volume','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -5.055154516424987.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:11
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -5.05515
Nobs:                     1249.00    HQIC:                  -5.13205
Log likelihood:          -280.613    FPE:                 0.00563717
AIC:                     -5.17838    Det(Omega_mle):      0.00550417
--------------------------------------------------------------------
Results for equation volume
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                     0.189880         0.466979            0.407           0.684
L1.volume                 0.500509         0.029214           17.1

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [18]:
granger_results = final_results.test_causality('volume', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'volume', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause volume. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         3.476          2.013   0.001 (7, 2468)
-----------------------------------------------
Granger causality F-test. H_0: volume does not Granger-cause tweets_per_day. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         3.525          2.013   0.001 (7, 2468)
-----------------------------------------------


### Tweets & rt

In [19]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_1[['rt','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_1[['rt','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -10.255339456873477.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:11
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -10.2553
Nobs:                     1249.00    HQIC:                  -10.3322
Log likelihood:           2966.90    FPE:                3.10921e-05
AIC:                     -10.3786    Det(Omega_mle):     3.03585e-05
--------------------------------------------------------------------
Results for equation rt
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.061640         0.033304           -1.851           0.064
L1.rt                    -0.028539         0.028445           -1.003 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [20]:
granger_results = final_results.test_causality('rt', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rt', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rt. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
         1.942          2.013   0.059 (7, 2468)
-----------------------------------------------
Granger causality F-test. H_0: rt does not Granger-cause tweets_per_day. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value     df   
-----------------------------------------------
        0.8389          2.013   0.555 (7, 2468)
-----------------------------------------------


## Subsamples 2 (Table 4)

### Tweets & RV

In [21]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_2[['rv','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_2[['rv','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 7 with a BIC of -11.62041637096008.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:11
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -11.6204
Nobs:                     285.000    HQIC:                  -11.8508
Log likelihood:           931.902    FPE:                6.11544e-06
AIC:                     -12.0049    Det(Omega_mle):     5.51918e-06
--------------------------------------------------------------------
Results for equation rv
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.195338         0.055767           -3.503           0.000
L1.rv                     0.430869         0.063361            6.800  

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [22]:
granger_results = final_results.test_causality('rv', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rv', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rv. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
         5.052          2.027   0.000 (7, 540)
----------------------------------------------
Granger causality F-test. H_0: rv does not Granger-cause tweets_per_day. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
         2.436          2.027   0.018 (7, 540)
----------------------------------------------


### Tweets & Volume

In [23]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_2[['volume','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_2[['volume','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

  self._init_dates(dates, freq)


The optimal lag length is 2 with a BIC of -5.207858979119607.
  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:12
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -5.20786
Nobs:                     290.000    HQIC:                  -5.28371
Log likelihood:          -39.4954    FPE:                 0.00482280
AIC:                     -5.33441    Det(Omega_mle):      0.00466070
--------------------------------------------------------------------
Results for equation volume
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.852112         0.903807           -0.943           0.346
L1.volume                 0.302693         0.085038            3.5

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [24]:
granger_results = final_results.test_causality('volume', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'volume', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause volume. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
         18.91          3.012   0.000 (2, 570)
----------------------------------------------
Granger causality F-test. H_0: volume does not Granger-cause tweets_per_day. Conclusion: reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
         7.246          3.012   0.001 (2, 570)
----------------------------------------------


### Tweets & rt

In [25]:
maxlags = 10  # Maximum lag length to test
bic_values = []

for lag in range(1, maxlags + 1):
    model = VAR(subsamples_2[['rt','tweets_per_day']])
    results = model.fit(lag)
    bic_values.append(results.bic)

optimal_lag = np.argmin(bic_values) + 1  # Adding 1 because index starts at 0
print(f"The optimal lag length is {optimal_lag} with a BIC of {min(bic_values)}.")

final_model = VAR(subsamples_2[['rt','tweets_per_day']])
final_results = final_model.fit(optimal_lag)
print(final_results.summary())

The optimal lag length is 1 with a BIC of -9.768672439890945.


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 08, May, 2024
Time:                     11:50:12
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -9.76867
Nobs:                     291.000    HQIC:                  -9.81407
Log likelihood:           612.540    FPE:                5.30429e-05
AIC:                     -9.84441    Det(Omega_mle):     5.19659e-05
--------------------------------------------------------------------
Results for equation rt
                       coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------------
const                    -0.090634         0.084109           -1.078           0.281
L1.rt                     0.019556         0.059002            0.331           0.740
L1.tweets_per_day         0.008533         0.00

In [26]:
granger_results = final_results.test_causality('rt', 'tweets_per_day', kind='f')
print(granger_results.summary())
granger_results = final_results.test_causality('tweets_per_day', 'rt', kind='f')
print(granger_results.summary())

Granger causality F-test. H_0: tweets_per_day does not Granger-cause rt. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
         1.240          3.858   0.266 (1, 576)
----------------------------------------------
Granger causality F-test. H_0: rt does not Granger-cause tweets_per_day. Conclusion: fail to reject H_0 at 5% significance level.
Test statistic Critical value p-value    df   
----------------------------------------------
        0.4526          3.858   0.501 (1, 576)
----------------------------------------------
