Test inspired by the workflow of these articles:

* https://www.machinelearningplus.com/time-series/vector-autoregression-examples-python/
* https://towardsdatascience.com/granger-causality-and-vector-auto-regressive-model-for-time-series-forecasting-3226a64889a6

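The Granger causality test checks the null hypothesis that the coefficients of the past values of the candidate "cause" series in the regression equation are all zero, i.e. that those past values add no predictive power for the target series.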

In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [6]:
# Consumption per location together with temperature and price columns.
# The first CSV column is used as the index and parsed as dates.
df_raw = pd.read_csv(
    'cleansed_data//all_consumption_metadata.csv',
    index_col=0,
    parse_dates=True,
    dtype={
        'loc_id': 'str',  # ids stay text; later cells look locations up as d['0']
        **dict.fromkeys(
            ['consumption_kvah', 'temperature', 'el_price', 'oil_price'],
            'float32',
        ),
    },
)

In [7]:
# Preview the first three rows of the raw frame.
df_raw.head(3)

Unnamed: 0_level_0,loc_id,consumption_kvah,temperature,el_price,oil_price
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01 00:00:00,0,27.0,5.5,26.33,66.730003
2018-01-01 01:00:00,0,27.5,5.0,26.43,66.730003
2018-01-01 02:00:00,0,27.0,4.8,26.1,66.730003


In [8]:
# One sub-frame per location, keyed by its loc_id string and kept in order of
# first appearance in the raw data.
loc_ids = df_raw.loc_id.unique()
d = {loc: df_raw[df_raw['loc_id'] == loc] for loc in loc_ids}

## 1. Make the series stationary

In [372]:
# Start with location '0' only; the id column and the oil price are left out
# of this first pass.
subset = d['0'].drop(columns=['loc_id', 'oil_price'])
subset.head(3)

Unnamed: 0_level_0,consumption_kvah,temperature,el_price
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01 00:00:00,27.0,5.5,26.33
2018-01-01 01:00:00,27.5,5.0,26.43
2018-01-01 02:00:00,27.0,4.8,26.1


In [373]:
from statsmodels.tsa.stattools import adfuller

In [374]:
def pprint_adfuller(results):
    """
    Print the headline numbers of an Augmented Dickey-Fuller test.

    `results` is the sequence returned by `adfuller`: the test statistic
    is read from position 0 and the dict of critical values from
    position 4 (only its '5%' entry is shown).

    The null hypothesis of the test is that the series has a unit root;
    the alternative is that it has none. A test statistic below the
    critical value rejects the null, which is read here as evidence
    that the time series is stationary.
    """
    report_lines = (
        'Dickey-Fuller test results:',
        f'Test statistics:\t{results[0]:.4f}',
        f"""Critical value 5%:\t{results[4]['5%']:.4f}""",
    )
    for line in report_lines:
        print(line)

In [375]:
# Subtract from each row the value 24 rows later (rows are hourly in the
# preview above, so presumably a one-day step; assumes no gaps), then average
# per calendar month and drop months that contain NaN.
# NOTE(review): shift(-24) looks forward; subset.diff(24) would be the usual
# backward difference (opposite sign, labels 24 rows later). Confirm intent.
diff_subset = subset.sub(subset.shift(-24)).resample('M').mean().dropna()

In [376]:
# ADF test on the differenced monthly temperature; despite its name, diff_t
# holds the test result, not a differenced series.
diff_t = adfuller(diff_subset.loc[:, 'temperature'])
pprint_adfuller(diff_t)

Dickey-Fuller test results:
Test statistics:	-3.2978
Critical value 5%:	-3.0685


In [377]:
# ADF test on the differenced monthly electricity price; despite its name,
# diff_e holds the test result, not a differenced series.
diff_e = adfuller(diff_subset.loc[:, 'el_price'])
pprint_adfuller(diff_e)

Dickey-Fuller test results:
Test statistics:	-5.3164
Critical value 5%:	-2.9865


In [378]:
# ADF test on the differenced monthly consumption; despite its name, diff_c
# holds the test result, not a differenced series.
diff_c = adfuller(diff_subset.loc[:, 'consumption_kvah'])
pprint_adfuller(diff_c)

Dickey-Fuller test results:
Test statistics:	-3.4865
Critical value 5%:	-2.9865


In [379]:
from statsmodels.tsa.stattools import grangercausalitytests

Wang, Professor Wei. (2013). Vertical Specialization and Trade Surplus in China: Chandos Asian Studies Series 48 (Chandos Asian Studies Series 1). Chandos Publishing.

In [380]:
def granger_matrix(dataframe, maxlag=6):
    """
    Build a matrix of pairwise Granger-causality p-values.

    For every ordered pair of columns, `grangercausalitytests` is run
    with the given `maxlag`, and the smallest p-value of the SSR-based
    F-test ('ssr_ftest') over all returned lags is kept.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One column per time series.
    maxlag : int, default 6
        Passed straight through to `grangercausalitytests`.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are the input columns.
        Each cell is the minimum p-value formatted as a string with
        four decimals. Cell [row, col] comes from the test on
        dataframe[[row, col]]; per the statsmodels documentation that
        test asks whether the second column (col) Granger-causes the
        first column (row).

    Notes
    -----
    * Keeping the minimum over several lags is optimistic: no
      correction for multiple comparisons is applied.
    * The diagonal tests a column against itself and carries no
      information.
    """
    p_matrix = pd.DataFrame(columns=dataframe.columns, index=dataframe.columns)
    for s in dataframe:
        for t in dataframe:
            lag_results = grangercausalitytests(dataframe[[s, t]], maxlag=maxlag, verbose=False)
            p_values = [result[0]['ssr_ftest'][1] for result in lag_results.values()]
            # Compare as numbers, not as formatted text. NaN p-values are set
            # aside (p == p is False only for NaN) unless every lag is NaN.
            comparable = [p for p in p_values if p == p] or p_values
            p_matrix.loc[s, t] = f"{min(comparable):.4f}"

    return p_matrix

In [391]:
# Pairwise minimum p-values for the differenced location-'0' series, with
# maxlag raised from the default 6 to 7.
granger_matrix(diff_subset, maxlag=7)

Unnamed: 0,consumption_kvah,temperature,el_price
consumption_kvah,1.0,0.4705,0.1228
temperature,0.6728,1.0,0.4303
el_price,0.1737,0.023,1.0


Cool!

In [370]:
# Granger p-value matrix for every location, keyed by loc_id.
granger_matrices = {}

for loc, loc_frame in d.items():
    # Loop-local names keep the notebook-level `subset` / `diff_subset` intact,
    # so re-running earlier cells still uses the data they were written for.
    # drop() returns a new frame, so no defensive copy is needed.
    loc_subset = loc_frame.drop(['loc_id', 'oil_price'], axis=1)

    # Same transformation as for the single location: difference against the
    # value 24 rows later, then monthly means without NaN rows.
    loc_diff = loc_subset - loc_subset.shift(-24)
    loc_diff = loc_diff.resample('M').mean().dropna()

    # Largest lag the number of monthly observations allows.
    # NOTE(review): presumably mirrors the upper bound statsmodels enforces
    # for grangercausalitytests; confirm against its documentation.
    maxlag = int((loc_diff.shape[0] - 1) / 3) - 1
    if maxlag < 1:
        raise ValueError(
            f"location {loc!r}: {loc_diff.shape[0]} monthly observations "
            "are too few for a Granger causality test"
        )

    granger_matrices[loc] = granger_matrix(loc_diff, maxlag=maxlag)

In [371]:
# Dump each location's p-value matrix, separated by a blank line.
for loc_id, p_values in granger_matrices.items():
    print('P-values for customer', loc_id)
    print(p_values, end='\n\n')

P-values for customer 0
                 consumption_kvah temperature el_price
consumption_kvah           1.0000      0.4705   0.1228
temperature                0.6728      1.0000   0.4303
el_price                   0.1737      0.0230   1.0000

P-values for customer 1
                 consumption_kvah temperature el_price
consumption_kvah           1.0000      0.5828   0.0762
temperature                0.2452      1.0000   0.6140
el_price                   0.1847      0.0150   1.0000

P-values for customer 3
                 consumption_kvah temperature el_price
consumption_kvah           1.0000      0.0067   0.2181
temperature                0.1797      1.0000   0.5946
el_price                   0.1002      0.0233   1.0000

P-values for customer 4
                 consumption_kvah temperature el_price
consumption_kvah           1.0000      0.0063   0.3431
temperature                0.0000      1.0000   0.2299
el_price                   0.2288      0.0010   1.0000

P-values for custome

In [625]:
# Electricity spot prices; the first CSV column is used as the index and
# parsed as dates, everything else is read as float32.
el = pd.read_csv(
    'cleansed_data//Elspot_prices.csv',
    index_col=0,
    parse_dates=True,
    dtype='float32',
)
el.head()

Unnamed: 0,price
2018-01-01 00:00:00,26.33
2018-01-01 01:00:00,26.43
2018-01-01 02:00:00,26.1
2018-01-01 03:00:00,24.700001
2018-01-01 04:00:00,24.74


In [624]:
# Oil spot prices. The extra `date` column holds the calendar date of each
# index entry and is used as the join key against the hourly data.
oil = pd.read_csv(
    'cleansed_data//Oilspot_prices.csv',
    index_col=0,
    parse_dates=True,
    dtype='float32',
).assign(date=lambda prices: prices.index.date)
oil.head()

Unnamed: 0,price,date
2017-04-06,53.619999,2017-04-06
2017-04-07,54.240002,2017-04-07
2017-04-08,54.240002,2017-04-08
2017-04-09,54.240002,2017-04-09
2017-04-10,54.790001,2017-04-10


In [587]:
# Temperature readings per weather station; the first CSV column is used as
# the index and parsed as dates.
w = pd.read_csv(
    'cleansed_data//Weather.csv',
    index_col=0,
    parse_dates=True,
    dtype={'weather_station': 'str', 'temperature': 'float32'},
)
w.head(3)

Unnamed: 0,weather_station,temperature
2018-01-01 00:00:00,SN35210,-0.3
2018-01-01 01:00:00,SN35210,-0.5
2018-01-01 02:00:00,SN35210,-1.4


In [588]:
# Hourly grid spanning the first to the last timestamp found in the weather data.
date_range = pd.date_range(start=w.index.min(), end=w.index.max(), freq='H')

In [556]:
# Empty single-column frame on the full hourly grid; filled in the next cell.
mean_weather = pd.DataFrame(
    columns=['temperature'],
    index=date_range,
)

In [557]:
# Mean temperature over all rows of `w` that share a timestamp.
# One grouped mean replaces a Python-level label lookup per hour. Like
# np.nanmean it ignores NaN readings, and an hour of date_range with no rows
# in `w` becomes NaN instead of raising KeyError. Values agree with the
# per-hour loop up to float32 rounding.
mean_weather['temperature'] = (
    w.groupby(level=0)['temperature']
     .mean()
     .reindex(date_range)
)

In [558]:
# Preview the first three hourly mean temperatures.
mean_weather.head(3)

Unnamed: 0,temperature
2018-01-01 00:00:00,4.654545
2018-01-01 01:00:00,4.418182
2018-01-01 02:00:00,4.309091


In [611]:
# Hourly mean temperature joined with the electricity price on shared timestamps.
w_el = pd.merge(mean_weather, el, how='inner', left_index=True, right_index=True)
w_el['date'] = w_el.index.date

# Attach the oil price through the calendar date; both frames have a `price`
# column, so the suffixes tell them apart. The result is indexed by date, so
# the hour of day is no longer part of the index.
w_el_oil = (
    w_el.merge(oil, how='inner', on='date', suffixes=('_el', '_oil'))
        .set_index('date')
)
w_el_oil.index = pd.to_datetime(w_el_oil.index)

In [612]:
# Preview the merged temperature / electricity price / oil price frame.
w_el_oil.head(3)

Unnamed: 0_level_0,temperature,price_el,price_oil
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,4.654545,26.33,66.730003
2018-01-01,4.418182,26.43,66.730003
2018-01-01,4.309091,26.1,66.730003


In [613]:
# Difference each row against the row 24 positions later, then average per
# calendar month and drop months that contain NaN.
diff_w_el_oil = w_el_oil.sub(w_el_oil.shift(-24)).resample('M').mean().dropna()

In [614]:
# ADF test on the differenced monthly temperature. The result gets a real
# name: assigning to `_` overrides IPython's automatic last-output variable.
adf_temperature = adfuller(diff_w_el_oil['temperature'])
pprint_adfuller(adf_temperature)

Dickey-Fuller test results:
Test statistics:	-5.7503
Critical value 5%:	-3.0685


In [617]:
# ADF test on the differenced monthly electricity price. The result gets a
# real name: assigning to `_` overrides IPython's automatic last-output variable.
adf_price_el = adfuller(diff_w_el_oil['price_el'])
pprint_adfuller(adf_price_el)

Dickey-Fuller test results:
Test statistics:	-4.6505
Critical value 5%:	-2.9865


In [618]:
# ADF test on the differenced monthly oil price. The result gets a real
# name: assigning to `_` overrides IPython's automatic last-output variable.
adf_price_oil = adfuller(diff_w_el_oil['price_oil'])
pprint_adfuller(adf_price_oil)

Dickey-Fuller test results:
Test statistics:	-3.5099
Critical value 5%:	-2.9985


In [623]:
# Pairwise minimum p-values for temperature, electricity price and oil price
# after differencing against the row 24 positions later.
granger_matrix(diff_w_el_oil, maxlag=6)

Unnamed: 0,temperature,price_el,price_oil
temperature,1.0,0.5077,0.0977
price_el,0.0033,1.0,0.2961
price_oil,0.4707,0.0272,1.0


In [626]:
# Same pipeline with a one-row step: difference each row against the next
# row, then average per calendar month and drop months that contain NaN.
# This rebinds diff_w_el_oil, replacing the 24-row version computed earlier.
diff_w_el_oil = w_el_oil.sub(w_el_oil.shift(-1)).resample('M').mean().dropna()

In [627]:
# ADF test on the one-row-differenced monthly temperature. The result gets a
# real name: assigning to `_` overrides IPython's automatic last-output variable.
adf_temperature_step1 = adfuller(diff_w_el_oil['temperature'])
pprint_adfuller(adf_temperature_step1)

Dickey-Fuller test results:
Test statistics:	-5.6211
Critical value 5%:	-3.0685


In [628]:
# ADF test on the one-row-differenced monthly electricity price. The result
# gets a real name: assigning to `_` overrides IPython's automatic
# last-output variable.
adf_price_el_step1 = adfuller(diff_w_el_oil['price_el'])
pprint_adfuller(adf_price_el_step1)

Dickey-Fuller test results:
Test statistics:	-4.9813
Critical value 5%:	-2.9865


In [629]:
# ADF test on the one-row-differenced monthly oil price. The result gets a
# real name: assigning to `_` overrides IPython's automatic last-output variable.
adf_price_oil_step1 = adfuller(diff_w_el_oil['price_oil'])
pprint_adfuller(adf_price_oil_step1)

Dickey-Fuller test results:
Test statistics:	-3.5150
Critical value 5%:	-2.9985


In [630]:
# Pairwise minimum p-values for temperature, electricity price and oil price
# after differencing against the next row.
granger_matrix(diff_w_el_oil, maxlag=6)

Unnamed: 0,temperature,price_el,price_oil
temperature,1.0,0.8537,0.0797
price_el,0.0039,1.0,0.2346
price_oil,0.5647,0.0208,1.0
