In [3]:
import numpy as np
import pandas as pd
import datetime as dt
import datapackage
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import statsmodels.api as sm
import statsmodels.tsa.vector_ar.vecm as vecm

## investigating volatility linkages between oil, gas, and regionalised coal futures markets ##

## pull the dataset from GitHub, parse the dates, coerce the remaining columns to numeric ##

url = 'https://raw.githubusercontent.com/joe-ascroft/phd/master/data/df_nzgas.csv'
df = pd.read_csv(url)
df["DATE"] = pd.to_datetime(df["DATE"], dayfirst=True)

# Every column after the first is converted to a numeric dtype; entries that
# cannot be parsed become NaN instead of raising.
value_cols = df.columns[1:]
df[value_cols] = df[value_cols].apply(pd.to_numeric, errors='coerce')

## the two sub-samples are cut BEFORE dropna, so they still contain incomplete rows ##

# df2: rows dated 2009-03-01 through 2022-06-10, both end points included.
df2 = df[df['DATE'].between('2009-3-1', '2022-6-10')]

# df3: rows dated strictly before 2009-03-01.
is_pre_2009_03 = df['DATE'] < '2009-3-1'
df3 = df[is_pre_2009_03]

## only now drop rows with any missing value from the main frame and print what is left ##
df = df.dropna()
print(df)

         DATE  Price_NZ    NZ_BTU  NZ_USD_Q  NZBTU_USD        HH    NBP_raw  \
40 2009-03-01  6.739482  7.110531  0.538933   3.832102  3.993333  38.930000   
41 2009-06-01  7.067294  7.456390  0.626667   4.672671  3.670000  30.123125   
42 2009-09-01  6.931729  7.313362  0.705367   5.158602  3.380000  26.784030   
43 2009-12-01  6.880381  7.259187  0.724933   5.262427  4.946667  31.911429   
44 2010-03-01  7.198392  7.594707  0.704300   5.348952  4.546667  36.481774   
45 2010-06-01  6.402182  6.754661  0.701033   4.735242  4.523333  38.841077   
46 2010-09-01  7.423850  7.832577  0.730467   5.721436  3.880000  42.119118   
47 2010-12-01  7.609725  8.028686  0.762800   6.124282  4.150000  47.904615   
48 2011-03-01  7.300203  7.702123  0.763000   5.876720  4.100000  57.787302   
49 2011-06-01  7.201765  7.598266  0.818767   6.221207  4.423333  58.159531   
50 2011-09-01  7.321425  7.724513  0.813533   6.284149  3.843333  57.948806   
51 2011-12-01  6.270751  6.615994  0.781067   5.1675

In [None]:
# The four columns of df3 that enter both the lag-order search and the
# cointegration-rank test.
pre_split_endog = df3[['NZBTU_USD', 'HH', 'Brent', 'NBP_USD']]

# Lag-order search over at most 5 lagged differences with the "ci"
# deterministic specification; print the order picked by each criterion.
lag_order1 = vecm.select_order(
    data=pre_split_endog,
    maxlags=5,
    deterministic="ci",
)
print(lag_order1.selected_orders)

# Cointegration-rank test on the same columns.
# NOTE(review): k_ar_diff is fixed at 5 (the maxlags value above) rather than
# taken from lag_order1, and the VECM fitted later in this notebook uses
# k_ar_diff=1 -- confirm which lag length is intended.
rank = vecm.select_coint_rank(
    endog=pre_split_endog,
    k_ar_diff=5,
    det_order=0,
)
print(rank)

In [None]:
# Fit a VECM on four columns of df3 with one lagged difference, cointegration
# rank 1 and the "ci" deterministic specification.
vecm_endog = df3[['NZBTU_USD', 'HH', 'Brent', 'NBP_USD']]
m1 = vecm.VECM(
    vecm_endog,
    k_ar_diff=1,
    coint_rank=1,
    deterministic="ci",
)
vecm_res_1 = m1.fit()

# Last expression of the cell: the estimation summary is what gets displayed.
vecm_res_1.summary()

In [None]:
# Granger-causality test on the fitted VECM with NZBTU_USD as the caused
# variable, evaluated at the 5% significance level. No `causing` argument is
# passed, so the library's default choice of causing variables applies.
granger_results = vecm_res_1.test_granger_causality(
    caused="NZBTU_USD",
    signif=0.05,
)

# Display the test summary as the cell output.
granger_results.summary()

In [None]:
# Render the fitted model's summary as LaTeX source and print it as plain text
# (e.g. for pasting into a document).
vecm_summary = vecm_res_1.summary()
vecm_summary_latex = vecm_summary.as_latex()
print(vecm_summary_latex)

In [4]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# columns of `df`; the table is shown as the cell's output.
summary_stats = df.describe()
summary_stats

Unnamed: 0,Price_NZ,NZ_BTU,NZ_USD_Q,NZBTU_USD,HH,NBP_raw,NBP_USD,Brent,GBP_USD,NCF
count,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0
mean,6.624655,6.989382,0.728264,5.098694,3.254113,46.438884,6.887552,76.161865,1.468064,84.10107
std,0.512083,0.540277,0.073343,0.718916,0.84916,13.037701,2.238566,25.947772,0.145294,20.891391
min,5.353445,5.648185,0.538933,3.832102,1.716667,17.556032,2.180108,33.447344,1.2326,50.565833
25%,6.274772,6.620236,0.677133,4.578922,2.666667,37.661426,5.205059,53.398308,1.3064,67.382705
50%,6.727379,7.097762,0.717033,4.887548,3.08,45.614127,6.615057,68.858889,1.5335,81.930645
75%,6.938147,7.320134,0.793383,5.759198,3.856667,57.82912,9.068074,106.703993,1.5819,97.41746
max,7.609725,8.028686,0.864233,6.284149,5.186667,68.945373,11.162945,118.449062,1.6831,128.817857
