In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import datapackage
import matplotlib.pyplot as plt
import statsmodels.api as sm

## investigating volatility linkages between oil, gas, and regionalised coal futures markets ##

## load the combined price dataset from GitHub and drop incomplete rows ##

# Network read: needs access to raw.githubusercontent.com at run time.
url = 'https://raw.githubusercontent.com/joe-ascroft/phd/master/df-combined.csv'
df = pd.read_csv(url)

# DATE is parsed to datetime; every column after the first is forced to a
# numeric dtype, with unparseable entries turned into NaN (errors='coerce').
df["DATE"] = pd.to_datetime(df["DATE"])
coerced = {name: pd.to_numeric(df[name], errors='coerce') for name in df.columns[1:]}

# Rows containing any NaN (missing in the file or produced by the coercion
# above) are discarded.
df = df.assign(**coerced).dropna()

In [None]:
## calculating returns ##

# For each of the columns at positions 5..8, add a '<name>_r' column holding
# the first difference (current row minus previous row).
# NOTE(review): the selection is positional; presumably these are the log-price
# ('*_ln') columns, since later cells read 'WTI_ln_r' - confirm against the CSV.
first_diffs = {name + '_r': df[name].diff() for name in df.columns[5:9]}

# The first row has no predecessor, so its differences are NaN and it is dropped.
df = df.assign(**first_diffs).dropna()
print(df)

In [None]:
## calculating price ratios ##

# Element-wise quotient of the WTI_ln and HH_ln columns.
# NOTE(review): if the '*_ln' columns are log prices, this is a ratio of logs,
# not the log of the price ratio (which would be a difference) - confirm intent.
df['WTI_HH'] = df['WTI_ln'].div(df['HH_ln'])

# First difference of the quotient; the leading NaN row is removed afterwards.
df['WTI_HH_r'] = df['WTI_HH'].diff()
df = df.dropna()

In [None]:
# Line chart of the WTI_ln_r series, drawn against the frame's row index.
wti_returns = df['WTI_ln_r']
wti_returns.plot(figsize=(12,3), title='WTI Crude Daily Returns')

In [None]:
## markov regime switching on the WTI return series ##

# The series modelled here is 'WTI_ln_r'; its first remaining observation is
# skipped by the positional slice.
wti_returns = df['WTI_ln_r'].iloc[1:]

# Two-regime Markov switching regression.
# NOTE(review): `order=1` is passed through unchanged - confirm it has the
# intended effect for MarkovRegression (as opposed to MarkovAutoregression).
mod = sm.tsa.MarkovRegression(wti_returns, k_regimes=2, order=1)
res_mod = mod.fit()
res_mod.summary()

In [None]:
# Smoothed probability of regime 1 from the two-regime model fitted above
# (`res_mod`). With k_regimes=2 the only regime labels are 0 and 1, so the
# previous `res...[3]` lookup could not succeed (undefined name, missing column).
# NOTE(review): regime numbering is arbitrary - check the fitted constants in the
# model summary to confirm regime 1 is the "high" one before trusting the title.
res_mod.smoothed_marginal_probabilities[1].plot(
    title='Probability of being in the high regime', figsize=(12,2))

In [None]:
## OLS of HH_ln on NCF_ln (with intercept) ##

# NOTE(review): both variables are the '*_ln' level columns, not the '*_r'
# difference columns - confirm this is the intended specification.
y = df['HH_ln']

# add_constant supplies the intercept column alongside the single regressor.
X = sm.add_constant(df['NCF_ln'])

mod1 = sm.OLS(y, X)
results = mod1.fit()
print(results.summary())

In [None]:
# Q-Q plot of the OLS residuals against the default reference distribution.
# `fit` is a boolean flag: True fits the distribution's parameters to the data
# so the standardized quantiles can be compared with the 45-degree line. The
# string "TRUE" only worked because any non-empty string is truthy.
sm.qqplot(results.resid, line="45", fit=True)