In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import datapackage
import matplotlib.pyplot as plt
import statsmodels.api as sm

## investigating volatility linkages between oil, gas, and regionalised coal futures markets ##

## fetch the combined data set from GitHub, coerce column types, keep complete rows only ##

url = 'https://raw.githubusercontent.com/joe-ascroft/phd/master/df-combined.csv'
df = pd.read_csv(url)

# Parse the DATE column into pandas datetimes.
df["DATE"] = pd.to_datetime(df["DATE"])

# Every column after the first is converted to a numeric dtype; entries that
# cannot be parsed become NaN instead of raising.
# NOTE(review): presumably DATE is the first column, so this slice covers
# exactly the non-date columns — confirm against the CSV layout.
value_columns = df.columns[1:]
df[value_columns] = df[value_columns].apply(pd.to_numeric, errors='coerce')

# Discard any row that has a missing value in any column.
df = df.dropna()

In [None]:
## calculating univariate returns ##

# Period-over-period percentage change for the columns at positions 1-4 of
# the frame; each result is stored in a new column named '<source>_r'.
# NOTE(review): the positional slice assumes those four columns are the
# series of interest — confirm against the CSV layout.
for col in df.columns[1:5]:
    df[col+'_r'] = df[col].pct_change()

# pct_change() leaves NaN in the first row of every new return column.
# DataFrame.dropna() returns a new frame rather than modifying in place, so
# the result must be assigned back for those rows to actually be removed.
# Note: re-running this cell on its own trims one further leading row.
df = df.dropna()
print(df)

In [None]:
## markov regime switching on the Henry Hub (HH) / Newcastle (NCF) return ratio ##

# Ratio of HH returns to NCF returns. Wherever NCF_r is exactly zero the
# division yields +/-inf (or NaN for 0/0). dropna() only removes NaN, so the
# infinities are converted to NaN first to keep them out of the model input.
hh_ncf_ratio = df['HH_r']/df['NCF_r']
df['HH_r/NCF_r'] = hh_ncf_ratio.replace([np.inf, -np.inf], np.nan)
df = df.dropna()
print(df)

# Two-regime Markov switching regression fitted to the ratio series.
# NOTE(review): [1:] skips the first remaining observation even though rows
# with missing values were already dropped above — confirm this is intended.
modhh_ncf = sm.tsa.MarkovRegression(df['HH_r/NCF_r'][1:], k_regimes=2)
res_hh_ncf = modhh_ncf.fit()

# A bare expression in the middle of a cell is not displayed, so the summary
# is printed explicitly (same approach as the OLS summary in this notebook).
print(res_hh_ncf.summary())

# Smoothed marginal probability of regime 1 over the sample.
# NOTE(review): the title labels regime 1 as the "high" regime; regime
# numbering comes from the fit — verify against the estimated parameters.
res_hh_ncf.smoothed_marginal_probabilities[1].plot(
    title='Probability of being in the high regime', figsize=(12,3));

In [None]:
## OLS on volatility output: WTI_vol regressed on a constant and NCF_vol ##

# NOTE(review): WTI_vol and NCF_vol are not created in any cell visible here;
# presumably they come from the loaded CSV or another cell — confirm they
# exist before this cell runs.

# Use one jointly NaN-free sample so the dependent variable and the regressor
# always share the same rows and index.
ols_sample = df[["WTI_vol", "NCF_vol"]].dropna()

# add_constant() returns a two-column design matrix (const, NCF_vol). It is
# kept in its own variable: writing it back into the single df["NCF_vol"]
# column cannot hold both columns and would also overwrite the source data.
ncf_vol_exog = sm.add_constant(ols_sample["NCF_vol"])

mod1 = sm.OLS(ols_sample["WTI_vol"], ncf_vol_exog)
results = mod1.fit()
print(results.summary())