In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import datapackage
import matplotlib.pyplot as plt
import statsmodels.api as sm

## investigating volatility linkages between oil, gas, and regionalised coal futures markets ##

## fetch the combined price table from GitHub, coerce column types, keep only complete rows ##

url = 'https://raw.githubusercontent.com/joe-ascroft/phd/master/df-combined.csv'
df = pd.read_csv(url)

# DATE is parsed into datetimes.
df["DATE"] = pd.to_datetime(df["DATE"])

# Every column after the first is forced to a numeric dtype; entries that
# cannot be parsed become NaN instead of raising.
# NOTE(review): this relies on DATE being the first column of the CSV - confirm.
value_columns = df.columns[1:]
df[value_columns] = df[value_columns].apply(pd.to_numeric, errors='coerce')

# Discard any row that contains at least one missing value.
df = df.dropna()

In [None]:
## row-over-row changes of the selected columns ##

# Each column at positions 5..8 gets a companion "<name>_r" column holding
# the value minus the previous row's value.
# NOTE(review): presumably these are the log-price columns, which would make
# the differences log returns - confirm against the CSV column order.
for series_name in df.columns[5:9]:
    previous_value = df[series_name].shift()
    df[series_name + '_r'] = df[series_name].sub(previous_value)

# The shifted series has no value for the first row, so the new columns start
# with NaN; drop every row that contains a NaN.
df = df.dropna()
print(df)

In [None]:
## ratio of the HH_ln column to the NCF_ln column ##

# Element-wise division, stored as a new HH_NCF column.
hh_over_ncf = df['HH_ln'].div(df['NCF_ln'])
df['HH_NCF'] = hh_over_ncf
print(df)

In [None]:
## markov regime switching on the Henry Hub / Newcastle futures ratio (HH_NCF) ##

# Fit a three-regime Markov switching regression to the ratio series.
# The first row is excluded from the fit.
# NOTE(review): the reason for skipping that row is not evident here - confirm it is intended.
mod = sm.tsa.MarkovRegression(df['HH_NCF'].iloc[1:], k_regimes=3)
res = mod.fit()

# Print explicitly: a bare `res.summary()` in the middle of a cell is evaluated
# and then discarded, because Jupyter only displays a cell's final expression.
print(res.summary())

# Smoothed probability of regime 1 over the sample.
# NOTE(review): with k_regimes=3 nothing guarantees that regime 1 is the "high"
# one; check the fitted regime parameters in the summary before relying on this title.
res.smoothed_marginal_probabilities[1].plot(
    title='Probability of being in the high regime', figsize=(12,2));

In [None]:
## OLS of HH_ln on NCF_ln ##

# statsmodels' OLS does not add an intercept on its own, so a constant column
# is included explicitly; without it the fitted line is forced through the origin.
# NOTE(review): if a through-origin regression was intended, pass df["NCF_ln"]
# directly as the regressor instead.
ols_exog = sm.add_constant(df["NCF_ln"])
mod1 = sm.OLS(df["HH_ln"], ols_exog)
results = mod1.fit()
print(results.summary())

In [None]:
## plot the NCF_ln series against DATE ##

# Use an explicit figure/axes pair so the plot never lands on a figure left
# over from an earlier cell.
fig, ax = plt.subplots()
ax.plot(df["DATE"], df["NCF_ln"])

# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set(title="NCF_ln over time", xlabel="DATE", ylabel="NCF_ln")
plt.show()