In [None]:
import datetime
import json
import os

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
import Quandl
import requests
from plotly.offline import init_notebook_mode, iplot
from sklearn import linear_model

init_notebook_mode() 

# Plotly cloud credentials are secrets and must not be committed with the
# notebook: they are read from the PLOTLY_USERNAME / PLOTLY_API_KEY
# environment variables. The figures in the cells visible here are rendered
# with the offline `iplot`, so signing in is skipped when no credentials are set.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    py.sign_in(plotly_username, plotly_api_key)

# Get data from the U.S. Energy Information Administration (EIA) series API.
# The API key is a secret: it is read from the EIA_API_KEY environment
# variable instead of being stored in the notebook.
api_key = os.environ.get('EIA_API_KEY')
if not api_key:
    raise RuntimeError('Set the EIA_API_KEY environment variable to your EIA API key')
req = 'http://api.eia.gov/series/?api_key={}&series_id=NG.N3010{}2.M'.format(api_key,'US')
# Bound the wait and fail on an HTTP error here, rather than on an opaque
# JSON/KeyError further down.
r_us = requests.get(req, timeout=30)
r_us.raise_for_status()
data_us = json.loads(r_us.text)
dates = []
volumes = []
# set index using the total U.S. data; each element is read as [period 'YYYYMM', value]
for element in data_us['series'][0]['data']:
    dates.append(datetime.datetime.strptime(element[0], '%Y%m').date())
    # A null value would make float() raise; keep it as NaN so the
    # dropna() after the merge discards that month.
    volumes.append(np.nan if element[1] is None else float(element[1]))
df_us = pd.DataFrame({'US Consumption': volumes}, index=pd.to_datetime(dates)).sort_index()
df_us.index.name = 'Date'
# download henry hub settlements from Quandl
henry_hub = Quandl.get('FRED/GASPRICE')

conversion = 0.9756 # MMBTU to MMcft
# NOTE(review): the factor is a price-unit conversion, so it is applied to the
# Henry Hub series only; the consumption volumes are left in the units
# delivered by EIA. The direction/magnitude of the factor itself should be
# confirmed against the intended price unit.
# Outer merge followed by dropna(how='any') keeps only dates present (and
# non-missing) in both series.
df = pd.merge(df_us, henry_hub*conversion, how='outer', left_index=True, right_index=True).dropna(how='any')
df.columns = ['US Consumption', 'HH']

# plot data: consumption on the left axis, Henry Hub price on an overlaid
# right-hand axis
trace_1 = go.Scatter(
    x=df.index,
    y=df['US Consumption'],
    name='U.S. Consumption',
    yaxis='y1',
)
trace_2 = go.Scatter(
    x=df.index,
    y=df['HH'],
    name='Henry Hub',
    yaxis='y2',
)
data = [trace_1, trace_2]
layout = {
    'title': 'U.S. consumption and Henry Hub price (1994 - Today)',
    'xaxis': {'title': 'Time'},
    'yaxis': {'title': 'US gas consumption (MMcft)'},
    # secondary axis drawn over the primary one, on the right side
    'yaxis2': {
        'title': 'Henry Hub price ($/MMcft)',
        'side': 'right',
        'titlefont': {'color': 'rgb(148, 103, 189)'},
        'tickfont': {'color': 'rgb(148, 103, 189)'},
        'overlaying': 'y',
    },
    'legend': {'x': 0.75, 'y': 1},
}
fig = {'data': data, 'layout': layout}
iplot(fig, show_link=False)

# Period-over-period percentage changes of both series, plotted with the same
# dual-axis arrangement as the level chart.
df_ret = df.pct_change()
trace_1 = go.Scatter(
    x=df_ret.index,
    y=df_ret['US Consumption'],
    name='U.S. Consumption',
    yaxis='y1',
)
trace_2 = go.Scatter(
    x=df_ret.index,
    y=df_ret['HH'],
    name='Henry Hub',
    yaxis='y2',
)
data = [trace_1, trace_2]
layout = {
    'title': 'U.S. consumption and Henry Hub price % variations (1994 - Today)',
    'xaxis': {'title': 'Time'},
    'yaxis': {'title': 'US gas consumption variation'},
    # secondary axis drawn over the primary one, on the right side
    'yaxis2': {
        'title': 'Henry Hub return',
        'side': 'right',
        'titlefont': {'color': 'rgb(148, 103, 189)'},
        'tickfont': {'color': 'rgb(148, 103, 189)'},
        'overlaying': 'y',
    },
    'legend': {'x': 0.75, 'y': 1},
}
fig = {'data': data, 'layout': layout}
iplot(fig, show_link=False)

# Pairwise correlation of the two percentage-change series.
correlation_matrix = df_ret.corr()
# `.ix` is deprecated and was removed in pandas 1.0; select the off-diagonal entry by label.
correlation = correlation_matrix.loc['US Consumption', 'HH']
print('Correlation between monthly variations in gas consumption and Henry Hub monthly returns: {}%'.format(round(correlation*100,1)))

In [None]:
# LINEAR REGRESSION
# Regress Henry Hub returns on consumption variations.
# Rows are dropped jointly: cleaning each column with its own dropna() could
# leave the two arrays with different lengths or pair values from different dates.
returns_clean = df_ret[['US Consumption', 'HH']].dropna(how='any')
x_values = returns_clean['US Consumption'].values
y_values = returns_clean['HH'].values
# scikit-learn expects 2-D inputs: one column of features, one column of targets
features = x_values.reshape(-1, 1)
targets = y_values.reshape(-1, 1)
regr = linear_model.LinearRegression()
regr.fit(features, targets)
# Ridge fit on the same data; its predictions are computed but not plotted in this cell
regr2 = linear_model.Ridge(alpha=0.5)
regr2.fit(features, targets)
y_predict = regr.predict(features)
y_predict2 = regr2.predict(features)
predicted_data = [go.Scatter(x=x_values, y=y_predict[:,0], mode='lines', name = 'linear model')]
actual_data = [go.Scatter(x=returns_clean['US Consumption'], y=returns_clean['HH'], mode='markers',
                         name = 'US data')]
data = actual_data+predicted_data
layout = dict(title='U.S.A. natural gas residential consumption and Henry Hub price returns',
             xaxis = dict(title='Monthly consumption variations'),
             yaxis = dict(title='Henry Hub return'),
)
fig_reg=dict(data=data, layout=layout)
iplot(fig_reg)

In [None]:
# Build a forward curve: one value per delivery month, taken from the latest
# available settlement of the matching futures contract series.
start_date = datetime.datetime(2016,5,1)
end_date = datetime.datetime(2017,3,31)
dates = pd.date_range(start_date, end_date, freq='D')
months = pd.date_range(start_date, end_date, freq='MS')

# mapping between delivery months and time series name 
#(see https://www.quandl.com/collections/futures/cme-natural-gas-futures)
mapping = {
    1:'F', 2:'G', 3:'H', 4:'J',
    5:'K', 6:'M', 7:'N', 8:'Q', 
    9:'U', 10:'V', 11:'X', 12:'Z'
}
# float dtype so the column holds numbers rather than generic objects
forward_curve = pd.DataFrame(index=months, columns=['Henry Hub'], dtype=float)
for m in months:
    year = str(m.year)
    letter = mapping[m.month]
    series_name = 'CME/NG{0}{1}'.format(letter, year)
    temp = Quandl.get(series_name)
    # most recent non-missing settlement, scaled with the notebook's price conversion factor
    value = temp['Settle'].dropna().iloc[-1]*conversion
    # Single .loc assignment: the previous chained `.ix[m][...] = value` used the
    # removed `.ix` accessor and could write to a temporary copy of the row.
    forward_curve.loc[m, 'Henry Hub'] = value
    
fc_data = [go.Scatter(x=forward_curve.index, y=forward_curve['Henry Hub'], name='HH Forward Curve')]
fc_layout = dict(title='Henry Hub forward curve',
                xaxis=dict(title='Delivery Month'),
                yaxis=dict(title='Forward Price ($/MMcft)'))
fc_fig = dict(data=fc_data, layout=fc_layout)
iplot(fc_fig, show_link=False)

In [None]:
# Daily Historical data

def _settlement_traces(prices):
    """Build the plot traces for a daily price series.

    Parameters
    ----------
    prices : pandas.Series
        Daily prices indexed by date.

    Returns
    -------
    (list, pandas.Series)
        The two Scatter traces (prices and their moving average) and the
        moving-average series itself.
    """
    # mean over a rolling window of 7 observations (rows, not calendar days)
    moving_average = prices.rolling(window=7).mean()
    traces = [
        go.Scatter(x=prices.index, y=prices, name='Henry Hub settlement'),
        go.Scatter(x=moving_average.index, y=moving_average,
                   name='Moving Average', showlegend=False, marker=dict(color='lightgray')),
    ]
    return traces, moving_average


# Download the daily series once and slice it per year, instead of requesting
# the same dataset from Quandl for every chart.
hh_daily_full = Quandl.get('EIA/NG_RNGWHHD_D')

hist_daily = hh_daily_full.loc['2015-1-1':'2015-12-31']

hh2015_MMcft = hist_daily['Value']*conversion
hist_data, df_ma = _settlement_traces(hh2015_MMcft)
hist_layout = dict(
    title='Henry Hub Settlement price (2015)',
    xaxis=dict(title='Day'),
    yaxis=dict(title='Price ($/MMcft)')
)
fig_2015=dict(data=hist_data, layout=hist_layout)
iplot(fig_2015, show_link=False)


hist_daily = hh_daily_full.loc['2005-1-1':'2005-12-31']

K = 9 # strike price

hh2005_MMcft = hist_daily['Value']*conversion
hist_data, df_ma = _settlement_traces(hh2005_MMcft)
hist_layout = dict(
    title='Henry Hub Settlement price (2005)',
    xaxis=dict(title='Day'),
    yaxis=dict(title='Price ($/MMcft)'),
    # horizontal dashed line at the strike price K across the plotted period
    shapes = [
        {
            'type': 'line',
            'x0': hh2005_MMcft.first_valid_index(),
            'y0': K,
            'x1': hh2005_MMcft.last_valid_index(),
            'y1': K,
            'line': {
                'color': 'black',
                'width': 2,
                'dash': 'dash',
            },
        },
    ]
)
fig_2005=dict(data=hist_data, layout=hist_layout)
iplot(fig_2005, show_link=False)