### Import Historical VIX Futures Prices from Quandl & Clean Data

In [1]:
import os
import numpy as np
import pandas as pd
import quandl
from datetime import datetime
# NOTE(review): an API key is committed in source.  Prefer supplying it through the
# QUANDL_API_KEY environment variable; the embedded literal is kept only as a
# backward-compatible fallback and should be rotated.
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY") or "JxFC1vkwW6v9aMMJvMLA"

#### Fetch historical prices for the continuous lead futures contract (VX1) via the Quandl API & load them into a pandas DataFrame

In [2]:
# Download the "CHRIS/CBOE_VX1" dataset from Quandl into `vx1`,
# then preview its first 10 rows.
vx1 = quandl.get("CHRIS/CBOE_VX1")
vx1.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Settle,Change,Total Volume,EFP,Prev. Day Open Interest
Trade Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2005-06-20,148.3,148.3,148.3,148.3,148.3,148.3,0.0,0.0,0.0
2005-06-21,149.0,149.0,149.0,149.0,149.1,0.8,50.0,0.0,50.0
2005-06-22,148.4,148.4,148.4,148.4,148.6,-0.5,10.0,0.0,60.0
2005-06-23,149.8,150.5,149.1,150.5,151.2,2.6,61.0,0.0,121.0
2005-06-24,150.9,152.0,150.5,152.0,152.7,1.5,9.0,0.0,117.0
2005-06-27,152.8,152.8,152.2,152.2,152.3,-0.4,26.0,0.0,142.0
2005-06-28,150.2,150.2,150.2,150.2,150.2,-2.1,0.0,0.0,137.0
2005-06-29,151.2,151.2,150.5,151.2,151.3,1.1,45.0,0.0,181.0
2005-06-30,151.0,151.0,151.0,151.0,151.0,-0.3,0.0,0.0,150.0
2005-07-01,150.7,150.7,150.7,150.7,150.7,-0.3,0.0,0.0,150.0


#### Adjust early historical futures prices (on or before March 23, 2007) to account for the 10x price multiplier

In [3]:
# Rescale every price column ('Open' through 'Change') for rows dated on or before
# the cutoff: divide by 10 and round to 2 decimals.
# Vectorised div/round replaces the per-cell `applymap` lambda (deprecated in recent pandas).
# NOTE: this mutates `vx1` in place, so re-running the cell divides the same rows again.
rescale_cutoff = '2007-03-23'
early_rows = vx1.index <= rescale_cutoff
adj_prices = vx1.loc[early_rows, 'Open':'Change'].div(10).round(2)

# Map adjusted prices back onto master time series
vx1.loc[early_rows, 'Open':'Change'] = adj_prices

In [4]:
# Spot-check the adjusted prices over a sample window (2006-01-17 .. 2006-01-27, inclusive)
sample_window = (vx1.index >= '2006-01-17') & (vx1.index <= '2006-01-27')
vx1.loc[sample_window]

Unnamed: 0_level_0,Open,High,Low,Close,Settle,Change,Total Volume,EFP,Prev. Day Open Interest
Trade Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2006-01-17,11.7,11.77,11.68,11.68,11.71,0.39,220.0,0.0,1010.0
2006-01-18,12.62,12.62,12.62,12.62,12.62,0.9,0.0,0.0,831.0
2006-01-19,13.15,13.15,13.09,13.15,13.11,-0.16,79.0,0.0,8135.0
2006-01-20,13.3,13.56,13.25,13.55,13.53,0.42,170.0,0.0,8140.0
2006-01-23,13.64,13.72,13.54,13.7,13.74,0.21,537.0,0.0,8520.0
2006-01-24,13.71,13.75,13.57,13.6,13.56,-0.18,772.0,0.0,8218.0
2006-01-25,13.52,13.52,13.12,13.47,13.32,-0.24,541.0,0.0,8209.0
2006-01-26,13.14,13.14,12.95,12.98,12.92,-0.4,232.0,0.0,8233.0
2006-01-27,13.47,13.47,12.69,12.73,12.72,-0.2,49.0,0.0,8212.0


#### Examine prices from a specific date range (if necessary)

In [5]:
from pandas.tseries.offsets import BDay

# Previous business day relative to now (BDay skips weekends only, not exchange holidays).
# pd.Timestamp replaces the `pd.datetime` alias, which was deprecated in pandas 1.0
# and removed in pandas 2.0.
last_tdate = pd.Timestamp.today() - BDay(1)

# Specify Start & End dates for range (NOTE: 'end_date' does not have to be 'last_tdate', it is just an example)
start_date = '2005-06-20'
end_date = '2005-07-28'
#end_date = last_tdate

# Date range can have ONLY a start date:
#vx1_range = vx1[(vx1.index >= start_date)]
# Date range can also have a start AND end date (both bounds inclusive):
vx1_range = vx1[(vx1.index >= start_date) & (vx1.index <= end_date)]

vx1_range.head(20)

Unnamed: 0_level_0,Open,High,Low,Close,Settle,Change,Total Volume,EFP,Prev. Day Open Interest
Trade Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2005-06-20,14.83,14.83,14.83,14.83,14.83,14.83,0.0,0.0,0.0
2005-06-21,14.9,14.9,14.9,14.9,14.91,0.08,50.0,0.0,50.0
2005-06-22,14.84,14.84,14.84,14.84,14.86,-0.05,10.0,0.0,60.0
2005-06-23,14.98,15.05,14.91,15.05,15.12,0.26,61.0,0.0,121.0
2005-06-24,15.09,15.2,15.05,15.2,15.27,0.15,9.0,0.0,117.0
2005-06-27,15.28,15.28,15.22,15.22,15.23,-0.04,26.0,0.0,142.0
2005-06-28,15.02,15.02,15.02,15.02,15.02,-0.21,0.0,0.0,137.0
2005-06-29,15.12,15.12,15.05,15.12,15.13,0.11,45.0,0.0,181.0
2005-06-30,15.1,15.1,15.1,15.1,15.1,-0.03,0.0,0.0,150.0
2005-07-01,15.07,15.07,15.07,15.07,15.07,-0.03,0.0,0.0,150.0


#### Save cleaned time series dataframe in pickle for use with other instruments 

In [6]:
# Persist the adjusted VX1 frame as a pickle.
# NOTE(review): the target is an absolute, user-specific Windows path, so this cell
# only resolves on a machine with that directory layout.
vx1.to_pickle(r'C:\Users\David\Documents\IPython_Notebooks\Capstone Project\Data\vx1.pkl')

## Process Intraday VIX Futures data

In [66]:
# Folder holding the intraday files, built from the current working directory.
# The backslash separators make this path Windows-specific.
intra_filepath = os.getcwd() + r'\Data\Intraday'

In [67]:
# Load the intraday quote file, keeping only the symbol, timestamp and bid/ask columns.
# The file name is now a raw string: '\V' is not a valid escape sequence, so the
# non-raw literal triggers an invalid-escape warning on modern Python (the resulting
# path text is unchanged).
# NOTE(review): `date_parser` is deprecated in pandas 2.x in favour of `date_format`;
# it is kept here for compatibility with the older pandas this notebook targets.
intra = pd.read_csv(intra_filepath + r'\VIX futures hamilton.csv',
                    usecols=['SYMBOL', 'TIMESTAMP', 'BID_PRICE', 'ASK_PRICE'],
                    parse_dates=['TIMESTAMP'],
                    date_parser=lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
intra.head()

Unnamed: 0,SYMBOL,TIMESTAMP,BID_PRICE,ASK_PRICE
0,VX 15F,2015-01-02 09:30:00,17.35,17.4
1,VX 15F,2015-01-02 09:40:00,17.15,17.2
2,VX 15F,2015-01-02 09:50:00,17.25,17.3
3,VX 15F,2015-01-02 10:00:00,17.35,17.4
4,VX 15F,2015-01-02 10:10:00,17.7,17.75


In [68]:
# Use 'date' as the timestamp column name for the rest of the notebook
intra.rename(columns={'TIMESTAMP': 'date'}, inplace=True)

# Mid quote: arithmetic mean of the bid and ask prices
bid_plus_ask = intra['BID_PRICE'] + intra['ASK_PRICE']
intra['Mid'] = bid_plus_ask / 2

In [69]:
# Futures month letter codes (F, G, H, ... Z) mapped to calendar month numbers 1..12
monthly_expiry_symb = dict(zip('FGHJKMNQUVXZ', range(1, 13)))

def parse_symbol(x):
    """Split a contract symbol such as 'VX 15F' into its expiry year and month.

    The second whitespace-separated token carries a two-digit year followed by a
    month letter; the year is offset by 2000 and the letter is looked up in
    `monthly_expiry_symb`.

    Returns a pandas Series indexed by 'year' and 'month'.
    """
    contract_code = x.split()[1]
    expiry_year = 2000 + int(contract_code[:2])
    expiry_month = monthly_expiry_symb[contract_code[2]]
    return pd.Series([expiry_year, expiry_month], index=['year', 'month'])
# Decode every contract symbol, then attach the resulting year/month columns
expiry_fields = intra['SYMBOL'].apply(parse_symbol)
intra[['year', 'month']] = expiry_fields
intra.head()

Unnamed: 0,SYMBOL,date,BID_PRICE,ASK_PRICE,Mid,year,month
0,VX 15F,2015-01-02 09:30:00,17.35,17.4,17.375,2015,1
1,VX 15F,2015-01-02 09:40:00,17.15,17.2,17.175,2015,1
2,VX 15F,2015-01-02 09:50:00,17.25,17.3,17.275,2015,1
3,VX 15F,2015-01-02 10:00:00,17.35,17.4,17.375,2015,1
4,VX 15F,2015-01-02 10:10:00,17.7,17.75,17.725,2015,1


In [20]:
# Show the quotes time-stamped on 2015-01-21 (from midnight up to, but excluding, 2015-01-22)
from_day_start = intra['date'] >= '2015-01-21'
before_next_day = intra['date'] < '2015-01-22'
intra[before_next_day & from_day_start]

Unnamed: 0,SYMBOL,date,BID_PRICE,ASK_PRICE,Mid,year,month
960,VX 15F,2015-01-21 09:30:00,25.15,25.50,25.325,15,1
961,VX 15F,2015-01-21 09:40:00,25.15,25.50,25.325,15,1
962,VX 15F,2015-01-21 09:50:00,25.15,25.50,25.325,15,1
963,VX 15F,2015-01-21 10:00:00,25.15,25.50,25.325,15,1
964,VX 15F,2015-01-21 10:10:00,25.15,25.50,25.325,15,1
965,VX 15F,2015-01-21 10:20:00,25.15,25.50,25.325,15,1
966,VX 15F,2015-01-21 10:30:00,25.15,25.50,25.325,15,1
967,VX 15F,2015-01-21 10:40:00,25.15,25.50,25.325,15,1
968,VX 15F,2015-01-21 10:50:00,25.15,25.50,25.325,15,1
969,VX 15F,2015-01-21 11:00:00,25.15,25.50,25.325,15,1


In [24]:
# Load the expiry calendar; the 'vx1' and 'spx' columns hold dates formatted like '19-Oct-05'.
# The path is now a single raw string: '\e' is not a valid escape sequence, so the
# non-raw literal triggers an invalid-escape warning on modern Python (the resulting
# path text is unchanged).
expiration = pd.read_csv(os.getcwd() + r'\Data\expiration_dates.csv',
                         parse_dates=['vx1', 'spx'],
                         date_parser=lambda x: datetime.strptime(x, '%d-%b-%y'))

expiration.head()

Unnamed: 0,vx1,spx,Unnamed: 2
0,2005-10-19,2005-10-21,
1,2005-11-16,2005-11-18,28.0
2,2005-12-21,2005-12-16,28.0
3,2006-01-18,2006-01-20,35.0
4,2006-02-15,2006-02-17,28.0


In [89]:
vx_expirys = 0  # column position of the VIX futures expiry dates in the expiry table
sp_expirys = 1  # column position of the second expiry column ('spx' in the loaded table)


def find_expiry(df, expirations=None):
    """Return the rows of ``df`` that belong to the front VIX futures contract.

    ``df`` is one group of intraday quotes sharing a timestamp; it must provide the
    columns 'date', 'year' and 'month', where 'year'/'month' identify the contract
    of each row.  The trade date is taken from the first row.

    The expiry row whose VIX expiry falls in the trade date's calendar month is
    looked up.  If the trade timestamp is on or after that expiry, the contract of
    the following expiry row is selected; otherwise the current month's contract is.

    Parameters
    ----------
    df : pandas.DataFrame
        Quotes for a single timestamp.
    expirations : pandas.DataFrame, optional
        Expiry table whose column at position ``vx_expirys`` holds the VIX expiry
        dates.  Defaults to the module-level ``expiration`` frame.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` for the selected contract (possibly empty).

    Raises
    ------
    ValueError
        If the table has no VIX expiry in the trade date's month.  Previously the
        scan fell through and silently used the last row of the table.
    IndexError
        If the contract must roll forward but the table has no later expiry row.
    """
    table = expiration if expirations is None else expirations
    expiry_dates = pd.DatetimeIndex(table.iloc[:, vx_expirys])
    date = df['date'].iloc[0]

    # Locate the expiry that falls in the same calendar month as the trade date
    in_trade_month = (expiry_dates.year == date.year) & (expiry_dates.month == date.month)
    if not in_trade_month.any():
        raise ValueError('no VIX expiry found for the month of %s' % date)
    i = int(in_trade_month.argmax())  # first match, as the original scan did

    # On or after this month's expiry the front contract is the next one in the table
    if date >= expiry_dates[i]:
        i += 1
        if i >= len(expiry_dates):
            raise IndexError('no VIX expiry after the month of %s; extend the expiry table' % date)

    front = expiry_dates[i]
    return df.loc[(df['year'] == front.year) & (df['month'] == front.month), :]

In [94]:
# Run `find_expiry` once per distinct timestamp so that only the rows it selects
# for that timestamp are kept.
intra = intra.groupby('date').apply(find_expiry)

# Flatten the resulting index: keep only the first element of each index entry.
# Presumably groupby/apply yields (timestamp, original row label) tuples here - verify
# against the installed pandas version.
intra.index = [x[0] for x in intra.index.values]

In [7]:
# Fast but not accurate: keeps only the first row of each timestamp, whichever contract it belongs to
#intra = intra.groupby('date').apply(lambda x: x.iloc[0,:])

In [95]:
# Preview the first rows of the processed intraday frame
intra.head()

Unnamed: 0,SYMBOL,date,BID_PRICE,ASK_PRICE,Mid,year,month
2015-01-02 09:30:00,VX 15F,2015-01-02 09:30:00,17.35,17.4,17.375,2015,1
2015-01-02 09:40:00,VX 15F,2015-01-02 09:40:00,17.15,17.2,17.175,2015,1
2015-01-02 09:50:00,VX 15F,2015-01-02 09:50:00,17.25,17.3,17.275,2015,1
2015-01-02 10:00:00,VX 15F,2015-01-02 10:00:00,17.35,17.4,17.375,2015,1
2015-01-02 10:10:00,VX 15F,2015-01-02 10:10:00,17.7,17.75,17.725,2015,1


In [96]:
# Persist the processed intraday frame as a pickle in the 'Pickle' sub-folder of
# `intra_filepath` (backslash separators: Windows-specific path).
intra.to_pickle(intra_filepath + r'\Pickle\vix_intra(original).pkl')