## Get VIX Future Data

In [2]:
import requests
import pandas as pd
import io
import re
import numpy as np

Get the data prior to 2013 first; this is archived data on CBOE.com.

In [3]:
# Page that lists the archived VX futures csv files.
url = 'https://www.cboe.com/us/futures/market_statistics/historical_data/archive/'
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly rather than regex-scanning an HTTP error page
x = response.text

# Paths for the csv files on the website.
# The pattern is a raw string so that `\s` reaches the regex engine as written
# instead of relying on Python's deprecated handling of unknown string escapes.
# The capture group is the 14-character file name that follows `'path': '`,
# i.e. exactly what slicing the last 14 characters of the full match yields.
PATHS = re.findall(r"'path':\s'(CFE.{5}VX.csv)", x)
NAMES = ['DATE','CONTRACT','OPEN','HIGH','LOW','CLOSE','SETTLE','CHANGE','VOLUME','EFP','OPEN_INT'] #column names

In [6]:
# Base URL under which the archived per-contract csv files are served.
url = 'https://cdn.cboe.com/resources/futures/archive/volume-and-price/'

N = len(PATHS) #number of csv files to read in
KEEP_COLS = ['DATE','EXPIRY_DATE','CLOSE','SETTLE','VOLUME','DAYS']

# One cleaned frame per contract file. They are concatenated once after the
# loop: this avoids re-copying the accumulated data on every iteration and
# avoids concatenating onto an empty object-dtype placeholder frame.
frames = []

for file in PATHS:
    response = requests.get(url + file, timeout=30)
    response.raise_for_status()  # do not try to parse an HTTP error page as csv
    content = response.text.splitlines()

    # The first line of every file is skipped unconditionally. If the first
    # remaining row is a 'Trade Date' header row, drop that as well.
    df = pd.read_csv(io.StringIO('\n'.join(content[1:])), names=NAMES, header=None)
    if not df.empty and df.iloc[0, 0] == 'Trade Date':
        df = df.drop(0)
    if df.empty:
        # Nothing to derive an expiry date from; skip instead of raising IndexError.
        continue

    df['DATE'] = pd.to_datetime(df['DATE'])
    # The expiry is taken to be the last date present in the file.
    # NOTE(review): assumes each archive file runs through the contract's
    # final trading day -- confirm against the data.
    df['EXPIRY_DATE'] = df['DATE'].iloc[-1]

    # Time remaining until expiry, as a timedelta (reduced to whole days below).
    df['DAYS'] = df['EXPIRY_DATE'] - df['DATE']

    df[['SETTLE', 'VOLUME']] = df[['SETTLE', 'VOLUME']].apply(pd.to_numeric)
    # Keep only rows that have both a positive settlement price and positive volume.
    df_use = df.loc[(df['SETTLE'] > 0) & (df['VOLUME'] > 0), KEEP_COLS]

    frames.append(df_use)

if not frames:
    # Covers both "no paths scraped" and "every file was empty".
    raise ValueError('No archived VX futures data was downloaded; check PATHS and the archive page format.')

VIX_FUT = pd.concat(frames)
VIX_FUT['DAYS'] = VIX_FUT.DAYS.dt.days
VIX_FUT.to_csv('DATA/VIX_FUT_prior2013.csv')

Get data post 2013

In [7]:
# Page that lists the historical-data csv files together with their expiry dates.
url = 'https://www.cboe.com/us/futures/market_statistics/historical_data/'
page = requests.get(url)
x = page.text

# All the csv path names: the trailing 18 characters of every "path" match.
path_hits = re.findall('"path":.{68}csv', x)
PATHS = [hit[-18:] for hit in path_hits]

# Corresponding expiry dates of the futures: the 10-character YYYY-MM-DD value
# sitting just inside the closing quote of every "expire_date" match.
expiry_hits = re.findall('"expire_date":"[0-9]{4}-[0-9]{2}-[0-9]{2}"', x)
EXP_DATES = [hit[-11:-1] for hit in expiry_hits]

In [8]:
N = len(PATHS) #number of csv files to read in
KEEP_COLS = ['DATE','EXPIRY_DATE','CLOSE','SETTLE','VOLUME','DAYS']

# Paths and expiry dates are paired purely by position, so every path needs a
# matching expiry entry; fail with a clear message rather than a bare IndexError.
# NOTE(review): this also assumes both lists appear in the same order on the
# page -- confirm against the page source.
if len(EXP_DATES) < N:
    raise ValueError(
        'Found %d csv paths but only %d expiry dates; cannot pair them.' % (N, len(EXP_DATES))
    )

# One cleaned frame per contract file. They are concatenated once after the
# loop: this avoids re-copying the accumulated data on every iteration and
# avoids concatenating onto an empty object-dtype placeholder frame.
frames = []

for file, expiry in zip(PATHS, EXP_DATES):
    url = 'https://cdn.cboe.com/data/us/futures/market_statistics/historical_data/VX' + file
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # do not try to parse an HTTP error page as csv
    content = response.text.splitlines()

    # The first line of every file is skipped and the NAMES columns are applied instead.
    df = pd.read_csv(io.StringIO('\n'.join(content[1:])), header=None, names=NAMES)
    df['EXPIRY_DATE'] = expiry

    df['DATE'] = pd.to_datetime(df['DATE'])
    df['EXPIRY_DATE'] = pd.to_datetime(df['EXPIRY_DATE'])

    # Time remaining until expiry, as a timedelta (reduced to whole days below).
    df['DAYS'] = df['EXPIRY_DATE'] - df['DATE']

    df[['SETTLE', 'VOLUME']] = df[['SETTLE', 'VOLUME']].apply(pd.to_numeric)
    # Keep only rows that have both a positive settlement price and positive volume.
    df_use = df.loc[(df['SETTLE'] > 0) & (df['VOLUME'] > 0), KEEP_COLS]

    frames.append(df_use)

if not frames:
    raise ValueError('No VX futures data was downloaded; check PATHS and the page format.')

VIX_FUT = pd.concat(frames)
VIX_FUT['DAYS'] = VIX_FUT.DAYS.dt.days
VIX_FUT.to_csv('DATA/VIX_FUT_post2013.csv')

Combine both datasets and remove any duplicates that may have resulted from some crossover.

In [9]:
# Columns carried into the combined dataset (VOLUME and the saved row index are not read back).
keep_cols = ['DATE','EXPIRY_DATE','CLOSE','SETTLE','DAYS']

# Pre-2013 archive data, with DATE parsed to datetimes.
df1 = pd.read_csv('DATA/VIX_FUT_prior2013.csv', usecols=keep_cols)
df1 = df1.assign(DATE=pd.to_datetime(df1['DATE']))

# Post-2013 data, treated the same way.
df2 = pd.read_csv('DATA/VIX_FUT_post2013.csv', usecols=keep_cols)
df2 = df2.assign(DATE=pd.to_datetime(df2['DATE']))

# Stack both periods and drop rows that are exact duplicates across all kept columns.
DF = pd.concat([df1, df2]).drop_duplicates()

In [10]:
# Persist the combined, de-duplicated futures table (the row index is written too).
combined_csv_path = 'DATA/VIX_FUT.csv'
DF.to_csv(combined_csv_path)