In [24]:
import pandas as pd
from datetime import date, timedelta, datetime
from bs4 import BeautifulSoup
import requests
import re

df_vix_base = pd.read_csv('http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vixpc.csv',
                     names=['Date', 'VIX P/C Ratio', 'Puts', 'Calls', 'VIX Options Volume'],
                     index_col='Date',
                     parse_dates=['Date'],
                     skiprows=3)

df_vix_base = df_vix_base[['VIX P/C Ratio', 'VIX Options Volume']]

df_spx_base = pd.read_csv('http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/spxpc.csv',
                     names=['Date', 'SPX P/C Ratio', 'Puts', 'Calls', 'SPX Options Volume'],
                     index_col='Date',
                     parse_dates=['Date'],
                     skiprows=3)

df_spx_base = df_spx_base[['SPX P/C Ratio', 'SPX Options Volume']]

In [25]:
df_vix_base

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-02-27,0.01,7067
2006-02-28,1.75,8486
2006-03-01,0.70,3031
2006-03-02,0.18,17846
2006-03-03,21.04,24219
...,...,...
2019-09-30,0.81,243740
2019-10-01,0.97,375080
2019-10-02,0.56,759751
2019-10-03,0.46,399955


In [26]:
df_spx_base

Unnamed: 0_level_0,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-07-07,0.91,634363
2010-07-08,1.12,531457
2010-07-09,1.20,691937
2010-07-12,1.49,704517
2010-07-13,1.25,1172241
...,...,...
2019-09-30,2.26,1200417
2019-10-01,2.07,1213727
2019-10-02,1.82,2267460
2019-10-03,2.42,1547116


In [27]:
df_cboe_base = df_vix_base.join(df_spx_base, how='inner')
df_cboe_base

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-07,0.79,184762,0.91,634363
2010-07-08,1.25,141024,1.12,531457
2010-07-09,0.56,196082,1.20,691937
2010-07-12,2.01,241936,1.49,704517
2010-07-13,0.97,159440,1.25,1172241
...,...,...,...,...
2019-09-30,0.81,243740,2.26,1200417
2019-10-01,0.97,375080,2.07,1213727
2019-10-02,0.56,759751,1.82,2267460
2019-10-03,0.46,399955,2.42,1547116


In [28]:
cboe_data_dict = {}

In [29]:
n = len(df_cboe_base)
last_known_date = df_cboe_base.index[n-1]

curr_date = last_known_date
end = datetime.today()
delta = timedelta(days=1)

while curr_date <= end:
    
    y, m, d = curr_date.year, curr_date.month, curr_date.day
    print(f'Scraping date: {y}-{m:02}-{d:02}')
    
    url = f'https://markets.cboe.com/us/options/market_statistics/daily/?mkt=cone&dt={y}-{m}-{d}'
    html = requests.get(url).content
    soup = BeautifulSoup(html, 'html.parser')
    
    date_id = 'stats-date-header'
    date_string = soup.find(id=date_id).get_text()
    date_string = ' '.join(date_string.split(' ')[-3:])
    
    page_date = datetime.strptime(date_string, "%B %d, %Y")
    
    table_class = 'bats-table bats-table--left'
    tables = soup.findAll('table', {'class' : table_class})
    
    # to find the table indices
    # Summary: 0, VIX: 5, SPX + SPXW: 6
    summary_index, vix_index, spx_index = -1, -1, -1
    for i,t in enumerate(tables):
        s = str(t)
        if 'RATIOS' in s:
            summary_index = i
        elif 'CBOE VOLATILITY INDEX (VIX)' in s:
            vix_index = i
        elif 'SPX + SPXW' in s:
            spx_index = i
        if min([summary_index, vix_index, spx_index]) >= 0:
            break
    
    table_summary = tables[summary_index]
    table_vix = tables[vix_index]
    table_spx = tables[spx_index]
    
    df_summary = pd.read_html(str(table_summary))[0]
    df_vix = pd.read_html(str(table_vix), skiprows=1)[0]
    df_spx = pd.read_html(str(table_spx), skiprows=1)[0]
        
    vix_pc_ratio = df_summary.iloc[4,1]
    spx_pc_ratio = df_summary.iloc[5,1]
    
    vix_volume = df_vix.iloc[0,3]
    spx_volume = df_spx.iloc[0,3]
    
    cboe_data_dict[page_date] = [
        vix_pc_ratio,
        vix_volume,
        spx_pc_ratio,
        spx_volume
    ]
    
    curr_date += delta

Scraping date: 2019-10-04
Scraping date: 2019-10-05
Scraping date: 2019-10-06
Scraping date: 2019-10-07
Scraping date: 2019-10-08
Scraping date: 2019-10-09
Scraping date: 2019-10-10
Scraping date: 2019-10-11
Scraping date: 2019-10-12
Scraping date: 2019-10-13
Scraping date: 2019-10-14
Scraping date: 2019-10-15
Scraping date: 2019-10-16
Scraping date: 2019-10-17
Scraping date: 2019-10-18
Scraping date: 2019-10-19
Scraping date: 2019-10-20
Scraping date: 2019-10-21
Scraping date: 2019-10-22
Scraping date: 2019-10-23
Scraping date: 2019-10-24
Scraping date: 2019-10-25
Scraping date: 2019-10-26
Scraping date: 2019-10-27
Scraping date: 2019-10-28
Scraping date: 2019-10-29
Scraping date: 2019-10-30
Scraping date: 2019-10-31
Scraping date: 2019-11-01
Scraping date: 2019-11-02
Scraping date: 2019-11-03
Scraping date: 2019-11-04
Scraping date: 2019-11-05
Scraping date: 2019-11-06
Scraping date: 2019-11-07
Scraping date: 2019-11-08
Scraping date: 2019-11-09
Scraping date: 2019-11-10
Scraping dat

In [30]:
cboe_data_dict

{datetime.datetime(2020, 4, 9, 0, 0): [0.81, 439073, 1.27, 1418972],
 datetime.datetime(2019, 10, 7, 0, 0): [0.16, 304163, 1.75, 1109175],
 datetime.datetime(2019, 10, 8, 0, 0): [0.56, 693653, 1.71, 1197655],
 datetime.datetime(2019, 10, 9, 0, 0): [0.35, 269798, 2.3, 1074665],
 datetime.datetime(2019, 10, 10, 0, 0): [0.73, 323699, 1.72, 1246904],
 datetime.datetime(2019, 10, 11, 0, 0): [1.12, 809376, 1.77, 1740135],
 datetime.datetime(2019, 10, 14, 0, 0): [0.92, 643534, 1.99, 1167873],
 datetime.datetime(2019, 10, 15, 0, 0): [1.14, 747997, 1.97, 1390507],
 datetime.datetime(2019, 10, 16, 0, 0): [0.51, 374433, 1.75, 1559984],
 datetime.datetime(2019, 10, 17, 0, 0): [0.35, 341328, 1.6, 1209540],
 datetime.datetime(2019, 10, 18, 0, 0): [0.34, 208911, 1.75, 1433833],
 datetime.datetime(2019, 10, 21, 0, 0): [0.37, 167549, 1.72, 947971],
 datetime.datetime(2019, 10, 22, 0, 0): [0.53, 210060, 1.97, 809002],
 datetime.datetime(2019, 10, 23, 0, 0): [0.11, 185841, 1.81, 910342],
 datetime.dateti

In [31]:
df_cboe_new = pd.DataFrame.from_dict(
    cboe_data_dict,
    orient='index',
    columns=['VIX P/C Ratio', 'VIX Options Volume',
             'SPX P/C Ratio', 'SPX Options Volume'])
df_cboe_new.index.rename('Date', inplace=True)
df_cboe_new.tail(20)

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-12,1.17,1712200,1.56,3038159
2020-03-13,1.04,2055169,1.58,3209449
2020-03-16,0.82,1357268,1.75,1902688
2020-03-17,0.35,1549467,2.03,1954248
2020-03-18,0.7,1030431,2.48,2514072
2020-03-19,1.37,630034,1.3,1812253
2020-03-20,1.61,877238,1.42,1732896
2020-03-23,2.67,764172,1.36,1593954
2020-03-24,1.0,807516,1.16,1522470
2020-03-25,0.91,514502,1.11,1740055


In [32]:
df_cboe_full = pd.concat([df_cboe_base, df_cboe_new]).reset_index()
df_cboe_full.tail(20)

Unnamed: 0,Date,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
2438,2020-03-12,1.17,1712200,1.56,3038159
2439,2020-03-13,1.04,2055169,1.58,3209449
2440,2020-03-16,0.82,1357268,1.75,1902688
2441,2020-03-17,0.35,1549467,2.03,1954248
2442,2020-03-18,0.7,1030431,2.48,2514072
2443,2020-03-19,1.37,630034,1.3,1812253
2444,2020-03-20,1.61,877238,1.42,1732896
2445,2020-03-23,2.67,764172,1.36,1593954
2446,2020-03-24,1.0,807516,1.16,1522470
2447,2020-03-25,0.91,514502,1.11,1740055


In [33]:
df_cboe_full = df_cboe_full.drop_duplicates(subset='Date').set_index('Date').sort_index()
df_cboe_full

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-07,0.79,184762,0.91,634363
2010-07-08,1.25,141024,1.12,531457
2010-07-09,0.56,196082,1.20,691937
2010-07-12,2.01,241936,1.49,704517
2010-07-13,0.97,159440,1.25,1172241
...,...,...,...,...
2020-04-03,0.66,351100,1.72,1362044
2020-04-06,1.14,536473,1.27,1421641
2020-04-07,1.67,380770,1.12,1315103
2020-04-08,1.11,325232,1.39,1262348


In [34]:
df_cboe_full.to_csv('CBOE-data.csv')