In [10]:
from datetime import date, timedelta, datetime
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Load the previously saved CBOE history; the 'Date' column is parsed as
# dates and used as the index.
df_cboe_base = pd.read_csv(
    'CBOE-data.csv',
    parse_dates=['Date'],
    index_col='Date',
)

df_cboe_base

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-07,0.79,184762,0.91,634363
2010-07-08,1.25,141024,1.12,531457
2010-07-09,0.56,196082,1.20,691937
2010-07-12,2.01,241936,1.49,704517
2010-07-13,0.97,159440,1.25,1172241
...,...,...,...,...
2020-04-03,0.66,351100,1.72,1362044
2020-04-06,1.14,536473,1.27,1421641
2020-04-07,1.67,380770,1.12,1315103
2020-04-08,1.11,325232,1.39,1262348


In [11]:
# Accumulator for scraped rows: page date -> list of values for that date.
cboe_data_dict = dict()

In [12]:
# Scrape the CBOE daily market statistics page for every calendar day from the
# last date in `df_cboe_base` through today, storing one entry per page date
# in `cboe_data_dict`.
CBOE_DATE_HEADER_ID = 'stats-date-header'
CBOE_TABLE_CLASS = 'bats-table bats-table--left'
# Positions of the tables of interest among the matched tables
# (Summary: 0, VIX: 5, SPX + SPXW: 6).
SUMMARY_TABLE_IDX, VIX_TABLE_IDX, SPX_TABLE_IDX = 0, 5, 6
REQUEST_TIMEOUT_SECONDS = 30


def scrape_cboe_day(day):
    """Fetch one day's CBOE statistics page and extract the tracked values.

    Args:
        day: Object exposing ``year``, ``month`` and ``day`` attributes that
            names the date to request.

    Returns:
        A ``(page_date, values)`` tuple. ``page_date`` is the ``datetime``
        parsed from the page's own date header; ``values`` is the list
        ``[vix_pc_ratio, vix_volume, spx_pc_ratio, spx_volume]``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        RuntimeError: If the date header or the expected tables are missing
            from the returned page.
    """
    y, m, d = day.year, day.month, day.day
    url = f'https://markets.cboe.com/us/options/market_statistics/daily/?mkt=cone&dt={y}-{m}-{d}'

    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() stops an error page from being parsed as data.
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    header = soup.find(id=CBOE_DATE_HEADER_ID)
    if header is None:
        raise RuntimeError(f'No element with id {CBOE_DATE_HEADER_ID!r} found at {url}')
    # The date is the last three whitespace-separated tokens of the header
    # text, in "%B %d, %Y" form.
    date_string = ' '.join(header.get_text().split()[-3:])
    page_date = datetime.strptime(date_string, "%B %d, %Y")

    tables = soup.find_all('table', {'class': CBOE_TABLE_CLASS})
    if len(tables) <= SPX_TABLE_IDX:
        raise RuntimeError(
            f'Expected at least {SPX_TABLE_IDX + 1} tables with class '
            f'{CBOE_TABLE_CLASS!r} at {url}, found {len(tables)}'
        )

    # read_html expects a file-like object; passing literal HTML strings is
    # deprecated in recent pandas releases.
    df_summary = pd.read_html(StringIO(str(tables[SUMMARY_TABLE_IDX])))[0]
    df_vix = pd.read_html(StringIO(str(tables[VIX_TABLE_IDX])), skiprows=1)[0]
    df_spx = pd.read_html(StringIO(str(tables[SPX_TABLE_IDX])), skiprows=1)[0]

    vix_pc_ratio = df_summary.iloc[4, 1]
    spx_pc_ratio = df_summary.iloc[5, 1]
    vix_volume = df_vix.iloc[0, 3]
    spx_volume = df_spx.iloc[0, 3]

    return page_date, [vix_pc_ratio, vix_volume, spx_pc_ratio, spx_volume]


last_known_date = df_cboe_base.index[-1]

curr_date = last_known_date
end = datetime.today()
delta = timedelta(days=1)

while curr_date <= end:
    print(f'Scraping date: {curr_date.year}-{curr_date.month}-{curr_date.day}')

    # Entries are keyed by the date printed on the page, not the requested
    # date, so requests that resolve to the same page share a single entry.
    page_date, row = scrape_cboe_day(curr_date)
    cboe_data_dict[page_date] = row

    curr_date += delta

Scraping date: 2020-4-9
Scraping date: 2020-4-10
Scraping date: 2020-4-11


In [13]:
# Inspect the scraped entries (keyed by the date parsed from each page).
cboe_data_dict

{datetime.datetime(2020, 4, 9, 0, 0): [0.81, 439073, 1.27, 1418972]}

In [14]:
# Build a frame from the scraped mapping: one row per dictionary key, with
# the four value columns named explicitly.
df_cboe_new = pd.DataFrame.from_dict(
    data=cboe_data_dict,
    orient='index',
    columns=[
        'VIX P/C Ratio',
        'VIX Options Volume',
        'SPX P/C Ratio',
        'SPX Options Volume',
    ],
)
# Name the index 'Date'.
df_cboe_new.index.name = 'Date'
df_cboe_new.tail(20)

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-04-09,0.81,439073,1.27,1418972


In [15]:
# Stack the saved history on top of the newly scraped rows, then move the
# index into a regular column.
df_cboe_full = pd.concat(
    [df_cboe_base, df_cboe_new],
)
df_cboe_full = df_cboe_full.reset_index()
df_cboe_full.tail(20)

Unnamed: 0,Date,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
2439,2020-03-16,0.82,1357268,1.75,1902688
2440,2020-03-17,0.35,1549467,2.03,1954248
2441,2020-03-18,0.7,1030431,2.48,2514072
2442,2020-03-19,1.37,630034,1.3,1812253
2443,2020-03-20,1.61,877238,1.42,1732896
2444,2020-03-23,2.67,764172,1.36,1593954
2445,2020-03-24,1.0,807516,1.16,1522470
2446,2020-03-25,0.91,514502,1.11,1740055
2447,2020-03-26,0.96,356107,1.44,1393379
2448,2020-03-27,1.2,504563,1.43,1428892


In [16]:
# Combine saved and scraped rows into the final frame: one row per date,
# indexed and sorted by 'Date'.
#
# The frame is rebuilt from `df_cboe_base` and `df_cboe_new` rather than from
# the previous value of `df_cboe_full`, so this cell can be re-run safely.
# Deriving it from its own earlier result fails on a second run, because
# 'Date' has already become the index by then.
df_cboe_full = (
    pd.concat([df_cboe_base, df_cboe_new])
    .reset_index()
    # Keeps the first occurrence of each date, i.e. the saved row wins over
    # a freshly scraped row for the same date.
    .drop_duplicates(subset='Date')
    .set_index('Date')
    .sort_index()
)
df_cboe_full

Unnamed: 0_level_0,VIX P/C Ratio,VIX Options Volume,SPX P/C Ratio,SPX Options Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-07,0.79,184762,0.91,634363
2010-07-08,1.25,141024,1.12,531457
2010-07-09,0.56,196082,1.20,691937
2010-07-12,2.01,241936,1.49,704517
2010-07-13,0.97,159440,1.25,1172241
...,...,...,...,...
2020-04-03,0.66,351100,1.72,1362044
2020-04-06,1.14,536473,1.27,1421641
2020-04-07,1.67,380770,1.12,1315103
2020-04-08,1.11,325232,1.39,1262348


In [17]:
# Write the combined frame back to the same CSV file that was loaded at the
# start of the notebook.
df_cboe_full.to_csv(path_or_buf='CBOE-data.csv')