In [1]:
import pandas as pd
import numpy as np
import datetime as dt

#### Read in data

In [2]:
data = pd.read_csv("quotedata.dat", skiprows=2)

#### Set Rate 

In [3]:
from bs4 import BeautifulSoup as bs4
import requests

In [4]:
url = 'https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield'
req = requests.get(url)
soup = bs4(req.text)

In [5]:
# the one month t-bill rate
RATE = float(list(soup.find('tr', attrs=({'class':"oddrow"})))[1].text) / 100

#### Set time-now. Leave as '2020-03-23 16:00:00.000000' for proof of concept

In [6]:
# set time-now
# now = dt.datetime.now()
now = pd.to_datetime('2020-03-23 16:00:00.000000')
print(now)

2020-03-23 16:00:00


#### Set near and next timedeltas

In [7]:
# set nearest date limit
near_date = now + dt.timedelta(days=23)
print(near_date)

2020-04-15 16:00:00


In [8]:
# set farthest date limit
next_date = now + dt.timedelta(days=37)
print(next_date)

2020-04-29 16:00:00


#### Set minutes in year, and day

In [9]:
# set minutes in year
MINUTES_IN_YEAR = 525600
MINUTES_IN_DAY = 24 * 60

In [10]:
# FIX, DO NOT USE
def calculate_vix(bid, ask, R, T, F, K0, Ki, Ki_p1, Ki_m1):

    #  A
    two_div_T = 2 / T
    delta_K = (Ki_p1 - Ki_m1) / 2
    e_pow_RT = np.exp(R * T)
    Q_Ki = ((bid - ask) / 2) + Ki
    deltaK_div_KiSqrd = delta_K / (Ki**2)

    #  B
    one_div_T = 1 / T
    F_div_K0_sqrd = np.exp2((F/K0) - 1)

    A = two_div_T * deltaK_div_KiSqrd * e_pow_RT * Q_Ki
    B = one_div_T * F_div_K0_sqrd

    return A - B

In [11]:
def check_headers(input_data):
    
    '''make sure DataFrame has consistent naming convention for columns'''
    
    must_haves = ['Expiration Date', 'Strike', 'Calls', 'Bid', 'Ask', 'Puts', 'Bid.1', 'Ask.1']
    
    for i in must_haves:
        
        assert i in input_data.columns, f'data columns does not contain {i}'
    

In [12]:
def filter_quotes(input_data):
    
    # select columns
    input_data = input_data[['Expiration Date', 'Strike', 'Calls', 'Bid', 'Ask', 'Puts', 'Bid.1', 'Ask.1']]
    # filter to between near and next data
    input_data = input_data.loc[(pd.to_datetime(input_data['Expiration Date']) > near_date) & (pd.to_datetime(input_data['Expiration Date']) < next_date)]
    # convert expiration date column to datetime
    input_data['Expiration Date'] = pd.to_datetime(input_data['Expiration Date']).values
    # select only friday expiries
    input_data = input_data.loc[pd.DatetimeIndex(input_data['Expiration Date']).weekday == 4]
    # select only non-zero bids
#     input_data = input_data.loc[(input_data['Bid'] != 0) & (input_data['Bid.1'] != 0)]
    
    return input_data

In [13]:
def set_datetime(input_data):
    
    '''had to create new datetime column to replace use of Expiration Date'''
    
    # set time for weekly expiries
    input_data['ExpDT'] = input_data.loc[input_data.Calls.str.contains('W')]['Expiration Date'] + dt.timedelta(hours=16)
    # set time for standard expiries
    input_data.loc[input_data.ExpDT.isna(), 'ExpDT'] = input_data.loc[input_data.ExpDT.isna()]['Expiration Date'] + dt.timedelta(hours=9.5)
    
    return input_data

In [14]:
def set_minutes_remaining(input_data):
    
    # set big T to expiration in minutes ... input_data.ExpDT - now returns datetime[ns]
    input_data['TTE'] = (((input_data.ExpDT - now) / pd.np.timedelta64(1, 'm')) / MINUTES_IN_YEAR).values
    
    return input_data

In [15]:
def set_midpoint(input_data):
    
    input_data['calls_midpoint'] = (input_data['Ask'] + input_data['Bid']) / 2
    input_data['puts_midpoint'] = (input_data['Ask.1'] + input_data['Bid.1']) / 2
    input_data['difference'] = abs(input_data['calls_midpoint'] - input_data['puts_midpoint'])
    
    return input_data

In [16]:
def split_near_next(input_data):
    
    data = input_data.copy()
    
    data = data.set_index('Expiration Date')
    
    dates = [str(i.date()) for i in data.index.unique()]
    
    near = data.loc[data.index == dates[0]]
    
    nxt = data.loc[data.index == dates[1]]
    
    return nxt.reset_index(), near.reset_index()

In [17]:
def set_F(input_data, RATE):
    
    strike = input_data.loc[input_data.index == (input_data.loc[input_data.difference == input_data.difference.min()].index[0])]['Strike']
    call = input_data.loc[input_data.index == (input_data.loc[input_data.difference == input_data.difference.min()].index[0])]['calls_midpoint']
    put = input_data.loc[input_data.index == (input_data.loc[input_data.difference == input_data.difference.min()].index[0])]['puts_midpoint']
    tte = input_data.loc[input_data.index == (input_data.loc[input_data.difference == input_data.difference.min()].index[0])]['TTE']
    _F = strike + np.exp(tte * RATE) * (call - put)
    
    return _F.values[0]

In [18]:
def set_K0(input_data, F):
    
    _k0 = input_data.loc[input_data.Strike < F].iloc[-1]['Strike']
    
    return _k0

In [19]:
def set_deltaK(input_data):
    return ((input_data.Strike.shift(-1) - input_data.Strike.shift(1)) / 2).values

In [20]:
def set_Ki_squared(input_data):
    return (input_data.Strike ** 2).values

In [21]:
def set_deltaK_dividedby_KiSqrd(input_data):
    return (input_data.deltaK / input_data.Ki_sqrd).values

In [22]:
def set_eRT(input_data, RATE):
    return np.exp(input_data.TTE.values * RATE)

In [23]:
def set_Q_Ki(input_data, some_strike_K0):
    
    # create single midpoint column, assigning calls midpoint immediately
    input_data['midpoint'] = (input_data.Strike >= some_strike_K0) * input_data.calls_midpoint
    # assign puts midpoint
    input_data.loc[input_data.Strike < some_strike_K0 , 'midpoint'] = input_data.loc[input_data.Strike < some_strike_K0]['puts_midpoint'].values
    # average the call and put midpoint for K0
    input_data.loc[input_data.Strike == some_strike_K0 , 'midpoint'] = np.mean(input_data.loc[input_data.Strike == some_strike_K0]['puts_midpoint'].values + input_data.loc[input_data.Strike == some_strike_K0]['calls_midpoint'].values)
    
    return input_data

In [24]:
def prepare_quotes(input_data, RATE):
    
    # check headers
    try:
        check_headers(input_data)
    except AssertionError as e:
        print(e)
        return
    
    #  filter for near & next, friday expiries, non-zero bids
    mod = filter_quotes(input_data)
    #  create new expiration date column to display time
    mod = set_datetime(mod)
    #  create time to expiry in minutes
    mod = set_minutes_remaining(mod)
    #  set index
    mod = mod.reset_index()
    mod = mod.drop('index', axis=1)
    #  get midpoint
    mod = set_midpoint(mod)
    #  split
    nxt, near = split_near_next(mod)
    #  F Forward index level derived from index option prices of near
    F1 = set_F(near, RATE)
    #  F Forward index level derived from index option prices of next
    F2 = set_F(nxt, RATE)
    # Set K0 for next
    nxt_K0 = set_K0(nxt, F2)
    # set K0 for near
    near_K0 = set_K0(near, F1)
    # set Q(K_i) for near
    near = set_Q_Ki(near, near_K0)
    # set Q(K_i) for next
    nxt = set_Q_Ki(nxt, nxt_K0)
    # set delta K for near
    near['deltaK'] = set_deltaK(near)
    # set delta K for next
    nxt['deltaK'] = set_deltaK(nxt)
    # set K_i ** 2 for near
    near['Ki_sqrd'] = set_Ki_squared(near)
    # set K_i ** 2 for next
    nxt['Ki_sqrd'] = set_Ki_squared(nxt)
    # set K_i ** 2 for near
    near['delK_div_KiSqrd'] = set_deltaK_dividedby_KiSqrd(near)
    # set K_i ** 2 for next
    nxt['delK_div_KiSqrd'] = set_deltaK_dividedby_KiSqrd(nxt)
    near['eRT'] = set_eRT(near, RATE)
    # set delta K for next
    nxt['eRT'] = set_eRT(nxt, RATE)
    
    return nxt, near

In [30]:
def set_d1(input_data):
    return ((2/input_data.TTE) * input_data.delK_div_KiSqrd * input_data.eRT * input_data.midpoint).values

In [35]:
def set_d2(input_data, F, K0):
    return ((1/input_data.TTE) * np.exp2((F / K0)  - 1)).values

In [36]:
def calc_sigma(d1, d2):
    return d1 - d2

In [25]:
nxt, near = prepare_quotes(data, RATE)

In [26]:
near

Unnamed: 0,Expiration Date,Strike,Calls,Bid,Ask,Puts,Bid.1,Ask.1,ExpDT,TTE,calls_midpoint,puts_midpoint,difference,midpoint,deltaK,Ki_sqrd,delK_div_KiSqrd,eRT
0,2020-04-17,100.0,SPX200417C00100000,2186.7,2203.80,SPX200417P00100000,0.0,0.1,2020-04-17 09:30:00,0.067751,2195.250,0.05,2195.200,0.050,,10000.0,,1.000020
1,2020-04-17,100.0,SPXW200417C00100000,2184.6,2207.50,SPXW200417P00100000,0.0,0.1,2020-04-17 16:00:00,0.068493,2196.050,0.05,2196.000,0.050,50.0,10000.0,0.005000,1.000021
2,2020-04-17,200.0,SPX200417C00200000,2086.8,2103.90,SPX200417P00200000,0.0,0.1,2020-04-17 09:30:00,0.067751,2095.350,0.05,2095.300,0.050,50.0,40000.0,0.001250,1.000020
3,2020-04-17,200.0,SPXW200417C00200000,2084.7,2107.60,SPXW200417P00200000,0.0,0.1,2020-04-17 16:00:00,0.068493,2096.150,0.05,2096.100,0.050,50.0,40000.0,0.001250,1.000021
4,2020-04-17,300.0,SPX200417C00300000,1988.3,2003.60,SPX200417P00300000,0.0,0.1,2020-04-17 09:30:00,0.067751,1995.950,0.05,1995.900,0.050,50.0,90000.0,0.000556,1.000020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
757,2020-04-17,4200.0,SPXW200417C04200000,0.0,0.25,SPXW200417P04200000,1890.4,1913.3,2020-04-17 16:00:00,0.068493,0.125,1901.85,1901.725,0.125,50.0,17640000.0,0.000003,1.000021
758,2020-04-17,4300.0,SPX200417C04300000,0.0,0.35,SPX200417P04300000,1993.7,2010.8,2020-04-17 09:30:00,0.067751,0.175,2002.25,2002.075,0.175,50.0,18490000.0,0.000003,1.000020
759,2020-04-17,4300.0,SPXW200417C04300000,0.0,0.25,SPXW200417P04300000,1990.4,2013.2,2020-04-17 16:00:00,0.068493,0.125,2001.80,2001.675,0.125,50.0,18490000.0,0.000003,1.000021
760,2020-04-17,4400.0,SPX200417C04400000,0.0,0.05,SPX200417P04400000,2093.6,2110.8,2020-04-17 09:30:00,0.067751,0.025,2102.20,2102.175,0.025,50.0,19360000.0,0.000003,1.000020


In [27]:
nxt

Unnamed: 0,Expiration Date,Strike,Calls,Bid,Ask,Puts,Bid.1,Ask.1,ExpDT,TTE,calls_midpoint,puts_midpoint,difference,midpoint,deltaK,Ki_sqrd,delK_div_KiSqrd,eRT
0,2020-04-24,600.0,SPXW200424C00600000,1684.3,1704.80,SPXW200424P00600000,0.15,0.50,2020-04-24 16:00:00,0.087671,1694.550,0.325,1694.225,0.325,,360000.0,,1.000026
1,2020-04-24,700.0,SPXW200424C00700000,1584.7,1605.20,SPXW200424P00700000,0.40,0.80,2020-04-24 16:00:00,0.087671,1594.950,0.600,1594.350,0.600,100.0,490000.0,0.000204,1.000026
2,2020-04-24,800.0,SPXW200424C00800000,1485.1,1505.60,SPXW200424P00800000,0.70,1.15,2020-04-24 16:00:00,0.087671,1495.350,0.925,1494.425,0.925,100.0,640000.0,0.000156,1.000026
3,2020-04-24,900.0,SPXW200424C00900000,1385.5,1406.00,SPXW200424P00900000,1.05,1.60,2020-04-24 16:00:00,0.087671,1395.750,1.325,1394.425,1.325,100.0,810000.0,0.000123,1.000026
4,2020-04-24,1000.0,SPXW200424C01000000,1286.1,1306.60,SPXW200424P01000000,1.60,2.20,2020-04-24 16:00:00,0.087671,1296.350,1.900,1294.450,1.900,100.0,1000000.0,0.000100,1.000026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329,2020-04-24,3750.0,SPXW200424C03750000,0.0,0.40,SPXW200424P03750000,1443.50,1464.00,2020-04-24 16:00:00,0.087671,0.200,1453.750,1453.550,0.200,50.0,14062500.0,0.000004,1.000026
330,2020-04-24,3800.0,SPXW200424C03800000,0.0,0.40,SPXW200424P03800000,1493.50,1514.00,2020-04-24 16:00:00,0.087671,0.200,1503.750,1503.550,0.200,50.0,14440000.0,0.000003,1.000026
331,2020-04-24,3850.0,SPXW200424C03850000,0.0,0.40,SPXW200424P03850000,1543.50,1564.00,2020-04-24 16:00:00,0.087671,0.200,1553.750,1553.550,0.200,50.0,14822500.0,0.000003,1.000026
332,2020-04-24,3900.0,SPXW200424C03900000,0.0,0.40,SPXW200424P03900000,1593.40,1613.90,2020-04-24 16:00:00,0.087671,0.200,1603.650,1603.450,0.200,75.0,15210000.0,0.000005,1.000026
