In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
%matplotlib notebook
import datetime as dt
from scipy.stats import norm
import requests
import json

In [2]:
def yahoo_opt_clean(x, type):
    """Flatten one side of a Yahoo option-chain response into a DataFrame.

    Parameters
    ----------
    x : dict
        Decoded JSON response; the contracts are read from
        ``x['optionChain']['result'][0]['options'][0][type]``.
    type : str
        Which side of the chain to extract: ``'calls'`` or ``'puts'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ask``, ``bid``, ``expiration``, ``strike``, ``inTheMoney``
        plus ``type`` (``'C'`` for calls, ``'P'`` for puts). Fields absent
        from the response come back as NaN columns, and an empty contract
        list yields an empty frame with the same columns.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'calls'`` nor ``'puts'``.
    """
    type_codes = {'calls': 'C', 'puts': 'P'}
    # Validate before touching the payload; otherwise an unknown side fails
    # with a KeyError from the dict lookup and this error is never reached.
    if type not in type_codes:
        raise ValueError('Unknown option type')

    contracts = x['optionChain']['result'][0]['options'][0][type]
    frame = pd.json_normalize(contracts)
    # reindex returns a new frame (so adding a column below is safe) and
    # tolerates missing fields / an empty contract list.
    frame = frame.reindex(columns=['ask', 'bid', 'expiration', 'strike', 'inTheMoney'])
    frame['type'] = type_codes[type]
    return frame


def _fetch_option_chain(url, timeout):
    """GET ``url`` and return the decoded JSON body, raising on HTTP errors."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def get_options(symbol='SPY', timeout=30):
    """Download the option chain for ``symbol`` from Yahoo Finance.

    The first request returns the quote, the list of expiration dates and
    the contracts for the first expiration; each later expiration is then
    requested individually via ``?date=<expiration>``.

    Parameters
    ----------
    symbol : str, optional
        Ticker to query. Defaults to ``'SPY'``.
    timeout : float, optional
        Per-request timeout in seconds, so a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    options : pandas.DataFrame
        Calls and puts for every downloaded expiration, as produced by
        ``yahoo_opt_clean``, with a fresh integer index.
    current_price : number
        ``regularMarketPrice`` from the quote.
    current_date : number
        ``regularMarketTime`` from the quote, returned unconverted.

    Raises
    ------
    requests.HTTPError
        If any request returns an error status.
    """
    url = 'https://query2.finance.yahoo.com/v7/finance/options/' + symbol
    content = _fetch_option_chain(url, timeout)
    result = content['optionChain']['result'][0]
    current_price = result['quote']['regularMarketPrice']
    current_date = result['quote']['regularMarketTime']
    dates = result['expirationDates']

    # Collect the per-expiration frames and concatenate once at the end:
    # DataFrame.append is gone in pandas 2.0 and was quadratic in a loop.
    frames = [yahoo_opt_clean(content, 'calls'), yahoo_opt_clean(content, 'puts')]
    for expiration in dates[1:]:
        content = _fetch_option_chain(url + '?date=' + str(expiration), timeout)
        num_strikes = len(content['optionChain']['result'][0]['strikes'])
        if num_strikes <= 1:
            # Stop at the first expiration with at most one strike; later
            # expirations are not downloaded.
            break
        frames.append(yahoo_opt_clean(content, 'calls'))
        frames.append(yahoo_opt_clean(content, 'puts'))

    options = pd.concat(frames, ignore_index=True)
    return options, current_price, current_date

# Download the chain. `date` holds the quote's raw regularMarketTime value at
# this point (presumably epoch seconds); the next cell converts it to a datetime.
options, current_price, date = get_options()

In [3]:
# Keep only contracts not flagged in-the-money, price each at the bid/ask
# midpoint, and drop the columns that are no longer needed.
options = (
    options[options['inTheMoney'].ne(True)]
    .assign(price=lambda quotes: (quotes['ask'] - quotes['bid']) / 2 + quotes['bid'])
    .drop(columns=['ask', 'bid', 'inTheMoney'])
    .reset_index(drop=True)
)

# Convert the quote timestamp to a *naive UTC* datetime. The expiration index
# is later built with pd.to_datetime(..., unit='s'), which is also naive UTC;
# a local-time datetime here would skew days-to-expiry by the UTC offset.
date = dt.datetime.fromtimestamp(int(date), dt.timezone.utc).replace(tzinfo=None)

# Spot price rounded to the nearest multiple of 5, used to centre the strike window.
round_price = round(current_price / 5) * 5

In [4]:
# Pivot into a grid: one row per expiration, one column per strike, mid prices
# as values (pivot returns a new frame, so `options` is left untouched).
df = options.pivot(index='expiration', columns='strike', values='price')

# Keep strikes within +/-50 of the rounded spot price that are divisible by 5
strike_window = [
    c for c in df.columns
    if (round_price - 50) <= c <= (round_price + 50) and c % 5 == 0
]
df = df[strike_window]

# Drop rows with many nans (more than 3 missing strikes)
df = df.dropna(axis=0, thresh=df.shape[1] - 3)
# Drop columns with any nans
df = df.dropna(axis=1, how='any')

# Convert the index to whole days between the quote time and expiration
df.index = pd.to_datetime(df.index, unit='s')
df.index = (df.index - date).days

# Keep expirations within one year, and drop those with no whole day left:
# T <= 0 makes sigma * sqrt(T) zero in d1d2 and the prices come out NaN/inf.
df = df[(df.index > 0) & (df.index < 366)]

In [5]:
# Preview the cleaned price grid: rows are days to expiry, columns are strikes.
df.head(20)

strike,220.0,225.0,230.0,235.0,240.0,245.0,250.0,255.0,260.0,265.0,270.0,275.0,280.0,285.0,290.0,295.0,300.0,310.0
expiration,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
9,0.065,0.085,0.105,0.135,0.17,0.22,0.31,0.46,0.745,1.32,2.43,1.245,0.22,0.035,0.015,0.005,0.01,0.005
16,0.16,0.195,0.24,0.33,0.375,0.485,0.65,0.905,1.32,1.995,3.175,1.96,0.565,0.135,0.045,0.02,0.01,0.005
36,0.42,0.49,0.61,0.745,0.92,1.165,1.49,1.94,2.57,3.5,4.915,3.045,1.315,0.49,0.175,0.07,0.04,0.02
58,0.75,0.88,1.06,1.27,1.545,1.88,2.32,2.88,3.625,4.65,6.105,4.335,2.255,1.045,0.445,0.19,0.105,0.03
86,1.15,1.36,1.61,1.91,2.275,2.725,3.285,3.985,4.87,6.015,7.5,6.08,3.64,1.975,1.0,0.505,0.265,0.1
114,1.6,1.88,2.21,2.595,3.065,3.615,4.28,5.095,6.1,7.335,8.875,7.425,4.845,2.905,1.64,0.9,0.51,0.25
128,1.84,2.145,2.5,2.93,3.425,3.625,4.735,5.595,6.625,7.895,9.47,7.9,5.285,3.285,1.915,1.09,0.61,0.22
149,2.165,2.5,2.9,3.36,3.905,4.545,5.3,6.205,7.29,8.59,10.175,8.69,6.05,3.935,2.42,1.435,0.835,0.31
212,3.18,3.315,4.16,4.735,5.415,6.19,7.07,8.09,9.28,10.665,12.27,11.37,8.59,6.21,4.305,2.865,1.85,0.76
219,3.31,3.77,4.29,4.485,5.58,5.86,7.26,7.69,9.49,10.875,11.69,12.68,8.745,6.365,5.185,2.97,1.93,1.0


In [6]:
# Annualised interest rate as a decimal (0.0188 = 1.88%), hard-coded.
# It is read as a global by bs_estimate and vega_estimate.
# NOTE(review): the source of this figure is not shown here — confirm it
# matches the quote date before reusing the notebook.
r = 0.0188

In [7]:
def d1d2(S, K, r, sigma, T):
    """Return the Black-Scholes ``(d1, d2)`` terms.

    ``S`` is the spot price, ``K`` the strike, ``r`` the interest rate,
    ``sigma`` the volatility and ``T`` the time to expiry **in years**.
    """
    vol_sqrt_t = sigma * np.sqrt(T)
    drift = (r + ((sigma**2) / 2)) * T
    d1 = (np.log(S / K) + drift) / vol_sqrt_t
    return d1, d1 - vol_sqrt_t


def price_call(S, K, r, sigma, T):
    """Black-Scholes price of a European call.

    Parameters
    ----------
    S : spot price
    K : strike price
    r : interest rate (annualised, as a decimal)
    sigma : volatility (annualised, as a decimal)
    T : time to expiry in **days** (scalar or array)

    Returns
    -------
    The call price, with the same shape as the broadcast inputs.
    """
    # Rebind instead of `T /= 365`: the in-place form would overwrite a
    # caller's float array and fails outright on an integer array.
    T = T / 365  # Converts T from days to years
    d1, d2 = d1d2(S, K, r, sigma, T)
    c = (S * norm.cdf(d1)) - (K * np.exp(-1 * r * T) * norm.cdf(d2))
    return c

def price_put(S, K, r, sigma, T):
    """Black-Scholes price of a European put.

    Parameters
    ----------
    S : spot price
    K : strike price
    r : interest rate (annualised, as a decimal)
    sigma : volatility (annualised, as a decimal)
    T : time to expiry in **days** (scalar or array)

    Returns
    -------
    The put price, with the same shape as the broadcast inputs.
    """
    # Rebind instead of `T /= 365`: the in-place form would overwrite a
    # caller's float array and fails outright on an integer array.
    T = T / 365  # Converts T from days to years
    d1, d2 = d1d2(S, K, r, sigma, T)
    p = (K * np.exp(-1 * r * T) * norm.cdf(-d2)) - (S * norm.cdf(-d1))
    return p

def option_vega(S, K, r, sigma, T):
    """Black-Scholes vega, scaled per one percentage point of volatility.

    Parameters
    ----------
    S : spot price
    K : strike price
    r : interest rate (annualised, as a decimal)
    sigma : volatility (annualised, as a decimal)
    T : time to expiry in **days** (scalar or array)

    Returns
    -------
    ``S * sqrt(T) * pdf(d1) / 100``. The unscaled expression is the price
    sensitivity per unit change in ``sigma``; dividing by 100 expresses it
    per 0.01 (one percentage point) change instead. Callers that need the
    derivative with respect to ``sigma`` itself must multiply by 100.
    """
    # Rebind instead of `T /= 365`: the in-place form would overwrite a
    # caller's float array and fails outright on an integer array.
    T = T / 365  # Converts T from days to years
    d1, _ = d1d2(S, K, r, sigma, T)
    v = S * np.sqrt(T) * norm.pdf(d1) / 100
    return v

In [8]:
def bs_estimate(iv, frame):
    """Black-Scholes price for every cell of a (days-to-expiry x strike) grid.

    Parameters
    ----------
    iv : 2-D array
        Volatility for each cell, indexed ``iv[row, column]`` in the same
        order as ``frame``.
    frame : pandas.DataFrame
        Only its labels are used: the index supplies days to expiry and the
        columns supply strikes. It is not modified.

    Returns
    -------
    numpy.ndarray
        Array of ``frame.shape``. Strikes at or below the global
        ``current_price`` are priced with ``price_put``; strikes above it
        with ``price_call``. The global ``r`` is used as the interest rate.
    """
    bs_price = np.zeros(frame.shape)
    # Iterate the labels directly; DataFrame.iteritems() no longer exists in
    # pandas 2.0 and the cell values were never read anyway.
    for i, days in enumerate(frame.index):
        for j, strike in enumerate(frame.columns):
            pricer = price_put if strike <= current_price else price_call
            bs_price[i, j] = pricer(current_price, strike, r, iv[i, j], days)
    return bs_price

def vega_estimate(iv, frame):
    """``option_vega`` for every cell of a (days-to-expiry x strike) grid.

    Parameters
    ----------
    iv : 2-D array
        Volatility for each cell, indexed ``iv[row, column]`` in the same
        order as ``frame``.
    frame : pandas.DataFrame
        Only its labels are used: the index supplies days to expiry and the
        columns supply strikes. It is not modified.

    Returns
    -------
    numpy.ndarray
        Array of ``frame.shape`` holding ``option_vega`` evaluated with the
        globals ``current_price`` and ``r``.
    """
    vega = np.zeros(frame.shape)
    # Iterate the labels directly; DataFrame.iteritems() no longer exists in
    # pandas 2.0 and the cell values were never read anyway.
    for i, days in enumerate(frame.index):
        for j, strike in enumerate(frame.columns):
            vega[i, j] = option_vega(current_price, strike, r, iv[i, j], days)
    return vega

# Solve for the implied volatility of every grid cell with a damped Newton
# iteration on the Black-Scholes price.
PRICE_TOLERANCE = 0.00001  # stop once every |model - market| price gap is below this
MAX_ITERATIONS = 1000      # safety cap on the number of updates
STEP_DIVISOR = 200
# option_vega returns vega divided by 100, so res / vega is 100x the raw
# Newton step; dividing by 200 leaves an effective step of half a Newton step,
# which keeps the iteration from exploding.
# Should probably switch to bisection or secant or similar

# Start with a huge vol estimate to stop small values from going NaN
iv = np.full(df.shape, 3.0)
bs_price = bs_estimate(iv, df)
res = bs_price - df.values
vega = vega_estimate(iv, df)

counter = 0
while np.abs(res).max() > PRICE_TOLERANCE and counter <= MAX_ITERATIONS:
    iv = iv - (res / vega) / STEP_DIVISOR
    bs_price = bs_estimate(iv, df)
    res = bs_price - df.values
    vega = vega_estimate(iv, df)
    counter += 1

# A NaN residual makes the loop condition False, and hitting the cap also ends
# the loop, so check explicitly instead of always reporting success.
worst_residual = np.abs(res).max()
if np.isfinite(res).all() and worst_residual <= PRICE_TOLERANCE:
    print("Completed in %d iterations" % counter)
else:
    print("WARNING: did not converge after %d iterations (max |residual| = %g)"
          % (counter, worst_residual))

Completed in 26 iterations


In [9]:
# Plot the implied-volatility surface over the (strike, days-to-expiry) grid.
X, Y = np.meshgrid(list(df), df.index.values)

fig = plt.figure(figsize=(10, 8))
# Figure.gca() no longer accepts keyword arguments in current matplotlib;
# add_subplot is the supported way to create a 3-D axes.
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, iv, cmap=cm.coolwarm)
ax.set_xlabel('Strike')
ax.set_ylabel('Days until expiry')
ax.set_zlabel('Implied volatility')

# Format into a local string instead of rebinding `date`, so this cell (and
# the earlier ones that need `date` as a datetime) can be re-run safely.
title_text = "Implied volatility on %s" % date.strftime("%Y-%m-%d")
ax.set_title(title_text)
plt.show()

<IPython.core.display.Javascript object>