In [3]:
import csv
import numpy as np
import datetime
import pandas as pd
from dateutil import rrule 

# Nanoseconds in one day (86,400 s * 1e9 ns/s). Intended as the divisor that
# turns nanosecond-resolution timedelta values into a number of days.
DAY = 86400000000000

# Generate ruleset for holiday observances on the NYSE

def NYSE_holidays(a=None, b=None):
    """Build a dateutil rule set of NYSE holiday observances between ``a`` and ``b``.

    Fixed-date holidays (New Year's Day, Independence Day, Christmas) are added
    on their calendar date, on the preceding Friday (covers a Saturday holiday)
    and on the following Monday (covers a Sunday holiday). Saturdays and
    Sundays are then excluded, so only the weekday observance remains.

    Parameters
    ----------
    a : datetime.date or datetime.datetime, optional
        Start of the window (``dtstart`` of every rule). Defaults to today,
        evaluated when the function is *called*.
    b : datetime.date or datetime.datetime, optional
        Upper bound of the window (``until`` of every rule). Defaults to
        365 days after today, evaluated when the function is called.

    Returns
    -------
    dateutil.rrule.rruleset
        Iterable rule set yielding the holiday observances in the window.
    """
    # Defaults are resolved here rather than in the signature: a default of
    # ``datetime.date.today()`` is evaluated once, when the ``def`` cell runs,
    # and would go stale in a long-running kernel.
    if a is None:
        a = datetime.date.today()
    if b is None:
        b = datetime.date.today() + datetime.timedelta(days=365)

    # One entry of rrule keyword arguments per potential observance.
    observances = (
        dict(bymonth=12, bymonthday=31, byweekday=rrule.FR),  # New Years Day (Friday before a Saturday Jan 1)
        dict(bymonth=1, bymonthday=1),                        # New Years Day
        dict(bymonth=1, bymonthday=2, byweekday=rrule.MO),    # New Years Day (Monday after a Sunday Jan 1)
        dict(bymonth=1, byweekday=rrule.MO(3)),               # Martin Luther King Day
        dict(bymonth=2, byweekday=rrule.MO(3)),               # Washington's Birthday
        dict(byeaster=-2),                                    # Good Friday
        dict(bymonth=5, byweekday=rrule.MO(-1)),              # Memorial Day
        dict(bymonth=7, bymonthday=3, byweekday=rrule.FR),    # Independence Day (Friday before a Saturday Jul 4)
        dict(bymonth=7, bymonthday=4),                        # Independence Day
        dict(bymonth=7, bymonthday=5, byweekday=rrule.MO),    # Independence Day (Monday after a Sunday Jul 4)
        dict(bymonth=9, byweekday=rrule.MO(1)),               # Labor Day
        dict(bymonth=11, byweekday=rrule.TH(4)),              # Thanksgiving Day
        dict(bymonth=12, bymonthday=24, byweekday=rrule.FR),  # Christmas (Friday before a Saturday Dec 25)
        dict(bymonth=12, bymonthday=25),                      # Christmas
        dict(bymonth=12, bymonthday=26, byweekday=rrule.MO),  # Christmas (Monday after a Sunday Dec 25)
    )

    rs = rrule.rruleset()

    # Include all potential holiday observances
    for rule_kwargs in observances:
        rs.rrule(rrule.rrule(rrule.YEARLY, dtstart=a, until=b, **rule_kwargs))

    # Exclude potential holidays that fall on weekends
    rs.exrule(rrule.rrule(rrule.WEEKLY, dtstart=a, until=b, byweekday=(rrule.SA, rrule.SU)))

    return rs
    
# Generate ruleset for NYSE trading days

def NYSE_tradingdays(a=None, b=None):
    """Build a dateutil rule set of NYSE trading days between ``a`` and ``b``.

    Starts from every calendar day in the window and excludes Saturdays,
    Sundays and the observances produced by ``NYSE_holidays(a, b)``.

    Parameters
    ----------
    a : datetime.date or datetime.datetime, optional
        Start of the window. Defaults to today, evaluated at call time.
    b : datetime.date or datetime.datetime, optional
        Upper bound of the window. Defaults to 365 days after today,
        evaluated at call time.

    Returns
    -------
    dateutil.rrule.rruleset
        Iterable rule set yielding the trading days in the window.
    """
    # Resolve defaults at call time; a ``datetime.date.today()`` default in the
    # signature would be frozen at the moment the function was defined.
    if a is None:
        a = datetime.date.today()
    if b is None:
        b = datetime.date.today() + datetime.timedelta(days=365)

    rs = rrule.rruleset()
    rs.rrule(rrule.rrule(rrule.DAILY, dtstart=a, until=b))

    # Exclude weekends and holidays. The weekend rule is bounded by ``until=b``
    # like every other rule, instead of being left open-ended.
    rs.exrule(rrule.rrule(rrule.WEEKLY, dtstart=a, until=b, byweekday=(rrule.SA, rrule.SU)))
    rs.exrule(NYSE_holidays(a, b))

    return rs


# Materialise every NYSE holiday observance from 2011-01-01 through 2014-12-31 as a list.
# In the visible cells this list is only referenced by the commented-out holiday filter.
hdays =list(NYSE_holidays(datetime.datetime(2011,1,1),datetime.datetime(2014,12,31)))

# Load the S&P 500 series, keeping only the calendar date and the index level.
fields = ['caldt', 'spindx']
sp500 = pd.read_csv('./Datasets/S&P500.csv', usecols=fields, skipinitialspace=True)
# Dates are stored as month/day/year; parse them as UTC timestamps.
sp500['caldt'] = pd.to_datetime(sp500['caldt'], utc=True, format='%m/%d/%Y')
# Stringify the index labels and rename the date column to 'date'.
sp500 = sp500.rename(index=str, columns={"caldt": "date"})

# Column names shared by the data-preparation cells.
date_col = 'date'                  # quote date
sym_col = 'symbol'                 # option symbol string
exp_date_col = 'exdate'            # expiry date
strike_price_col = 'strike_price'
best_bid_col = 'best_bid'
best_ask_col = 'best_offer'
imp_vol_col = 'impl_volatility'
volumn_col = 'volume'              # (sic) identifier is spelled "volumn" throughout the notebook
tenor_col = 'Tenor'                # derived column: expiry date minus quote date
indx_col = 'spindx'                # index level, brought in by merging the S&P 500 frame

# Columns to read from the option data file (rebinds the `fields` list used for the S&P 500 file).
fields = [date_col, sym_col, exp_date_col, strike_price_col, best_bid_col, best_ask_col, imp_vol_col, volumn_col]

# Load the raw option quotes, restricted to the columns listed in `fields`.
df = pd.read_csv('./Datasets/option_data.csv', usecols=fields, skipinitialspace=True)

# Rows whose symbol contains 'SPXW' (taken before the date columns are parsed).
spxw = df.loc[df[sym_col].str.contains('SPXW')]

# Parse quote and expiry dates (month/day/year) as UTC timestamps.
df[date_col] = pd.to_datetime(df[date_col], utc=True, format='%m/%d/%Y')
df[exp_date_col] = pd.to_datetime(df[exp_date_col], utc=True, format='%m/%d/%Y')

# Tenor is the time between the quote date and expiry.
df[tenor_col] = df[exp_date_col] - df[date_col]
# Attach the index level for each quote date; quotes without a match are kept.
df = df.merge(sp500, how='left', on=date_col)

ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/_lib/_ccallback_c.cpython-36m-darwin.so, 2): no suitable image found.  Did find:
	/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/_lib/_ccallback_c.cpython-36m-darwin.so: mach-o, but wrong architecture
	/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/scipy/_lib/_ccallback_c.cpython-36m-darwin.so: mach-o, but wrong architecture

In [49]:
#Build Moneyness
sLength = len(df[date_col])
abs_diff_col = 'abs_diff'
moneyness_col = 'moneyness'
IVATM_col = 'IVATM'

# Strikes are compared against 1000 * index level (the strike column is scaled by 1000
# relative to the index, e.g. strike 1170000 vs. index 1304.03).
df[abs_diff_col] = abs(df[strike_price_col] - 1000 * df[indx_col])
#get the IVATM for each option grouped by start date and tenor (expiry date):
# the implied vol of the row whose strike is closest to the index level
IVATM = df.loc[df.groupby([date_col, exp_date_col])[abs_diff_col].idxmin(), [date_col, exp_date_col, imp_vol_col]]
# Assign instead of renaming in place: IVATM is a slice of df.
IVATM = IVATM.rename(index=str, columns={imp_vol_col: IVATM_col})
# Drop a previous IVATM column (if this cell is re-run) so the merge does not
# produce IVATM_x / IVATM_y and break the lookup below.
df = pd.merge(df.drop(columns=[IVATM_col], errors='ignore'), IVATM, on=[date_col, exp_date_col],  how='left')
# Standardised moneyness: log(strike / index) / (sqrt(tenor in years) * ATM vol).
# `.dt.days` gives the tenor in days regardless of the timedelta resolution.
# NOTE(review): a zero-day tenor makes the denominator 0 — confirm such rows are filtered out.
df[moneyness_col] = np.log(df[strike_price_col] / (1000 * df[indx_col])) / \
    (np.sqrt(df[tenor_col].dt.days / 365) * df[IVATM_col])

In [51]:
#Delete 0 volume entries
df=df[df[volumn_col]>0]

#Tenor criterion
# keep (date, expiry) pairs that have at least 10 rows
# NOTE(review): this counts rows, not distinct strikes — confirm each strike appears once per (date, expiry)
counts = df.groupby([date_col, exp_date_col]).size().reset_index(name='counts')
valid_tenors = counts[counts['counts'] > 9][[date_col, exp_date_col]]

#front maturity contract maturity must be less than 9 days and have moneyness less than -3.5
# (front maturity = shortest tenor quoted on each date)
front_maturities = df.loc[df.groupby([date_col])[tenor_col].idxmin(), [date_col, tenor_col]]
# `.dt.days` is independent of the timedelta resolution (no nanosecond assumption)
front_maturities = front_maturities[front_maturities[tenor_col].dt.days < 9]
front_contracts = pd.merge(df, front_maturities, on=[date_col, tenor_col], how="inner")
# lowest moneyness among the front-maturity contracts of each date
front_moneyness = front_contracts[[date_col, tenor_col, moneyness_col]] \
    .groupby([date_col, tenor_col]).min().reset_index()
valid_dates = front_moneyness[front_moneyness[moneyness_col] < -3.5][[date_col]]

df = pd.merge(df, valid_tenors, on=[date_col, exp_date_col], how="inner")
df = pd.merge(df, valid_dates, on=[date_col], how="inner")

#Maturity criterion: tenor shorter than 365 days
df=df[df[tenor_col].dt.days < 365]

#Moneyness criterion
df=df[df[moneyness_col]>-15]
df=df[df[moneyness_col]<5]

#Bid criterion: ask / bid ratio below 5 (0.001 guards against a zero bid)
df=df[df[best_ask_col]/(0.001 + df[best_bid_col])<5]

#Holiday criterion: this is probably already taken care of by the data
# for h in hdays:
#     df=df.loc[(df[date_col]!=h.tz_localize(None))]


In [52]:
#Separate short dated (tenor under 10 days) from long dated contracts
# `.dt.days` is independent of the timedelta resolution (no nanosecond assumption)
short_dated=df[df[tenor_col].dt.days < 10]
long_dated=df[df[tenor_col].dt.days >= 10]

#for short dated contract, only retain the shortest maturity on each date
front_short = short_dated.loc[short_dated.groupby([date_col])[tenor_col].idxmin(), [date_col, tenor_col]]
short_dated = pd.merge(short_dated, front_short, on=[date_col, tenor_col], how="inner")

#money ness is between -8 and 5
short_dated = short_dated[(short_dated[moneyness_col] > -8) & (short_dated[moneyness_col] < 5)]

#keep rows with a volume of at least 5
# NOTE(review): this filters each row on its own volume; it does not count trades per day
short_dated = short_dated[short_dated[volumn_col] > 4]

In [236]:
################################################################################# 
## getting the transform of the price
#################################################################################
'''
Under the two-factor stochastic-volatility jump model, the transform has the
exponential-affine form
    exp(alpha + beta_0 * X + beta_1 * V1 + beta_2 * V2),
where beta_0 = u, and beta_1 and beta_2 are the same for every jump
compensator v(dt, dx, dy) considered among the SVJ models.
Only alpha changes: alpha = alpha0 + terms that depend on the compensator.
'''
# Rates: alpha0 uses them as -r * tau (discounting) and (r - div_rate) * u * tau (drift).
r = 0.005
div_rate = 0.03
# Parameters of the first variance factor, passed to beta / alpha0 / int_theta_SVJJ.
# Presumably mean-reversion speed, long-run level, vol-of-vol and correlation — confirm against the model spec.
kappa1 = 2
vbar1 = 0.0173   # also used as the starting value of V1 in the pricing cell
sigma1 = 0.2678
rho1 = -1

# Parameters of the second variance factor (same roles as above).
kappa2 = 11.5
vbar2 = 0.001    # also used as the starting value of V2 in the pricing cell
sigma2 = 0.3708
rho2 = -0.72

def beta(tau, u, kappa, sigma, rho):
    """Return the coefficient that multiplies one variance factor in the transform exponent.

    Parameters
    ----------
    tau : float
        Time to maturity.
    u : scalar or numpy array (real or complex)
        Transform argument; the result has the same shape.
    kappa, sigma, rho : float
        Parameters of the variance factor.

    NOTE(review): the quadratic coefficient is -u**2 here; Duffie, Pan and
    Singleton (2000) appear to use u * (1 - u) in the same formula — confirm
    which one is intended.
    """
    quad = -u**2
    lin = sigma * rho * u - kappa
    root = np.sqrt(lin**2 + quad * sigma**2)
    # 1 - exp(-gamma * tau) appears in both numerator and denominator
    decay = 1 - np.exp(-root * tau)
    numerator = -quad * decay
    denominator = 2 * root - (root + lin) * decay
    return numerator / denominator

def alpha0(tau, u, kappa1, sigma1, rho1, vbar1, kappa2, sigma2, rho2, vbar2, r, div_rate):
    """Return the jump-free part of the constant term in the transform exponent.

    The value is the discount/drift term ``-r*tau + (r - div_rate)*u*tau``
    minus one contribution per variance factor.

    Parameters
    ----------
    tau : float
        Time to maturity.
    u : scalar or numpy array (real or complex)
        Transform argument.
    kappa1, sigma1, rho1, vbar1 : float
        Parameters of the first variance factor.
    kappa2, sigma2, rho2, vbar2 : float
        Parameters of the second variance factor.
    r, div_rate : float
        Interest rate and dividend rate.
    """
    quad = -u**2

    def factor_term(kappa, sigma, rho, vbar):
        # Contribution of a single variance factor.
        lin = sigma * rho * u - kappa
        root = np.sqrt(lin**2 + quad * sigma**2)
        shifted = root + lin
        log_arg = 1 - shifted * (1 - np.exp(- root * tau)) / (2 * root)
        return kappa * vbar * (shifted / (sigma**2) * tau + 2 / (sigma**2) * np.log(log_arg))

    rate_part = -r * tau + (r - div_rate) * u * tau
    first = factor_term(kappa1, sigma1, rho1, vbar1)
    second = factor_term(kappa2, sigma2, rho2, vbar2)
    return rate_part - first - second

# Jump intensity coefficients: phi_SVJJ forms lam = eta0 + eta1 * V1 + eta2 * V2.
eta0 = 0.0033
eta1 = 20.27
eta2 = 5.074
# Jump-size parameters forwarded to int_theta_SVJJ.
# Presumably mean / std of the price jump, mean of the variance jump and their correlation — confirm.
mu_x = -0.1241
sigma_x = 0.1141
mu_y = 0.0850
rho_j = -0.3260

# the SVJJ model by Duffie et. al. (2000)
def int_theta_SVJJ(tau, u, mu_x, sigma_x, mu_y, rho_j, kappa, sigma, rho):
    """Return the time-integrated jump transform used in the SVJJ constant term.

    Parameters
    ----------
    tau : float
        Time to maturity.
    u : scalar or numpy array (real or complex)
        Transform argument.
    mu_x, sigma_x : float
        Parameters of the price-jump size; they enter through the factor
        ``exp(mu_x * u + 0.5 * sigma_x**2 * u**2)``.
    mu_y : float
        Parameter of the variance-jump size.
    rho_j : float
        Coupling between price and variance jumps (enters via ``c``).
    kappa, sigma, rho : float
        Parameters of the variance factor that jumps.

    NOTE(review): the quadratic coefficient is -u**2 here; Duffie, Pan and
    Singleton (2000) appear to use u * (1 - u) — confirm which is intended.
    """
    a = -u**2
    b = sigma * rho * u - kappa
    gamma = np.sqrt(b**2 + a * sigma**2)
    c = 1 - rho_j * mu_y * u
    decay = 1 - np.exp(-gamma * tau)
    d = (gamma - b) / ((gamma - b) * c + mu_y * a) * tau \
        - 2 * mu_y * a / ((gamma * c)**2 - (b * c - mu_y * a)**2) * \
            np.log(1 - ((gamma + b) * c - mu_y * a) / (2 * gamma * c) * decay)

    # The Gaussian price-jump term must use the jump-size std `sigma_x`; the
    # previous code used the vol-of-vol `sigma` and never read `sigma_x`.
    return np.exp(mu_x * u + 0.5 * sigma_x**2 * u**2) * d

# the transform of state (X, V1, V2) with SVJJ model
def phi_SVJJ(u, X, V1, V2, t, T, r, div_rate, kappa1, vbar1, sigma1, rho1, kappa2, vbar2, sigma2, rho2,
             eta0, eta1, eta2, mu_x, sigma_x, mu_y, rho_j):
    """Evaluate the SVJJ transform exp(alpha + u*X + beta1*V1 + beta2*V2).

    Parameters
    ----------
    u : scalar or numpy array (real or complex)
        Transform argument.
    X, V1, V2 : float
        Current state; the pricing cell passes the log index level for ``X``
        and the two variance factors for ``V1`` and ``V2``.
    t, T : float
        Current time and maturity; only ``T - t`` is used.
    r, div_rate, kappa1, ..., rho2 : float
        Rates and variance-factor parameters forwarded to ``alpha0`` / ``beta``.
    eta0, eta1, eta2 : float
        Jump intensity coefficients: ``eta0 + eta1 * V1 + eta2 * V2``.
    mu_x, sigma_x, mu_y, rho_j : float
        Jump-size parameters forwarded to ``int_theta_SVJJ``.

    NOTE(review): the jump integral is evaluated with the first factor's
    parameters only (kappa1, sigma1, rho1) — confirm that is intended.
    """
    horizon = T - t
    intensity = eta0 + eta1 * V1 + eta2 * V2

    # Constant term: jump-free part, then the jump adjustment.
    jump_free = alpha0(horizon, u, kappa1, sigma1, rho1, vbar1, kappa2, sigma2, rho2, vbar2, r, div_rate)
    jump_integral = int_theta_SVJJ(horizon, u, mu_x, sigma_x, mu_y, rho_j, kappa1, sigma1, rho1)
    const_term = jump_free - intensity * horizon + intensity * jump_integral

    # Loadings on the two variance factors (the loading on X is u itself).
    load_v1 = beta(horizon, u, kappa1, sigma1, rho1)
    load_v2 = beta(horizon, u, kappa2, sigma2, rho2)

    exponent = const_term + u * X + load_v1 * V1 + load_v2 * V2
    return np.exp(exponent)


def calc_G(a, b, y, X, V1, V2, T):
    """Numerically invert the SVJJ transform.

    Computes ``phi(a) / 2 - (1 / pi) * integral_0^U Im[phi(a + i*b*v) * exp(-i*v*y)] / v dv``
    with the trapezoidal rule on a two-resolution grid: step 0.001 on [0, 5)
    and step 0.2 from 5 up to about U = 500.

    Parameters
    ----------
    a, b : float
        Real part and imaginary scale of the transform argument ``a + i*b*v``.
    y : float
        Threshold at which the inversion is evaluated (the pricing cell passes
        the log strike).
    X, V1, V2 : float
        Current state passed through to ``phi_SVJJ``.
    T : float
        Maturity; the current time is fixed at 0.

    Model parameters (r, div_rate, kappa1, ..., rho_j) are read from
    module-level globals.
    """
    t = 0
    upperbound = 500
    sep = 5
    step_size1 = 0.001
    step_size2 = 0.2

    model_params = (r, div_rate, kappa1, vbar1, sigma1, rho1, kappa2, vbar2, sigma2, rho2,
                    eta0, eta1, eta2, mu_x, sigma_x, mu_y, rho_j)

    # Fine grid near the origin, coarse grid in the tail.
    v_arr1 = np.arange(start=0, stop=sep, step=step_size1)
    v_arr2 = np.arange(start=sep, stop=upperbound+step_size2, step=step_size2)
    v_arr = np.concatenate((v_arr1, v_arr2))
    # Take the interval widths from the grid itself. np.arange with a float
    # step can return one element more or fewer than expected, so a separately
    # built step array may not match the grid length.
    step_sizes = np.diff(v_arr)

    u_arr = a + b * v_arr * 1j
    phi_arr = phi_SVJJ(u_arr, X, V1, V2, t, T, *model_params)
    inv_arr = np.exp(-v_arr * 1j * y)
    # 1e-15 avoids dividing by zero at v = 0
    temp = np.imag(phi_arr * inv_arr) / (v_arr + 1e-15)
    #trapezoidal rule
    integr = np.sum((temp[0:-1] + temp[1:]) / 2 * step_sizes) / np.pi
    return phi_SVJJ(a, X, V1, V2, t, T, *model_params) / 2 - integr


# Example contract: index level 1304, strike 1170, 45 days to expiry,
# with both variance factors started at their vbar values.
K = 1170
X, y = np.log(1304), np.log(K)
V1, V2 = vbar1, vbar2
T = 45 / 365

# Two transform inversions at the log strike; 1e-7 stands in for a = 0.
G0 = calc_G(1e-7, 1, y, X, V1, V2, T)
G1 = calc_G(1, 1, y, X, V1, V2, T)

# Model price built from the two inversions (presumably the put on this strike — confirm).
p = K * G0 - G1

In [237]:
#Black Scholes d1 / d2 for the contract priced above

# NOTE(review): `sigma` was not assigned in any visible cell. The value below is the
# impl_volatility of the matching quote in the option table (2011-02-02, strike 1170,
# 45-day tenor); replace it if a different volatility is intended.
sigma = 0.235007
vol_sqrt_T = sigma * np.sqrt(T)
# d1 must be scaled by sigma * sqrt(T); only then does d2 = d1 - sigma * sqrt(T) hold.
# The drift uses r only (no dividend yield), as in the original expression.
d1 = (X - y + (r + sigma**2 / 2) * T) / vol_sqrt_T
d2 = d1 - vol_sqrt_T


6.973649619638401

In [235]:
# Inspect ten rows of the option table by position (3500-3509).
df.iloc[3500:3510]

Unnamed: 0,date,symbol,exdate,strike_price,best_bid,best_offer,volume,impl_volatility,Tenor,spindx,abs_diff,IVATM,moneyness
3856,2011-02-02 00:00:00+00:00,SPX 110319P1170000,2011-03-19 00:00:00+00:00,1170000,3.6,5.1,343,0.235007,45 days,1304.03,134030.0,0.150636,-2.050519
3857,2011-02-02 00:00:00+00:00,SPX 110319P1175000,2011-03-19 00:00:00+00:00,1175000,3.9,5.0,8281,0.229276,45 days,1304.03,129030.0,0.150636,-1.969894
3858,2011-02-02 00:00:00+00:00,SPX 110319P1180000,2011-03-19 00:00:00+00:00,1180000,4.1,5.6,13,0.227402,45 days,1304.03,124030.0,0.150636,-1.889612
3859,2011-02-02 00:00:00+00:00,SPX 110319P1185000,2011-03-19 00:00:00+00:00,1185000,4.5,6.0,2503,0.22512,45 days,1304.03,119030.0,0.150636,-1.809669
3860,2011-02-02 00:00:00+00:00,SPX 110319P1190000,2011-03-19 00:00:00+00:00,1190000,4.9,6.4,3,0.222465,45 days,1304.03,114030.0,0.150636,-1.730062
3861,2011-02-02 00:00:00+00:00,SPX 110319P1195000,2011-03-19 00:00:00+00:00,1195000,4.9,6.5,15,0.215525,45 days,1304.03,109030.0,0.150636,-1.65079
3862,2011-02-02 00:00:00+00:00,SPX 110319P1200000,2011-03-19 00:00:00+00:00,1200000,5.6,6.5,15642,0.211834,45 days,1304.03,104030.0,0.150636,-1.571848
3863,2011-02-02 00:00:00+00:00,SPX 110319P1205000,2011-03-19 00:00:00+00:00,1205000,5.5,7.4,5,0.208387,45 days,1304.03,99030.0,0.150636,-1.493234
3864,2011-02-02 00:00:00+00:00,SPX 110319P1210000,2011-03-19 00:00:00+00:00,1210000,6.0,7.9,473,0.205624,45 days,1304.03,94030.0,0.150636,-1.414946
3865,2011-02-02 00:00:00+00:00,SPX 110319P1215000,2011-03-19 00:00:00+00:00,1215000,6.4,8.3,1256,0.201531,45 days,1304.03,89030.0,0.150636,-1.336981


In [94]:
# NOTE(review): scratch cell. No module-level `b2` is assigned in the visible cells
# (b2 is only a local inside alpha0 and phi_SVJJ), so this fails on a fresh kernel;
# consider deleting it.
b2

(0.0004904896889500047+0.05310359500192973j)

In [111]:
# Scratch calculation: natural log of 3.4 (result not used by any visible cell).
np.log(3.4)

1.2237754316221157

In [121]:
# Scratch check of how a timedelta of 0 days and 1 second is printed.
print(datetime.timedelta(0,1))

0:00:01
