# Project 3 - Event Driven Finance
**Helgi Ingimundarson - hi2179, Bradlee Spiece - bcs2149, Giovanni Gambarotta - gg2607**

In [4]:
from sqlalchemy import create_engine
import datetime
import pandas as pd
from pygments import highlight
from pygments.lexers.sql import SqlLexer
from pygments.formatters import HtmlFormatter, LatexFormatter
from IPython import display
import functools as ft
import matplotlib.pyplot as plt
import scipy as sp
import scipy.interpolate
import numpy as np
%matplotlib inline

# SQLAlchemy URL of the course database (pymssql driver), used by get_connection().
# NOTE(review): the database user name and password are embedded in this URL;
# consider loading the connection string from an environment variable or an
# untracked config file instead of committing it with the notebook.
CONNECTION_STRING = 'mssql+pymssql://IVYuser:resuyvi@vita.ieor.columbia.edu'

# Gets the database connection
def get_connection():
    """Open and return a new connection to the database at CONNECTION_STRING.

    A fresh SQLAlchemy engine is built on every call.  The connection is
    returned open; nothing in this function closes it.
    """
    return create_engine(CONNECTION_STRING).connect()

# Query database and return results in dataframe
def query_dataframe(query, connection=None):
    """Run a SQL query and return the result set as a pandas DataFrame.

    Parameters
    ----------
    query : str
        SQL text handed to ``pd.read_sql``.
    connection : optional
        An open connection to run the query on.  It is left open for the
        caller.  When omitted, a connection is obtained from
        ``get_connection()`` and closed again before returning.

    Returns
    -------
    pandas.DataFrame
        The query result.
    """
    if connection is not None:
        return pd.read_sql(query, connection)
    # This connection was opened here, so it has to be released here as well;
    # previously every call without an explicit connection leaked one.
    owned = get_connection()
    try:
        return pd.read_sql(query, owned)
    finally:
        owned.close()

# Query database using external file and return results in dataframe
def query_dataframe_f(filename, connection=None):
    """Run the SQL stored in a file and return the result as a DataFrame.

    Parameters
    ----------
    filename : str
        Path of a text file whose whole content is sent as the query.
    connection : optional
        An open connection to run the query on.  It is left open for the
        caller.  When omitted, a connection is obtained from
        ``get_connection()`` and closed again before returning.

    Returns
    -------
    pandas.DataFrame
        The query result.
    """
    # Read the file first so a missing file cannot leave a connection open.
    with open(filename, 'r') as handle:
        sql = handle.read()
    if connection is not None:
        return pd.read_sql(sql, connection)
    # Connection opened here -> closed here (it used to be leaked).
    owned = get_connection()
    try:
        return pd.read_sql(sql, owned)
    finally:
        owned.close()

# Print sql query and query results
def print_and_query(filename, connection=None, use_latex=False):
    """Display the SQL stored in a file, syntax-highlighted, followed by its result.

    Parameters
    ----------
    filename : str
        Path of a text file containing the SQL query.
    connection : optional
        An open connection to run the query on.  It is left open for the
        caller.  When omitted, a connection is obtained from
        ``get_connection()`` and closed once the query has been read.
    use_latex : bool
        If true the query is rendered with pygments' LatexFormatter,
        otherwise as HTML together with the formatter's style sheet.

    Returns
    -------
    None
        Everything is emitted through ``display.display``.
    """
    # The file handle is only needed for the read, not for the whole query.
    with open(filename, 'r') as handle:
        sql = handle.read()

    if use_latex:
        display_obj = display.Latex(highlight(
            sql, SqlLexer(), LatexFormatter()))
    else:
        formatter = HtmlFormatter()
        display_obj = display.HTML(
            '<style type="text/css">{}</style>{}'
            .format(
                formatter.get_style_defs('.highlight'),
                highlight(sql, SqlLexer(), formatter)))

    # Close the connection only when it was opened here (it used to be leaked).
    owns_connection = connection is None
    if owns_connection:
        connection = get_connection()
    try:
        result = pd.read_sql(sql, connection)
    finally:
        if owns_connection:
            connection.close()

    display.display(display_obj, result)

In [16]:
# American Option Pricer with dividends
# Function arguments:
# ABM(ft.partial(VP, K=Strike, CallPut='P'), StockPrice, TimeToMaturity (in years, i.e. days/365),
#     InterestRate, Volatility, TreeSteps, dividends)
# Dividends are given as np.array([[time to ex-dividend date, dividend amount], ...]),
# with the time expressed in the same units as TimeToMaturity

def BPTree(n, S, u, d, ex_div):
    """Build the levels of a recombining binomial price tree with cash dividends.

    Parameters
    ----------
    n : int
        Number of steps; the result has n + 1 levels.
    S : float
        Price at the root.
    u, d : float
        Up and down multipliers applied at every step.
    ex_div : 2-D array-like
        Rows of ``[step index, dividend amount]``.  The amount is subtracted
        from every node created by the step with that index (i.e. from
        level ``index + 1``).

    Returns
    -------
    list of numpy.ndarray
        ``levels[k]`` holds the k + 1 prices of level k, highest price first.
    """
    ex_div = np.asarray(ex_div)
    levels = [np.array([S])]
    for i in range(n):
        prev = levels[-1]
        # Top node moves up, every node moves down: k nodes -> k + 1 nodes.
        nxt = np.concatenate((prev[:1] * u, prev * d))
        on_this_step = ex_div[:, 0] == i
        if on_this_step.any():
            # Sum all dividends that land on this step; using only the first
            # matching row silently dropped any further ones.
            nxt = nxt - ex_div[on_this_step, 1].sum()
        levels.append(nxt)
    return levels

def GBM(R, P, S, T, r, v, n, ex_div):
    """Price an option by backward induction on a binomial tree with cash dividends.

    Parameters
    ----------
    R : callable
        Roll-back rule called as ``R(discount, p, later_values, payoff_now)``
        and returning the option values one level earlier (e.g. ``American``).
    P : callable
        Payoff function applied to the array of prices of each tree level.
    S : float
        Current stock price.
    T : float
        Time to maturity.
    r : float
        Interest rate per unit of T, continuously compounded.
    v : float
        Volatility per unit of T.
    n : int
        Number of tree steps.
    ex_div : 2-D array-like
        Rows of ``[time to ex-dividend date, amount]``, times in the units of
        T.  Rows with a time later than T are ignored.  The caller's array is
        not modified.  Rows with a negative time are not filtered out and end
        up on the first step.

    Returns
    -------
    float
        Option value at the root of the tree.
    """
    t = float(T)/n
    ex_div = np.asarray(ex_div)
    # Boolean indexing returns a copy, so the assignment below stays local.
    ex_div = ex_div[ex_div[:,0]<=T]
    # Times of tree levels 1..n.  They are built from an integer range because
    # np.arange(0, T+t, t) with a float step can return one element more or
    # less than expected, which shifted or lost dividends near expiry.
    step_times = t * np.arange(1, n + 1)
    # First level whose time is >= the ex-date; BPTree subtracts the dividend
    # from the nodes created by the step with that index.
    div_idx = np.searchsorted(step_times, ex_div[:,0], side='left')
    # An ex-date within rounding of T may fall past the last level: keep it on
    # the final step instead of dropping it or sending it to step 0.
    ex_div[:,0] = np.minimum(div_idx, n - 1)
    u = np.exp(v * np.sqrt(t))
    d = 1./u
    p = (np.exp(r * t) - d)/(u - d)
    # Levels ordered from expiry back to the root
    ptree = BPTree(n, S, u, d, ex_div)[::-1]
    # Bind the one-step discount factor and the up-probability; R_ then only
    # needs (values one level later, payoff at the current level)
    R_ = ft.partial(R, np.exp(-r*t), p)
    # Evaluate the payoff on every level and fold the levels back to the root
    return ft.reduce(R_, map(P, ptree))[0]

def American(D, p, a, b):
    """One backward-induction step with early exercise.

    D is the one-step discount factor, p the weight given to ``a[:-1]``
    (and 1 - p to ``a[1:]``), a the option values one level later and b the
    immediate-exercise payoffs at the current level.  Returns, node by node,
    the larger of b and the discounted weighted value.
    """
    upper, lower = a[:-1], a[1:]
    continuation = D * (upper * p + lower * (1 - p))
    return np.maximum(b, continuation)
# Vanilla payoff: max(S-K, 0) for a call, max(K-S, 0) otherwise
def VP(S, K, CallPut):
    """Exercise value of a vanilla option.

    S is the stock price (scalar or array), K the strike.  ``CallPut == 'C'``
    selects the call payoff; every other value is treated as a put.
    """
    intrinsic = S - K if CallPut == 'C' else K - S
    return np.maximum(intrinsic, 0)
# American-exercise pricer: GBM with the American roll-back rule bound as R.
# Remaining call signature: ABM(P, S, T, r, v, n, ex_div)
ABM = ft.partial(GBM, American)

# Example of a call K,C/P,Stock,time to maturity, rate, vol, steps, dividends
# ABM(ft.partial(VP,K=23,CallPut='C'), 25,1, 0.05, 0.3, 5, np.array([[0,0]]))

# Exercise 1

For the 2nd series (2nd month) over the one year period of 2007, find all calls with more
than $0.50 Premium Over Parity (POP). Of these, rank the volatility of each strike versus
its moneyness (defined here as ratio of strike price to stock price).

For normally skewed options the volatility should be monotonically declining with
moneyness. Are there any notable exceptions?

Selected stock: CSCO

In [17]:
# Show the SQL stored in Q1.sql followed by its result set
print_and_query('Q1.sql')

Unnamed: 0,Date,StockPrice,CallPut,Expiration,Strike,MBBO,ImpliedVolatility,OpenInterest,Volume,IntrinsicValue,POP,Moneyness
0,2007-01-03,27.730000,C,2007-02-17,27.5,1.30,0.284306,4366,2383,0.23,1.07,0.991706
1,2007-01-04,28.459999,C,2007-02-17,27.5,1.75,0.285575,5013,1338,0.96,0.79,0.966268
2,2007-01-04,28.459999,C,2007-02-17,30.0,0.58,0.281705,3241,5446,0.00,0.58,1.054111
3,2007-01-05,28.469999,C,2007-02-17,27.5,1.75,0.287733,5491,1542,0.97,0.78,0.965929
4,2007-01-05,28.469999,C,2007-02-17,30.0,0.58,0.284461,7640,2366,0.00,0.58,1.053741
5,2007-01-08,28.629999,C,2007-02-17,27.5,1.88,0.305506,6020,1064,1.13,0.75,0.960531
6,2007-01-08,28.629999,C,2007-02-17,30.0,0.63,0.294942,8685,3928,0.00,0.63,1.047852
7,2007-01-09,28.469999,C,2007-02-17,27.5,1.78,0.313203,6399,753,0.97,0.81,0.965929
8,2007-01-09,28.469999,C,2007-02-17,30.0,0.63,0.315533,10965,4905,0.00,0.63,1.053741
9,2007-01-10,28.680000,C,2007-02-17,27.5,1.90,0.312210,6297,755,1.18,0.72,0.958856


In [13]:
# NOTE(review): `data1` is not assigned in any cell visible in this notebook, so
# this cell depends on leftover kernel state (the saved output is just None).
# Define it explicitly or remove this cell - TODO confirm.
print(data1)

None


# Exercise 2 
Consider the day-to-day volatility of the 2nd month ATM option. Find the three biggest
overnight absolute changes in 2007. List the dates and attempt to determine the cause
online.
NOTE: make sure you define your interpretation of ATM.

In [18]:
# Show the SQL stored in Q2.sql followed by its result set
print_and_query('Q2.sql')

Unnamed: 0,Date,ImpliedVolChange
0,2007-11-08,-0.089355
1,2007-05-09,-0.086544
2,2007-02-27,0.068508


# Exercise 3
Sometimes you may want to follow an option that does not actually trade. Synthetic
options are used as proxies, and derived by interpolating data from nearby options that
actually trade.

For each day in a week of your choosing, construct “synthetic ATM 45-day” put and call
options with the following properties:

a) the strike is constructed to be exactly the closing price

b) the expiration date is 45 days ahead

Find the implied volatilities by inverting Black-Scholes for the interpolated prices. Repeat
this for two additional series with strikes located +/- 10% of the synthetic ATM.

NOTE: To validate your synthetics, ensure that implied volatilities of the puts and calls are
approximately equal.

In [323]:
# Selecting the week between 2007-01-22 and 2007-01-26 and closest strike price ATM and +-10%
# NOTE(review): the following cell reloads the put data from a templated query and
# reassigns `date` / `last_date`; the execution counts suggest this cell is an
# older version - confirm whether it is still needed.
calls = query_dataframe_f('Q3Call.sql')
puts = query_dataframe_f('Q3Put.sql')
# Selecting the starting date: first row of the put data, as 'YYYY-MM-DD'
date = str(puts.Date.loc[0])[:10]
# Selecting a further-out end date (start + 50 days) for the synthetic option
last_date = str(puts.Date.loc[0]+datetime.timedelta(days=50))[:10]
# print() call form behaves the same on Python 2 and also runs on Python 3
print('First day of the considered week: %s' % date)

First day of the considered week: 2007-01-22


In [459]:
# Selecting the week between 2007-01-22 and 2007-01-26 and closest strike price ATM and +-10%
# NOTE(review): the file name here is 'Q3put.sql' while the previous cell reads
# 'Q3Put.sql'; on a case-sensitive file system only one of them can exist - confirm.
# Read the query template through a context manager so the file handle is closed.
with open('Q3put.sql', 'r') as handle:
    sql_raw = handle.read()
# Fill the {opt_type} placeholder of the template
sql_format = sql_raw.format(
    opt_type='P'
)
opt = query_dataframe(sql_format)
# Selecting the starting date: first row of the data, as 'YYYY-MM-DD'
date = str(opt.Date.loc[0])[:10]
# Selecting a further-out end date (start + 50 days) for the synthetic option
last_date = str(opt.Date.loc[0]+datetime.timedelta(days=50))[:10]
# print() call form behaves the same on Python 2 and also runs on Python 3
print('First day of the considered week: %s' % date)

First day of the considered week: 2007-01-22


In [446]:
# Inspect the option quotes loaded for the selected week
opt

Unnamed: 0,Date,StockPrice,CallPut,Expiration,DaysToMaturity,Strike,ImpliedVolatility,MBBO,StrikePriceTarget,TargetDistance,Code
0,2007-01-22,26.530001,P,2007-02-17,26,25.0,0.364191,0.38,26.53,-1.530001,LS-BM
1,2007-01-22,26.530001,P,2007-02-17,26,27.5,0.352929,1.5,26.53,0.969999,HS-BM
2,2007-01-22,26.530001,P,2007-03-17,54,25.0,0.304662,0.53,26.53,-1.530001,LS-AM
3,2007-01-22,26.530001,P,2007-03-17,54,27.5,0.295323,1.65,26.53,0.969999,HS-AM
4,2007-01-23,26.040001,P,2007-02-17,25,25.0,0.335144,0.43,26.04,-1.040001,LS-BM
5,2007-01-23,26.040001,P,2007-02-17,25,27.5,0.336939,1.78,26.04,1.459999,HS-BM
6,2007-01-23,26.040001,P,2007-03-17,53,25.0,0.287619,0.6,26.04,-1.040001,LS-AM
7,2007-01-23,26.040001,P,2007-03-17,53,27.5,0.281835,1.9,26.04,1.459999,HS-AM
8,2007-01-24,26.889999,P,2007-02-17,24,25.0,0.33971,0.23,26.89,-1.889999,LS-BM
9,2007-01-24,26.889999,P,2007-02-17,24,27.5,0.324824,1.18,26.89,0.610001,HS-BM


In [460]:
# Obtaining the interest rates used to price the 45-day synthetic options.
# The zero curve is queried once, for `date` (the first day of the selected week).
query = ''' Select Date, Days, Rate
            from XFDATA.dbo.ZERO_CURVE
            where Date='%s'
            and Days < 1000 ''' % (date)
zero_curve = query_dataframe(query)
# Maturities (in days) for which a rate has to be interpolated: 45, 46, ..., 49
extra_days = pd.DataFrame(np.array([np.arange(45,50,1),np.full(5, np.nan)]).T, columns=['Days','Rate'])
# Interpolate against the Days axis.  The previous
# DataFrame.interpolate(method='linear') treated consecutive rows as equally
# spaced and ignored the Days values, which is only right when the neighbouring
# curve points happen to sit at 44 and 50 days.
zero_curve_sorted = zero_curve[['Days','Rate']].dropna().sort_values('Days')
syntetic_rate = extra_days.copy()
syntetic_rate['Rate'] = np.interp(syntetic_rate.Days.values,
                                  zero_curve_sorted.Days.values.astype(float),
                                  zero_curve_sorted.Rate.values.astype(float))
zero_synt = pd.concat([zero_curve_sorted, syntetic_rate]).sort_values('Days')
# Rates (in percent) for maturities of 45..49 days, labelled 0..4;
# synthetic() looks them up as zero_rate[day]
zero_rate = syntetic_rate.Rate
zero_rate

0    5.384937
1    5.388502
2    5.392066
3    5.395631
4    5.399196
Name: Rate, dtype: float64

In [461]:
# Checking for dividends
# NOTE(review): Exercise 1 states the selected stock is CSCO, while dividends are
# looked up for 'KO' here - confirm the ticker matches the option data in `opt`.
ticker = 'KO'
# Obtain the dividend amounts and the time to each ex-date, expressed in years
# (days between `date` and ExDate divided by 365), for ex-dates between `date`
# and `last_date`. The third placeholder is filled with a literal '%'.
query = ''' SELECT CAST(datediff(dd,'%s',ExDate) AS FLOAT)/365 as TimeToExDate, Amount
            FROM XFDATA.dbo.DISTRIBUTION dist 
            INNER JOIN XFDATA.dbo.SECURITY sec ON dist.SecurityID=sec.SecurityID
            WHERE Ticker='%s' AND (DistributionType='%s' OR DistributionType='1')
            AND ExDate>='%s' AND ExDate<='%s' ''' % (date,ticker,'%',date,last_date)

dividends= query_dataframe(query,)
print('Dividends')
print(dividends)

Dividends
   TimeToExDate  Amount
0      0.136986    0.34


In [462]:
# Grouping by date and interpolating
from scipy.optimize import minimize

def synthetic(df_opt, dtm, div, rates=None, steps=1000):
    """Back out the implied volatility of a synthetic ATM option for every date.

    For each date in ``df_opt`` the quoted price (column MBBO) is interpolated
    over (DaysToMaturity, Strike) at maturity ``dtm`` and at a strike equal to
    that day's stock price.  The volatility for which the binomial pricer
    ``ABM`` reproduces the interpolated price is then found by minimising the
    squared pricing error.  One summary line is printed per date.

    Parameters
    ----------
    df_opt : pandas.DataFrame
        Needs the columns Date, DaysToMaturity, Strike, MBBO, StockPrice and
        CallPut; CallPut is read from the first row of each date.
    dtm : number
        Days to maturity of the synthetic option.
    div : pandas.DataFrame
        Dividend table whose values are rows of
        ``[time to ex-date in years, amount]``.  The times are assumed to be
        measured from the first date in ``df_opt`` - TODO confirm against the
        caller.  The frame is not modified.
    rates : optional
        Interest rates in percent, looked up as ``rates[k]`` for the k-th date
        (0-based).  Defaults to the notebook-level ``zero_rate``.
    steps : int
        Number of binomial tree steps.

    Returns
    -------
    numpy.ndarray
        One row per date holding the optimiser's solution ``res.x``.
    """
    if rates is None:
        rates = zero_rate
    # Private float copy: the caller's frame is never written to.
    base_div = np.array(div.values, dtype=float)
    T = dtm*1./365
    # Volatility must stay non-negative
    cons = ({'type': 'ineq','fun' : lambda x: np.array(x),
                 'jac' : lambda x: np.array([1.0])})
    implied_vol = []
    first_date = None
    for day, (group, df) in enumerate(df_opt.groupby('Date')):
        # Adjusting the dividend ex-date by the calendar days elapsed since the
        # first date.  The shift is zero on the first date; subtracting one day
        # before pricing the first date made every ex-date one day too early.
        stamp = pd.Timestamp(group)
        if first_date is None:
            first_date = stamp
        div_today = base_div.copy()
        div_today[:, 0] -= (stamp - first_date).days / 365.
        # Linear interpolation of the quoted prices.
        # NOTE(review): scipy.interpolate.interp2d was removed in SciPy 1.14; it
        # is kept here so the interpolated prices stay unchanged, but it has to
        # be replaced before running on a recent SciPy.
        surface = sp.interpolate.interp2d(df.DaysToMaturity.values,df.Strike.values,
                                df.MBBO.values)
        S = df.StockPrice.values[0]
        call_put = df.CallPut.values[0]
        MBBO_synthetic = float(np.ravel(surface(dtm,S))[0])
        rate = rates[day]*0.01
        payoff = ft.partial(VP,K=S,CallPut=call_put)
        # Defining the objective function: squared pricing error at volatility x
        def f(x):
            return (ABM(payoff, S, T, rate, x, steps, div_today)-MBBO_synthetic)**2
        # Optimizing with a fixed starting guess of 0.3 for the implied volatility
        res = minimize(f,0.3,constraints=cons)
        print('{} - {} - Syntetic MBBO: {:.3f}- Syntetic IV: {:.3f}'
              .format(call_put,group,MBBO_synthetic,float(res.x[0])))
        implied_vol.append(res.x)
    return np.array(implied_vol)

In [463]:
# Implied volatilities of the 45-day synthetic ATM puts, one per day of the week;
# copies are passed so `opt` and `dividends` stay untouched
syntheticIV_puts = synthetic(opt.copy(),45,dividends.copy())

P - 2007-01-22 00:00:00 - Syntetic MBBO: 1.167- Syntetic IV: 0.335
P - 2007-01-23 00:00:00 - Syntetic MBBO: 1.098- Syntetic IV: 0.322
P - 2007-01-24 00:00:00 - Syntetic MBBO: 1.093- Syntetic IV: 0.311
P - 2007-01-25 00:00:00 - Syntetic MBBO: 1.086- Syntetic IV: 0.316
P - 2007-01-26 00:00:00 - Syntetic MBBO: 1.085- Syntetic IV: 0.315


# Exercise 4
Go to an online charting tool (e.g. Google finance) and look at the (current) one-year plot
of KO vs. PEP.

Suppose this motivates you to pairs trade. (One would still be curious whether the best
strategy is to trade stock price or volatility.)

Create a similar plot for the synthetic 45-day ATM option volatilities for both stocks for a
three-year period starting 2011.

At first glance, does the ratio seem to mean revert?

In [None]:
# In the period 