In [34]:
# Display greeks, term structure, skew, distribution of returns, statistical moments, vol smile/smirk, vol surface, IV vs RV

In [35]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [36]:
# loading data
# The CSV location defaults to the original local path, but can be overridden
# through the AAPL_OPTIONS_CSV environment variable so the notebook can be run
# on machines where that absolute path does not exist.
path = os.environ.get(
    "AAPL_OPTIONS_CSV",
    r"C:\Users\semic\OneDrive\Desktop\Quant\Quant_Finance\Options\Data\AAPL_options.csv",
)
options_df = pd.read_csv(path)
options_df.head(2)

Unnamed: 0,date,expiration date,type,strike price,ask price,ask size,bid price,bid size,last price,volume,open interest,closing price,exp closing price,date div,exp date div
0,2016-01-04,2016-01-08,call,15.0,11.375,20.0,11.3,12.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377
1,2016-01-04,2016-01-08,put,15.0,0.005,1400.0,0.0,0.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377


In [37]:
# mid price: average of the ask and bid quotes, rounded to 5 decimal places
quote_sum = options_df["ask price"] + options_df["bid price"]
options_df["mid price"] = quote_sum.div(2).round(5)
options_df.head(2)

Unnamed: 0,date,expiration date,type,strike price,ask price,ask size,bid price,bid size,last price,volume,open interest,closing price,exp closing price,date div,exp date div,mid price
0,2016-01-04,2016-01-08,call,15.0,11.375,20.0,11.3,12.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377,11.3375
1,2016-01-04,2016-01-08,put,15.0,0.005,1400.0,0.0,0.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377,0.0025


In [38]:
# concatenating call and put rows for the same price
# Rows are processed as consecutive (row, next row) pairs: the first row of each
# pair receives both rows' mid prices in its "call mid" / "put mid" columns.
# NOTE(review): this assumes every pair holds the call and the put of one
# contract (same date / expiration / strike); nothing here verifies that --
# confirm against the data.

# float defaults, so writing the float mid prices below needs no dtype upcast
# (initialising with the integer 0 triggers pandas' incompatible-dtype warning)
options_df["call mid"] = 0.0
options_df["put mid"] = 0.0

# resolve column positions by name instead of hard-coding 15 / 16 / 17
mid_col = options_df.columns.get_loc("mid price")
call_col = options_df.columns.get_loc("call mid")
put_col = options_df.columns.get_loc("put mid")

# positions of the first row of every complete pair; stopping at len - 1 keeps
# `indices + 1` in bounds when the row count is odd (a trailing unpaired row
# simply keeps its 0.0 defaults)
indices = np.arange(0, len(options_df) - 1, 2)

# boolean masks telling whether the first row of each pair is a call or a put
first_row_type = options_df["type"].iloc[indices]
call_mask = (first_row_type == "call").values
put_mask = (first_row_type == "put").values

# assign values based on call options
# if call, call mid is the row's own mid price and put mid is the next row's
options_df.iloc[indices[call_mask], call_col] = options_df.iloc[indices[call_mask], mid_col].values
options_df.iloc[indices[call_mask], put_col] = options_df.iloc[indices[call_mask] + 1, mid_col].values
# assign values based on put options
# if put, put mid is the row's own mid price and call mid is the next row's
options_df.iloc[indices[put_mask], put_col] = options_df.iloc[indices[put_mask], mid_col].values
options_df.iloc[indices[put_mask], call_col] = options_df.iloc[indices[put_mask] + 1, mid_col].values

In [39]:
# keep only the rows with an even index label (the first row of each pair),
# then renumber the index 0..n-1
has_even_label = (options_df.index % 2) == 0
options_df = options_df.loc[has_even_label]
options_df.index = np.arange(len(options_df))

# columns that are not needed from here on
unused_columns = [
    "bid price",
    "ask price",
    "last price",
    "volume",
    "open interest",
    "exp closing price",
    "date div",
    "exp date div",
    "type",
]
options_df = options_df.drop(columns=unused_columns)

# DTE: whole days between the quote date and the expiration date
expiry_day = pd.to_datetime(options_df["expiration date"])
quote_day = pd.to_datetime(options_df["date"])
T = (expiry_day - quote_day).dt.days
options_df["DTE"] = T

options_df.head(2)

Unnamed: 0,date,expiration date,strike price,ask size,bid size,closing price,mid price,call mid,put mid,DTE
0,2016-01-04,2016-01-08,15.0,20.0,12.0,26.337,11.3375,11.3375,0.0025,4
1,2016-01-04,2016-01-08,16.25,1412.0,0.0,26.337,0.0025,10.0875,0.0025,4


In [42]:
# one row per quote date; every selected column holds the list of that date's values
list_columns = ["DTE", "strike price", "closing price", "call mid", "put mid"]
by_date = options_df.groupby("date")
df = pd.DataFrame({column: by_date[column].apply(list) for column in list_columns})

# the group key is the index at this point; turn it into a regular "date" column
df = df.reset_index()

df["date"] = pd.to_datetime(df["date"])

df.head(2)

Unnamed: 0,date,DTE,strike price,closing price,call mid,put mid
0,2016-01-04,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...","[15.0, 16.25, 17.5, 18.75, 20.0, 21.25, 22.5, ...","[26.337, 26.337, 26.337, 26.337, 26.337, 26.33...","[11.3375, 10.0875, 8.8375, 7.5875, 6.3375, 5.0...","[0.0025, 0.0025, 0.0025, 0.0025, 0.00125, 0.00..."
1,2016-01-05,"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...","[15.0, 16.25, 17.5, 18.75, 20.0, 20.125, 20.25...","[25.677, 25.677, 25.677, 25.677, 25.677, 25.67...","[10.67875, 9.42875, 8.17875, 6.92875, 5.67875,...","[0.0025, 0.0025, 0.0025, 0.0025, 0.00125, 0.00..."
