In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import timedelta
import datetime as dt
import heapq 

In [2]:
# loading data
path = r"Data\AAPL_options.csv"
options_df = pd.read_csv(path, parse_dates=["date", "expiration date"])

In [3]:
# standardizing column naming 
options_df.columns = [x.lower().replace(" ", "_") for x in options_df.columns]
original_options_df = options_df.copy()

In [4]:
# concatenating call and put rows for the same price    
# call mid, put mid are columns 16 and 17 respectively
options_df["call_bid"] = 0
options_df["put_bid"] = 0
options_df["call_ask"] = 0 
options_df["put_ask"] = 0
options_df["call_open_interest"] = 0
options_df["put_open_interest"] = 0
options_df["call_volume"] = 0
options_df["put_volume"] = 0
options_df["call_bid_size"] = 0
options_df["put_bid_size"] = 0
options_df["call_ask_size"] = 0
options_df["put_ask_size"] = 0
    
# boolean masks for call and put options in every second row
call_mask = (options_df.iloc[::2, 2] == "call").values
put_mask = (options_df.iloc[::2, 2] == "put").values

# indices of every second row
indices = np.arange(0, options_df.shape[0], 2)

In [5]:
def line_break(index):
        if index % 6 == 0 and index != 0:
            return None
        else: 
            return " "

for idx, i in enumerate(options_df.columns):
    print(f"{idx}: {i},", end=line_break(idx))

0: date, 1: expiration_date, 2: type, 3: strike_price, 4: ask_price, 5: ask_size, 6: bid_price,
7: bid_size, 8: last_price, 9: volume, 10: open_interest, 11: closing_price, 12: exp_closing_price,
13: date_div, 14: exp_date_div, 15: call_bid, 16: put_bid, 17: call_ask, 18: put_ask,
19: call_open_interest, 20: put_open_interest, 21: call_volume, 22: put_volume, 23: call_bid_size, 24: put_bid_size,
25: call_ask_size, 26: put_ask_size, 

In [6]:
len(options_df.columns)

27

In [7]:
assignment_list = ["bid_price", "bid_price", "ask_price", "ask_price",
                   "open_interest", "open_interest", "volume", "volume", 
                   "bid_size", "bid_size", "ask_size", "ask_size"]

for i in range(15, len(options_df.columns)-1):
    # call value assignment based on call mask
    options_df.iloc[indices[call_mask], i] = options_df.iloc[indices[call_mask], list(options_df.columns).index(assignment_list[i-15])]
    # put value assignment based on call mask
    options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-15])]

    # put value assignment based on put mask
    options_df.iloc[indices[put_mask], i + 1] = options_df.iloc[indices[put_mask], list(options_df.columns).index(assignment_list[i-15])]
    # call value assignment based on put mask
    options_df.iloc[indices[put_mask], i] = options_df.iloc[indices[put_mask] + 1, list(options_df.columns).index(assignment_list[i-15])]

  options_df.iloc[indices[call_mask], i] = options_df.iloc[indices[call_mask], list(options_df.columns).index(assignment_list[i-15])]
 1.46625e+01]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-15])]
 1.46625e+01]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-15])]
 1.56875e+01]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-15])]
 1.56875e+01]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  options_df.iloc[indices[call_m

In [8]:
# reducing rows to every other and fixing index
options_df = options_df[options_df.index % 2 == 0]
options_df.index = np.arange(0, len(options_df))

In [9]:
# combining open interest and volume
options_df["combined_oi"] = options_df["call_open_interest"] + options_df["put_open_interest"]
options_df["combined_volume"] = options_df["call_volume"] + options_df["put_volume"]

In [10]:
# creating DTE
T = (options_df["expiration_date"] - options_df["date"]).dt.days
options_df["DTE"] = T

In [11]:
options_df.iloc[0, :-3]

date                  2016-01-04 00:00:00
expiration_date       2016-01-08 00:00:00
type                                 call
strike_price                         15.0
ask_price                          11.375
ask_size                             20.0
bid_price                            11.3
bid_size                             12.0
last_price                            0.0
volume                                0.0
open_interest                         0.0
closing_price                      26.337
exp_closing_price                   24.24
date_div                         0.085246
exp_date_div                      0.09377
call_bid                             11.3
put_bid                              11.3
call_ask                           11.375
put_ask                            11.375
call_open_interest                    0.0
put_open_interest                       0
call_volume                             0
put_volume                              0
call_bid_size                     

In [12]:
original_options_df.head()

Unnamed: 0,date,expiration_date,type,strike_price,ask_price,ask_size,bid_price,bid_size,last_price,volume,open_interest,closing_price,exp_closing_price,date_div,exp_date_div
0,2016-01-04,2016-01-08,call,15.0,11.375,20.0,11.3,12.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377
1,2016-01-04,2016-01-08,put,15.0,0.005,1400.0,0.0,0.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377
2,2016-01-04,2016-01-08,put,16.25,0.005,1412.0,0.0,0.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377
3,2016-01-04,2016-01-08,call,16.25,10.125,20.0,10.05,12.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377
4,2016-01-04,2016-01-08,put,17.5,0.005,1404.0,0.0,0.0,0.0,0.0,0.0,26.337,24.24,0.085246,0.09377


In [13]:
options_df.columns

Index(['date', 'expiration_date', 'type', 'strike_price', 'ask_price',
       'ask_size', 'bid_price', 'bid_size', 'last_price', 'volume',
       'open_interest', 'closing_price', 'exp_closing_price', 'date_div',
       'exp_date_div', 'call_bid', 'put_bid', 'call_ask', 'put_ask',
       'call_open_interest', 'put_open_interest', 'call_volume', 'put_volume',
       'call_bid_size', 'put_bid_size', 'call_ask_size', 'put_ask_size',
       'combined_oi', 'combined_volume', 'DTE'],
      dtype='object')

In [16]:
call_columns_to_keep = ["date", "expiration_date", "strike_price", 
                        "closing_price", "call_bid", "call_ask", "call_bid_size",
                        "call_ask_size", "call_open_interest", "call_volume", "DTE"]
call_df = options_df[call_columns_to_keep]
call_df.head(1)

Unnamed: 0,date,expiration_date,strike_price,closing_price,call_bid,call_ask,call_bid_size,call_ask_size,call_open_interest,call_volume,DTE
0,2016-01-04,2016-01-08,15.0,26.337,11.3,11.375,12,20,0.0,0,4


In [17]:
put_columns_to_keep = ["date", "expiration_date", "strike_price", 
                        "closing_price", "put_bid", "put_ask", "put_bid_size",
                        "put_ask_size", "put_open_interest", "put_volume", "DTE"]
put_df = options_df[put_columns_to_keep]
put_df.head(1)

Unnamed: 0,date,expiration_date,strike_price,closing_price,put_bid,put_ask,put_bid_size,put_ask_size,put_open_interest,put_volume,DTE
0,2016-01-04,2016-01-08,15.0,26.337,11.3,11.375,12,1400,0,0,4


In [20]:
len(call_df), len(put_df)

(156830, 156830)

In [33]:
bid_mask = (options_df["call_bid"] > 0) & (options_df["put_bid"] > 0)
ask_mask = (options_df["call_ask"] > 0) & (options_df["put_ask"] > 0)
size_mask = options_df["call_size"] & options_df["put_size"]

options_df = options_df[bid_mask & ask_mask & size_mask]

In [15]:
len(options_df)

116936