In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import timedelta
import datetime as dt
import heapq 

In [22]:
# loading data
path = r"Data\CVX_options.csv"
options_df = pd.read_csv(path, parse_dates=["date", "expiration date"])

In [23]:
# standardizing column naming 
options_df.columns = [x.lower().replace(" ", "_") for x in options_df.columns]

In [24]:
# concatenating call and put rows for the same price    
# call mid, put mid are columns 16 and 17 respectively
options_df["call_bid"] = 0
options_df["put_bid"] = 0
options_df["call_ask"] = 0 
options_df["put_ask"] = 0
options_df["call_open_interest"] = 0
options_df["put_open_interest"] = 0
options_df["call_volume"] = 0
options_df["put_volume"] = 0
options_df["call_size"] = False
options_df["put_size"] = False
    
# boolean masks for call and put options in every second row
call_mask = (options_df.iloc[::2, 2] == "call").values
put_mask = (options_df.iloc[::2, 2] == "put").values

# indices of every second row
indices = np.arange(0, options_df.shape[0], 2)

In [25]:
def line_break(index):
        if index % 6 == 0 and index != 0:
            return None
        else: 
            return " "

for idx, i in enumerate(options_df.columns):
    print(f"{idx}: {i},", end=line_break(idx))

0: date, 1: expiration_date, 2: type, 3: strike_price, 4: ask_price, 5: ask_size, 6: bid_price,
7: bid_size, 8: last_price, 9: volume, 10: open_interest, 11: closing_price, 12: exp_closing_price,
13: date_div, 14: exp_date_div, 15: call_bid, 16: put_bid, 17: call_ask, 18: put_ask,
19: call_open_interest, 20: put_open_interest, 21: call_volume, 22: put_volume, 23: call_size, 24: put_size,


In [26]:
assignment_list = ["bid_price", "bid_price", "ask_price", "ask_price",
                   "open_interest", "open_interest", "volume", "volume"]

for i in range(16, 21):
    # call value assignment based on call mask
    options_df.iloc[indices[call_mask], i] = options_df.iloc[indices[call_mask], list(options_df.columns).index(assignment_list[i-16])]
    # put value assignment based on call mask
    options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]

    # put value assignment based on put mask
    options_df.iloc[indices[put_mask], i] = options_df.iloc[indices[put_mask], list(options_df.columns).index(assignment_list[i-16])]
    # call value assignment based on put mask
    options_df.iloc[indices[put_mask], i + 1] = options_df.iloc[indices[put_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]

  options_df.iloc[indices[call_mask], i] = options_df.iloc[indices[call_mask], list(options_df.columns).index(assignment_list[i-16])]
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]
  options_df.iloc[indices[call_mask], i + 1] = options_df.iloc[indices[call_mask] + 1, list(options_df.columns).index(assignment_list[i-16])]


In [27]:
# bool values if size exists 
# bid size and ask size col
as_col, bs_col = 6, 8

# call size assignment based on call mask
options_df.iloc[indices[call_mask], 23] = (options_df.iloc[indices[call_mask], as_col] > 0) & (options_df.iloc[indices[call_mask], bs_col] > 0)
# put size assignment based on call mask
options_df.iloc[indices[call_mask], 24] = (options_df.iloc[indices[call_mask], as_col] > 0) & (options_df.iloc[indices[call_mask], bs_col] > 0)                                                                                  

# call size assignment based on put mask
options_df.iloc[indices[put_mask], 24] = (options_df.iloc[indices[put_mask], as_col] > 0) & (options_df.iloc[indices[put_mask], bs_col] > 0)
# put size assignment based on put mask
options_df.iloc[indices[put_mask], 23] = (options_df.iloc[indices[put_mask], as_col] > 0) & (options_df.iloc[indices[put_mask], bs_col] > 0)     

In [28]:
# reducing rows to every other and fixing index
options_df = options_df[options_df.index % 2 == 0]
options_df.index = np.arange(0, len(options_df))

In [29]:
# combining open interest and volume
options_df["combined_oi"] = options_df["call_open_interest"] + options_df["put_open_interest"]
options_df["combined_volume"] = options_df["call_volume"] + options_df["put_volume"]

In [30]:
# creating DTE
T = (options_df["expiration_date"] - options_df["date"]).dt.days
options_df["DTE"] = T

In [31]:
options_df.iloc[:, 15:].head()

Unnamed: 0,call_bid,put_bid,call_ask,put_ask,call_open_interest,put_open_interest,call_volume,put_volume,call_size,put_size,combined_oi,combined_volume,DTE
0,0,41.65,41.65,45.5,45.5,0.0,0,0,False,False,45.5,0,4
1,0,0.0,0.0,0.25,0.25,0.0,0,0,False,False,0.25,0,4
2,0,0.0,0.0,0.25,0.25,0.0,0,0,False,False,0.25,0,4
3,0,0.0,0.0,0.25,0.25,0.0,0,0,False,False,0.25,0,4
4,0,0.0,0.0,0.25,0.25,13.0,0,0,False,False,13.25,0,4


In [33]:
bid_mask = (options_df["call_bid"] > 0) & (options_df["put_bid"] > 0)
ask_mask = (options_df["call_ask"] > 0) & (options_df["put_ask"] > 0)
size_mask = options_df["call_size"] & options_df["put_size"]

options_df = options_df[bid_mask & ask_mask & size_mask]