In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pandas_ta as ta
import yfinance as yf

from matplotlib.pyplot import figure
from pandarallel import pandarallel
from scipy.stats import norm

%matplotlib widget
# Let pandas render up to 100 columns / 100 rows before it truncates a frame.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

In [None]:
# --- Load -------------------------------------------------------------------
# Read every file found under `directory_path` (recursively) into one raw frame.
dfs = []
directory_path = "data/spy-options-data-2020-2022"

for dirname, _, filenames in os.walk(directory_path):
    for filename in filenames:
        data_path = os.path.join(dirname, filename)
        print(data_path)
        dfs.append(pd.read_csv(data_path, low_memory=False))

if not dfs:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No data files found under {directory_path!r}")

# ignore_index: do not carry each file's own row labels into a duplicated index.
df_raw = pd.concat(dfs, ignore_index=True)

# Strip "[", "]" and space characters from the column headers.
_header_cleanup = str.maketrans("", "", "[] ")
columns = [s.translate(_header_cleanup) for s in df_raw.columns]
df_raw.columns = columns

# --- Coerce -----------------------------------------------------------------
# The three textual date columns are dropped; every remaining column is coerced
# to a number (unparseable cells become NaN).
date_columns = ["QUOTE_READTIME", "QUOTE_DATE", "EXPIRE_DATE"]
numeric_cols = [c for c in df_raw.columns if c not in date_columns]

df_final = df_raw.drop(columns=date_columns).apply(pd.to_numeric, errors="coerce")
df_final = df_final.drop(columns=["C_SIZE", "P_SIZE"])

# Epoch seconds -> tz-aware UTC timestamps.
df_final["EXPIRE_UNIX"] = pd.to_datetime(df_final["EXPIRE_UNIX"], unit="s", utc=True)
# Vectorised rounding to the NEAREST day (same result as rounding each Timestamp).
# NOTE(review): nearest-day rounding moves any quote stamped after 12:00 UTC to
# the following calendar day - confirm this is intended rather than a floor.
df_final["QUOTE_UNIXTIME"] = pd.to_datetime(df_final["QUOTE_UNIXTIME"], unit="s", utc=True).dt.round("D")

# --- Identify ---------------------------------------------------------------
# One id per (expiry, strike) contract and one per (quote day, contract).
# The built-in hash() of a str is salted per interpreter process, so ids built
# with it change on every kernel restart; hash_pandas_object is deterministic.
_option_key = df_final["EXPIRE_UNIX"].astype(str) + df_final["STRIKE"].astype(str)
df_final["OPTION_ID"] = pd.util.hash_pandas_object(_option_key, index=False)
_quote_key = df_final["QUOTE_UNIXTIME"].astype(str) + df_final["OPTION_ID"].astype(str)
df_final["QUOTE_ID"] = pd.util.hash_pandas_object(_quote_key, index=False)

# Chronological order within each contract is what later group-wise rolling/shift
# operations on this frame rely on.
df_final = df_final.sort_values(by=["QUOTE_UNIXTIME", "EXPIRE_UNIX", "STRIKE"]).reset_index(drop=True)

# --- Filter -----------------------------------------------------------------
# Quote count per contract.
# NOTE(review): the count is taken BEFORE the DTE filter, so a kept contract may
# have fewer than 21 rows left afterwards - confirm that is the intent.
df_final_total_options = df_final.groupby("OPTION_ID", as_index=False).size().rename(columns={"size": "total"})

# Keep quotes with DTE below 60 that belong to contracts quoted more than 20 times.
_frequent_option_ids = df_final_total_options.loc[df_final_total_options.total > 20].OPTION_ID
df_final = df_final.loc[(df_final.DTE < 60) & df_final.OPTION_ID.isin(_frequent_option_ids)]

df_final

In [None]:
# Look-ahead horizon, in rows (quotes) of the same contract.
periods = 10

# For each ask column add MAX_<column>_<periods>: the highest ask the same
# contract (OPTION_ID) shows over its NEXT `periods` quotes. Rows whose contract
# has fewer than `periods` quotes after them get NaN.
for column in ["C_ASK", "P_ASK"]:
    max_ask_column = f"MAX_{column}_{periods}"

    # Trailing max per contract. groupby-rolling returns an (OPTION_ID, row label)
    # MultiIndex in group order; drop the key level and realign to df_final's rows.
    trailing_max = (
        df_final.groupby("OPTION_ID")[column]
        .rolling(window=periods, min_periods=1)
        .max()
        .droplevel(0)
        .reindex(df_final.index)
    )

    # Pull the trailing max from `periods` rows ahead back onto the current row,
    # per contract: the value at row t then covers rows t+1 .. t+periods.
    df_rolling_max_ask = (
        trailing_max.groupby(df_final["OPTION_ID"])
        .shift(periods=-periods)
        .to_frame(max_ask_column)
    )

    # Drop any column left by a previous run of this cell so re-running is safe,
    # and rebind instead of mutating the (filtered) frame in place.
    df_final = df_final.drop(columns=max_ask_column, errors="ignore").join(df_rolling_max_ask)

df_final

In [None]:
# Relative-rise threshold read by is_good_buy (0.1 == 10 %).
min_change = 0.1

# Names of the forward-max columns for the current `periods` horizon.
max_c_ask_column, max_p_ask_column = (f"MAX_{side}_ASK_{periods}" for side in ("C", "P"))

# Current ask minus the forward max, per side, then both sides summed.
# NOTE(review): this is current - max, the opposite sign of the (max - current)
# change used inside is_good_buy - confirm the sign convention is intended.
_call_diff_column = f"{max_c_ask_column}_DIFF"
_put_diff_column = f"{max_p_ask_column}_DIFF"
df_final[_call_diff_column] = df_final["C_ASK"] - df_final[max_c_ask_column]
df_final[_put_diff_column] = df_final["P_ASK"] - df_final[max_p_ask_column]
df_final["ASK_DIFF_TOTAL"] = df_final[_call_diff_column] + df_final[_put_diff_column]


def is_good_buy(row):
    """Tell whether the call or the put ask of ``row`` rises by more than ``min_change``.

    The rise is measured relative to the current ask:
    ``(forward max - ask) / ask``. A side whose current ask is 0 counts as
    a rise of 0, which avoids dividing by zero.

    Reads the module-level names ``max_c_ask_column``, ``max_p_ask_column``
    (columns holding the forward max) and ``min_change`` (the threshold).

    Args:
        row: one record exposing ``C_ASK`` / ``P_ASK`` as attributes and the
            forward-max columns by key (e.g. a DataFrame row).

    Returns:
        Truthy when either side's relative rise exceeds ``min_change``.
    """
    call_ask = row.C_ASK
    if call_ask == 0:
        call_rise = 0
    else:
        call_rise = (row[max_c_ask_column] - call_ask) / call_ask

    put_ask = row.P_ASK
    if put_ask == 0:
        put_rise = 0
    else:
        put_rise = (row[max_p_ask_column] - put_ask) / put_ask

    return call_rise > min_change or put_rise > min_change


# Row-wise labelling with is_good_buy is currently disabled:
# df_final["GOOD_BUY"] = df_final.apply(is_good_buy, axis=1)

# Combined ask of the call and the put on each row.
df_final["ASK_TOTAL"] = df_final["C_ASK"].add(df_final["P_ASK"])

df_final

In [None]:
# df_final.loc[(df_final.GOOD_BUY) & (df_final.ASK_TOTAL < 2)]
# df_final.groupby("GOOD_BUY", as_index=False).size()
# df_final.describe().astype(str)
# df_final.corr().astype(str)

In [None]:
# Spot-check: every row whose (day-rounded, UTC) quote timestamp is 2020-01-03.
df_final[df_final["QUOTE_UNIXTIME"] == pd.Timestamp("2020-01-03 00:00:00+00:00")]