## Import features

In [1]:
import os
import time
import xgboost as xgb

import pandas as pd

# Ensure working directory path is correct: climb towards the filesystem root
# until the current directory is the "Options-Project" folder, so the relative
# paths used further down resolve against the project root.
start_dir = os.getcwd()

while os.path.split(os.getcwd())[-1] != "Options-Project":
    parent_dir = os.path.dirname(os.getcwd())

    # At the filesystem root dirname() returns its argument unchanged; without
    # this guard the loop would spin forever when the notebook is started
    # outside the project tree.
    if parent_dir == os.getcwd():
        os.chdir(start_dir)  # leave the process where it started before failing
        raise FileNotFoundError(
            f'No "Options-Project" directory found at or above {start_dir}'
        )

    os.chdir(parent_dir)

### Variables

In [2]:
# Ticker symbol; also the name of the sub-directory that is read inside each
# parameter-type folder.
ticker = "AAPL"
# Folder (relative to the working directory) whose immediate sub-directories
# are treated as the parameter types.
model_params_path = "data/model_params/"

### Load

In [7]:
# Read every "*_param.csv" file for `ticker` from each parameter-type
# sub-directory of `model_params_path` and collect them in `model_params`
# (one long-format DataFrame per parameter type, sorted by date and tag).
start_time = time.perf_counter()  # monotonic clock, meant for measuring elapsed time

model_params = dict()

# Sanity check: without it a missing folder surfaces as a bare StopIteration
assert os.path.isdir(model_params_path), f"Can't find directory {model_params_path}!"

# Immediate sub-directories only; sorted so the load order (and therefore the
# key order of `model_params`) does not depend on the filesystem
for param_type in sorted(next(os.walk(model_params_path))[1]):

    param_path = os.path.join(model_params_path, param_type, ticker)
    param_list = []

    # Sanity check
    assert os.path.isdir(param_path), f"Can't find model parameters for {ticker}!"

    # Sorted for a deterministic concatenation order
    for file_year in sorted(os.listdir(param_path)):
        # Only files whose last "_"-separated part is exactly "param.csv"
        if file_year.split("_")[-1] == "param.csv":
            df = pd.read_csv(os.path.join(param_path, file_year))

            # Convert columns to correct format
            df["date"] = pd.to_datetime(df["date"]).dt.date

            # Not every parameter type has an "interval" column
            if "interval" in df.columns:
                df["interval"] = df["interval"].astype(str)

            param_list.append(df)

    # Concat & add to model params (types without any matching file are skipped)
    if param_list:
        model_params[param_type] = (
            pd.concat(param_list, ignore_index=True)
            .sort_values(by=["date", "tag"])
        )

print(f"Load model parameters - {round(time.perf_counter() - start_time, 2)} seconds")

Load model parameters - 0.22 seconds


### Pivot and combine model parameters to 1 row per day

In [8]:
# Pivot each parameter type to one row per date and outer-merge all of them
# into `param_df`.
# NOTE: this cell overwrites the long-format frames in `model_params` with
# their pivoted versions, so re-running it without re-running the Load cell
# first will fail (the pivot columns no longer exist).
param_df = pd.DataFrame(columns=["date"])

for param_type in model_params.keys():

    # "custom" parameters are pivoted by tag only, every other type by tag and interval
    if param_type == "custom":
        pivot_cols = ["tag"]
    else:
        pivot_cols = ["tag", "interval"]

    # No `values` given: every remaining column is pivoted, which yields
    # multi-level column labels
    df = model_params[param_type].pivot(index = "date", columns = pivot_cols)

    # Flatten column levels; str() keeps the join from raising a TypeError
    # when a label (e.g. a numeric tag) is not a string
    df.columns = ["-".join(map(str, col)) for col in df.columns.values]

    # Update param dict
    model_params[param_type] = df.reset_index()

    # Add to all params ("date" is the index level of `df` here; merge accepts
    # index level names in `on`). The outer join keeps dates that are missing
    # from some parameter types.
    param_df = param_df.merge(df, on="date", how="outer")

In [11]:
# Row counts: all merged dates vs. dates where no column has a missing value
print(f"Total days: {len(param_df)}")
print(f"Days without missing data: {len(param_df.dropna())}")

Total days: 4273
Days without missing data: 3740


### XGBoost