In [None]:
import utils
import models.categorical
import pandas as pd
from datetime import date

from sklearn.dummy import DummyClassifier
from sklearn.model_selection import ParameterGrid
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from progressbar import ProgressBar

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Tickers to evaluate; the list itself is defined in the project's utils module.
futuresList = utils.futuresList

# Handed to walk_forward as its start_index argument in the evaluation cells.
start_date = date(year=2000, month=1, day=1)

In [None]:
# Baseline run: DummyClassifier(strategy="prior") evaluated on every future with
# walk-forward validation. One row of aggregate metrics per future is written to
# model_metrics/dummy.csv.
strategy = "prior"
# Seed recorded in the output file. It is also passed to the estimator so the
# recorded value describes the model that was actually run (the "prior"
# strategy itself does not draw random numbers).
random_state = 123
output_path = "model_metrics/dummy.csv"

# progress bar wrapped around the loop over futures
pbar = ProgressBar()

# prepare collated results: one row per future, metric cells filled in as each
# future finishes
agg_results_collated = pd.DataFrame(index=list(range(len(futuresList))),
                                    columns=["accuracy_SMA", "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"])

# run parameters are identical for every row, so attach them once instead of
# on every loop iteration
agg_results_collated["future"] = futuresList
agg_results_collated["strategy"] = strategy
agg_results_collated["random_state"] = random_state

for i in pbar(range(len(futuresList))):
    future = futuresList[i]
    print(future)

    # load data; the header is overwritten with a fixed ten-column schema
    df = pd.read_csv(f"tickerData/{future}.txt", parse_dates=["DATE"])
    df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']
    df = df.set_index("DATE")
    # drop rows with zero volume or zero close, then rows with any missing value
    df = df[(df.VOL != 0) & (df.CLOSE != 0)]
    df = df.dropna(axis=0)

    # load X and y
    # NOTE(review): the semantics of "perc" and of shift=0 live in utils and
    # are not visible from this notebook -- confirm there.
    X_df = utils.generate_X_df([df.CLOSE, df.VOL], ["perc", "perc"])
    y_df = utils.generate_y_cat(df.CLOSE)
    cost_df = utils.perc_change(df.CLOSE, shift=0)

    # run walk forward validation
    model = DummyClassifier(strategy=strategy, random_state=random_state)
    win_results, agg_results = models.categorical.walk_forward(model=model,
                                                               X=X_df, y=y_df, cost_weight=cost_df,
                                                               rolling=True, max_windows=100,
                                                               start_index=start_date)

    # agg_results is read by row label ("SMA"/"EMA") and column label
    # ("accuracy"/"opp_cost"); copy the four cells into this future's row
    for avg in ("SMA", "EMA"):
        for metric in ("accuracy", "opp_cost"):
            agg_results_collated.loc[i, f"{metric}_{avg}"] = agg_results.loc[avg, metric]

    # The CSV is rewritten after every future; rows for futures that have not
    # been processed yet carry empty metric cells.
    combined_df = agg_results_collated[["future", "strategy", "random_state", "accuracy_SMA",
                                        "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"]]

    combined_df.to_csv(output_path, index=False)

In [None]:
# Baseline run: DummyClassifier(strategy="stratified") evaluated on every
# future with walk-forward validation. One row of aggregate metrics per future
# is written to model_metrics/dummy_stratified.csv.
strategy = "stratified"
# The "stratified" strategy draws random predictions, so the seed must be
# passed to the estimator. Previously 123 was only written to the CSV and the
# model ran unseeded, making results differ between runs while the file
# claimed a fixed seed.
random_state = 123
output_path = "model_metrics/dummy_stratified.csv"

# progress bar wrapped around the loop over futures
pbar = ProgressBar()

# prepare collated results: one row per future, metric cells filled in as each
# future finishes
agg_results_collated = pd.DataFrame(index=list(range(len(futuresList))),
                                    columns=["accuracy_SMA", "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"])

# run parameters are identical for every row, so attach them once instead of
# on every loop iteration
agg_results_collated["future"] = futuresList
agg_results_collated["strategy"] = strategy
agg_results_collated["random_state"] = random_state

for i in pbar(range(len(futuresList))):
    future = futuresList[i]
    print(future)

    # load data; the header is overwritten with a fixed ten-column schema
    df = pd.read_csv(f"tickerData/{future}.txt", parse_dates=["DATE"])
    df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']
    df = df.set_index("DATE")
    # drop rows with zero volume or zero close, then rows with any missing value
    df = df[(df.VOL != 0) & (df.CLOSE != 0)]
    df = df.dropna(axis=0)

    # load X and y
    # NOTE(review): the semantics of "perc" and of shift=0 live in utils and
    # are not visible from this notebook -- confirm there.
    X_df = utils.generate_X_df([df.CLOSE, df.VOL], ["perc", "perc"])
    y_df = utils.generate_y_cat(df.CLOSE)
    cost_df = utils.perc_change(df.CLOSE, shift=0)

    # run walk forward validation with a seeded estimator
    model = DummyClassifier(strategy=strategy, random_state=random_state)
    win_results, agg_results = models.categorical.walk_forward(model=model,
                                                               X=X_df, y=y_df, cost_weight=cost_df,
                                                               rolling=True, max_windows=100,
                                                               start_index=start_date)

    # agg_results is read by row label ("SMA"/"EMA") and column label
    # ("accuracy"/"opp_cost"); copy the four cells into this future's row
    for avg in ("SMA", "EMA"):
        for metric in ("accuracy", "opp_cost"):
            agg_results_collated.loc[i, f"{metric}_{avg}"] = agg_results.loc[avg, metric]

    # The CSV is rewritten after every future; rows for futures that have not
    # been processed yet carry empty metric cells.
    combined_df = agg_results_collated[["future", "strategy", "random_state", "accuracy_SMA",
                                        "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"]]

    combined_df.to_csv(output_path, index=False)