In [1]:
import utils
import models.categorical
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Symbols of the futures to evaluate, as exposed by the project's utils module.
# Each symbol is used further down to locate its price file at tickerData/<symbol>.txt.
futuresList = utils.futuresList

# Hyper-parameter grids for LogisticRegression. The list is expanded into individual
# parameter sets with ParameterGrid further down; every dict is an independent
# sub-grid, so only the solver/penalty pairs listed together are ever combined.
logregParams = [
    # liblinear: supports both L1 and L2 regularisation.
    {
        "C": [0.1, 0.5, 1.0, 1.5],
        "solver": ["liblinear"],
        "penalty": ["l1", "l2"],
        "class_weight": ["balanced", None],
    },
    # lbfgs / newton-cg with L2 regularisation.
    {
        "C": [0.1, 0.5, 1.0, 1.5],
        "solver": ["lbfgs", "newton-cg"],
        "penalty": ["l2"],
        "class_weight": ["balanced", None],
    },
    # lbfgs / newton-cg without regularisation. scikit-learn ignores C when no
    # penalty is applied, so sweeping C here would only repeat identical fits
    # (and the warning about it is hidden by the global warnings filter).
    # C is therefore pinned to its default value.
    # NOTE(review): scikit-learn >= 1.2 deprecates the string "none" in favour of
    # None (the string is rejected from 1.4 on) - switch when upgrading.
    {
        "C": [1.0],
        "solver": ["lbfgs", "newton-cg"],
        "penalty": ["none"],
        "class_weight": ["balanced", None],
    },
]

In [3]:
# Trial run on a single instrument: take the first symbol in the list.
future = futuresList[0]

# Read the price file and overwrite its column labels with fixed names.
df = pd.read_csv(f"tickerData/{future}.txt", parse_dates=["DATE"])
df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']

# Index by date, keep only rows whose volume and close are both non-zero,
# then drop every row that still contains a missing value.
df = (
    df.set_index("DATE")
    .loc[lambda frame: (frame["VOL"] != 0) & (frame["CLOSE"] != 0)]
    .dropna(axis=0)
)

# Features, labels and cost weights come from project helpers in utils.
# NOTE(review): "perc" presumably selects a percentage-change transform per
# input series - confirm against utils.generate_X_df.
X_df = utils.generate_X_df([df["CLOSE"], df["VOL"]], ["perc", "perc"])
y_df = utils.generate_y_cat(df["CLOSE"])
cost_df = utils.perc_change(df["CLOSE"], shift=0)

In [4]:
# Fixed seed handed to every model. liblinear shuffles the data while fitting, so
# without a seed repeated runs of this cell produce slightly different metrics.
RANDOM_STATE = 0
METRIC_COLUMNS = ["accuracy_SMA", "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"]

# Expand the sub-grids into a flat list of parameter sets.
parameter_grid = list(ParameterGrid(logregParams))

# prepare collated results
metric_records = []        # one dict of aggregate metrics per parameter set
win_results_collated = []  # per-window results, in parameter_grid order

# run walk forward validation
for param_set in parameter_grid:
    # The seed is only a default: a "random_state" entry in the grid would win.
    model = LogisticRegression(**{"random_state": RANDOM_STATE, **param_set})
    win_results, agg_results = models.categorical.walk_forward(
        model=model, X=X_df, y=y_df, cost_weight=cost_df, rolling=True, max_windows=100
    )
    win_results_collated.append(win_results)
    metric_records.append({
        "accuracy_SMA": agg_results.loc["SMA", "accuracy"],
        "opp_cost_SMA": agg_results.loc["SMA", "opp_cost"],
        "accuracy_EMA": agg_results.loc["EMA", "accuracy"],
        "opp_cost_EMA": agg_results.loc["EMA", "opp_cost"],
    })

# Build the metrics frame in one go so the columns get a numeric dtype instead of
# the object dtype left behind by filling an empty frame cell by cell.
agg_results_collated = pd.DataFrame(metric_records, columns=METRIC_COLUMNS)

# save parameters: row i of parameter_df describes row i of agg_results_collated
parameter_df = pd.DataFrame.from_records(parameter_grid)
combined_df = pd.concat([parameter_df, agg_results_collated], axis=1)

In [10]:
# Fixed seed handed to every model. liblinear shuffles the data while fitting, so
# without a seed repeated runs of this cell produce slightly different metrics.
RANDOM_STATE = 0
METRIC_COLUMNS = ["accuracy_SMA", "opp_cost_SMA", "accuracy_EMA", "opp_cost_EMA"]

# retrieve parameter grid
parameter_grid = list(ParameterGrid(logregParams))
# The parameter table is identical for every future, so build it once.
parameter_df = pd.DataFrame.from_records(parameter_grid)

for future in futuresList:
    # load data
    df = pd.read_csv(f"tickerData/{future}.txt", parse_dates=["DATE"])
    df.columns = ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OI', 'P', 'R', 'RINFO']
    df = df.set_index("DATE")
    # keep rows with non-zero volume and close, then drop rows with missing values
    df = df[(df.VOL != 0) & (df.CLOSE != 0)]
    df = df.dropna(axis=0)

    # load X and y
    X_df = utils.generate_X_df([df.CLOSE, df.VOL], ["perc", "perc"])
    y_df = utils.generate_y_cat(df.CLOSE)
    cost_df = utils.perc_change(df.CLOSE, shift=0)

    # prepare collated results (reset for every future)
    metric_records = []        # one dict of aggregate metrics per parameter set
    win_results_collated = []  # per-window results, in parameter_grid order

    # run walk forward validation
    for param_set in parameter_grid:
        # The seed is only a default: a "random_state" entry in the grid would win.
        model = LogisticRegression(**{"random_state": RANDOM_STATE, **param_set})
        win_results, agg_results = models.categorical.walk_forward(
            model=model, X=X_df, y=y_df, cost_weight=cost_df, rolling=True, max_windows=100
        )
        win_results_collated.append(win_results)
        metric_records.append({
            "accuracy_SMA": agg_results.loc["SMA", "accuracy"],
            "opp_cost_SMA": agg_results.loc["SMA", "opp_cost"],
            "accuracy_EMA": agg_results.loc["EMA", "accuracy"],
            "opp_cost_EMA": agg_results.loc["EMA", "opp_cost"],
        })

    # Build the metrics frame in one go so the columns get a numeric dtype instead
    # of the object dtype left behind by filling an empty frame cell by cell.
    agg_results_collated = pd.DataFrame(metric_records, columns=METRIC_COLUMNS)

    # save parameters alongside their metrics, one file per future
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1)
    combined_df.to_csv(f"model_metrics/logreg/perc/{future}.txt", index=False)

In [11]:
# Show the combined parameter/metric table. `combined_df` is reassigned on every
# pass of the per-future loop, so after that loop this is the table of the last
# future processed.
combined_df

Unnamed: 0,C,class_weight,penalty,solver,accuracy_SMA,opp_cost_SMA,accuracy_EMA,opp_cost_EMA
0,0.1,balanced,l1,liblinear,0.485697,9.51644,2.54136,81.338
1,0.1,balanced,l2,liblinear,0.492726,9.36104,2.55167,79.606
2,0.1,,l1,liblinear,0.517686,9.12362,2.69187,77.6465
3,0.1,,l2,liblinear,0.519047,9.12337,2.69323,77.6462
4,0.5,balanced,l1,liblinear,0.490795,9.48331,2.54671,80.9367
5,0.5,balanced,l2,liblinear,0.495651,9.22759,2.571,78.2269
6,0.5,,l1,liblinear,0.518085,9.12467,2.69227,77.6476
7,0.5,,l2,liblinear,0.519293,9.12351,2.69347,77.6464
8,1.0,balanced,l1,liblinear,0.490541,9.48032,2.53104,80.8265
9,1.0,balanced,l2,liblinear,0.498165,9.18755,2.60015,77.8073


In [9]:
# Show the collated walk-forward metrics, one row per parameter combination.
# NOTE(review): this cell's execution count (9) is lower than that of the cells
# placed before it, so the table may stem from an earlier run - re-run the
# notebook top to bottom to confirm.
agg_results_collated

Unnamed: 0,accuracy_SMA,opp_cost_SMA,accuracy_EMA,opp_cost_EMA
0,0.485697,9.51644,2.54136,81.338
1,0.492726,9.36104,2.55167,79.606
2,0.517686,9.12362,2.69187,77.6465
3,0.519047,9.12337,2.69323,77.6462
4,0.490798,9.48328,2.54672,80.9366
5,0.495651,9.22759,2.571,78.2269
6,0.518085,9.12467,2.69227,77.6476
7,0.519293,9.12351,2.69347,77.6464
8,0.490538,9.48035,2.53101,80.8267
9,0.498165,9.18755,2.60015,77.8073
