In [None]:
import utils
import models.categorical
import math
import pickle
import numpy as np
import pandas as pd
from datetime import date

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

# Hyperparameter Tuning

In [None]:
# Hyperparameter search space for LogisticRegression, split into two sub-grids
# because the admissible penalties differ per solver.
# NOTE(review): the "none" penalty string is version-dependent in scikit-learn
# (newer releases expect None instead) -- confirm against the pinned version.
logregParams = [
    # liblinear: L1 or L2 regularisation
    dict(
        C=[0.1, 0.5, 1.0, 1.5],
        solver=["liblinear"],
        penalty=["l1", "l2"],
        class_weight=["balanced", None],
    ),
    # lbfgs / newton-cg: L2 regularisation or no penalty
    dict(
        C=[0.1, 0.5, 1.0, 1.5],
        solver=["lbfgs", "newton-cg"],
        penalty=["l2", "none"],
        class_weight=["balanced", None],
    ),
]

# Date bounds; start_date is passed to walk_forward as start_index by the tuning cells.
start_date = date(2010, 1, 1)
end_date = date(2021, 1, 1)

In [None]:
# Grid search for the "logreg/pct/" feature set (no technical indicators, no
# macro indicators): every parameter combination is scored with walk_forward
# and the per-future results are written to CSV.
parameter_grid = list(ParameterGrid(logregParams))
y_var = "LONG_SHORT"
file_dir = "logreg/pct/"

for future in tqdm(utils.futuresList):
    print(future)

    # prepare the data frame, feature columns and X / y for this future
    df = utils.prepare_data(future)
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=False,
    )
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination, filled in as each one is evaluated
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # walk forward validation for every parameter combination
    for grid_idx, params in enumerate(parameter_grid):
        model = LogisticRegression(**params)
        win_results, agg_results = models.categorical.walk_forward(
            model=model,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)
        # copy the "SMA" row of the aggregate results into the collated table
        for column, source in (("accuracy_SMA", "accuracy"), ("opp_cost_SMA", "opp_cost")):
            agg_results_collated.loc[grid_idx, column] = agg_results.loc["SMA", source]

    # parameters and metrics side by side, highest opp_cost_SMA first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1).sort_values(
        by=["opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

In [None]:
# Grid search for the "logreg/pct_tech_macro/" feature set (MACD, RSI14 and
# VPT technical indicators plus macro indicators): every parameter combination
# is scored with walk_forward and the per-future results are written to CSV.
parameter_grid = list(ParameterGrid(logregParams))
y_var = "LONG_SHORT"
file_dir = "logreg/pct_tech_macro/"

for future in tqdm(utils.futuresList):
    print(future)

    # prepare the data frame, feature columns and X / y for this future
    df = utils.prepare_data(future)
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination, filled in as each one is evaluated
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # walk forward validation for every parameter combination
    for grid_idx, params in enumerate(parameter_grid):
        model = LogisticRegression(**params)
        win_results, agg_results = models.categorical.walk_forward(
            model=model,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)
        # copy the "SMA" row of the aggregate results into the collated table
        for column, source in (("accuracy_SMA", "accuracy"), ("opp_cost_SMA", "opp_cost")):
            agg_results_collated.loc[grid_idx, column] = agg_results.loc["SMA", source]

    # parameters and metrics side by side, highest opp_cost_SMA first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1).sort_values(
        by=["opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

In [None]:
# Grid search for the "logreg/pct_macro/" feature set (macro indicators only,
# no technical indicators): every parameter combination is scored with
# walk_forward and the per-future results are written to CSV.
parameter_grid = list(ParameterGrid(logregParams))
y_var = "LONG_SHORT"
file_dir = "logreg/pct_macro/"

for future in tqdm(utils.futuresList):
    print(future)

    # prepare the data frame, feature columns and X / y for this future
    df = utils.prepare_data(future)
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=True,
    )
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination, filled in as each one is evaluated
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # walk forward validation for every parameter combination
    for grid_idx, params in enumerate(parameter_grid):
        model = LogisticRegression(**params)
        win_results, agg_results = models.categorical.walk_forward(
            model=model,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)
        # copy the "SMA" row of the aggregate results into the collated table
        for column, source in (("accuracy_SMA", "accuracy"), ("opp_cost_SMA", "opp_cost")):
            agg_results_collated.loc[grid_idx, column] = agg_results.loc["SMA", source]

    # parameters and metrics side by side, highest opp_cost_SMA first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1).sort_values(
        by=["opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

In [None]:
# Grid search for the "logreg/pct_tech/" feature set (MACD, RSI14 and VPT
# technical indicators, no macro indicators): every parameter combination is
# scored with walk_forward and the per-future results are written to CSV.
parameter_grid = list(ParameterGrid(logregParams))
y_var = "LONG_SHORT"
file_dir = "logreg/pct_tech/"

for future in tqdm(utils.futuresList):
    print(future)

    # prepare the data frame, feature columns and X / y for this future
    df = utils.prepare_data(future)
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=False,
    )
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination, filled in as each one is evaluated
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # walk forward validation for every parameter combination
    for grid_idx, params in enumerate(parameter_grid):
        model = LogisticRegression(**params)
        win_results, agg_results = models.categorical.walk_forward(
            model=model,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)
        # copy the "SMA" row of the aggregate results into the collated table
        for column, source in (("accuracy_SMA", "accuracy"), ("opp_cost_SMA", "opp_cost")):
            agg_results_collated.loc[grid_idx, column] = agg_results.loc["SMA", source]

    # parameters and metrics side by side, highest opp_cost_SMA first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1).sort_values(
        by=["opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

In [None]:
# Grid search for the "logreg/pct_linear_tech_macro/" feature set (linearise=True,
# MACD, RSI14 and VPT technical indicators plus macro indicators): every
# parameter combination is scored with walk_forward and the per-future results
# are written to CSV.
parameter_grid = list(ParameterGrid(logregParams))
y_var = "LONG_SHORT"
file_dir = "logreg/pct_linear_tech_macro/"

for future in tqdm(utils.futuresList):
    print(future)

    # prepare the data frame, feature columns and X / y for this future
    df = utils.prepare_data(future)
    X_vars = utils.generate_X_vars(
        future,
        linearise=True,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )
    X_df, y_df = utils.generate_X_y(df, X_vars=X_vars, y_var=y_var)
    cost_df = df["CLOSE_PCT"]

    # one row per parameter combination, filled in as each one is evaluated
    agg_results_collated = pd.DataFrame(
        index=list(range(len(parameter_grid))),
        columns=["accuracy_SMA", "opp_cost_SMA"],
    )
    win_results_collated = []

    # walk forward validation for every parameter combination
    for grid_idx, params in enumerate(parameter_grid):
        model = LogisticRegression(**params)
        win_results, agg_results = models.categorical.walk_forward(
            model=model,
            X=X_df,
            y=y_df,
            cost_weight=cost_df,
            rolling=True,
            max_windows=100,
            start_index=start_date,
        )
        win_results_collated.append(win_results)
        # copy the "SMA" row of the aggregate results into the collated table
        for column, source in (("accuracy_SMA", "accuracy"), ("opp_cost_SMA", "opp_cost")):
            agg_results_collated.loc[grid_idx, column] = agg_results.loc["SMA", source]

    # parameters and metrics side by side, highest opp_cost_SMA first
    parameter_df = pd.DataFrame.from_records(parameter_grid)
    combined_df = pd.concat([parameter_df, agg_results_collated], axis=1).sort_values(
        by=["opp_cost_SMA"], ascending=False
    )
    combined_df.to_csv(f"model_metrics/categorical/{file_dir}{future}.csv", index=False)

# Save Meta Predictions

In [None]:
import utils
import models.categorical
import math
import pickle
import numpy as np
import pandas as pd
from datetime import date

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Save meta predictions for the "logreg/pct_macro/" configuration of every future.
file_dir = "logreg/pct_macro/"
y_var = "LONG_SHORT"

for future in tqdm(utils.futuresList):
    print(future)

    # generate X vars
    # The feature set must match the one the pct_macro tuning run used
    # (macro indicators on, no technical indicators); previously this cell
    # passed macro_indicators=False, which is the plain "pct" feature set.
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=True,
    )

    models.categorical.save_meta_predictions(
        path=file_dir,
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        future=future,
        X_vars=X_vars,
        y_var=y_var,
        model_name="logreg",
    )

# Save Models

In [None]:
import utils
import models.categorical
import pickle
import numpy as np
import pandas as pd
from datetime import date

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

## Train Models

In [None]:
# Save the "logreg/pct_tech_macro" model of every future, passing the
# 2018-09-01 .. 2020-09-30 window as train_start / train_end.
for future in utils.futuresList:
    # feature columns: MACD, RSI14, VPT plus macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_tech_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
        train_start=date(2018, 9, 1),
        train_end=date(2020, 9, 30),
    )

In [None]:
# Save the "logreg/pct" model of every future, passing the
# 2018-09-01 .. 2020-09-30 window as train_start / train_end.
for future in utils.futuresList:
    # feature columns: no technical indicators, no macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=False,
    )

    models.categorical.save_model(
        path='logreg/pct',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
        train_start=date(2018, 9, 1),
        train_end=date(2020, 9, 30),
    )

In [None]:
# Save the "logreg/pct_linear_tech_macro" model of every future, passing the
# 2018-09-01 .. 2020-09-30 window as train_start / train_end.
for future in utils.futuresList:
    # feature columns: linearise=True, MACD, RSI14, VPT plus macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=True,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_linear_tech_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
        train_start=date(2018, 9, 1),
        train_end=date(2020, 9, 30),
    )

In [None]:
# Save the "logreg/pct_tech" model of every future, passing the
# 2018-09-01 .. 2020-09-30 window as train_start / train_end.
for future in utils.futuresList:
    # feature columns: MACD, RSI14, VPT, no macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=False,
    )

    models.categorical.save_model(
        path='logreg/pct_tech',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
        train_start=date(2018, 9, 1),
        train_end=date(2020, 9, 30),
    )

In [None]:
# Save the "logreg/pct_macro" model of every future, passing the
# 2018-09-01 .. 2020-09-30 window as train_start / train_end.
for future in utils.futuresList:
    # feature columns: macro indicators only, no technical indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
        train_start=date(2018, 9, 1),
        train_end=date(2020, 9, 30),
    )

## Final Models

In [None]:
# Save the "logreg/pct_tech_macro" model of every future; no train_start /
# train_end is passed here, so save_model uses its own defaults for them.
for future in utils.futuresList:
    # feature columns: MACD, RSI14, VPT plus macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_tech_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
    )

In [None]:
# Save the "logreg/pct" model of every future; no train_start / train_end is
# passed here, so save_model uses its own defaults for them.
for future in utils.futuresList:
    # feature columns: no technical indicators, no macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=False,
    )

    models.categorical.save_model(
        path='logreg/pct',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
    )

In [None]:
# Save the "logreg/pct_linear_tech_macro" model of every future; no
# train_start / train_end is passed here, so save_model uses its own defaults.
for future in utils.futuresList:
    # feature columns: linearise=True, MACD, RSI14, VPT plus macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=True,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_linear_tech_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
    )

In [None]:
# Save the "logreg/pct_tech" model of every future; no train_start /
# train_end is passed here, so save_model uses its own defaults for them.
for future in utils.futuresList:
    # feature columns: MACD, RSI14, VPT, no macro indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=["MACD", "RSI14", "VPT"],
        macro_indicators=False,
    )

    models.categorical.save_model(
        path='logreg/pct_tech',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
    )

In [None]:
# Save the "logreg/pct_macro" model of every future; no train_start /
# train_end is passed here, so save_model uses its own defaults for them.
for future in utils.futuresList:
    # feature columns: macro indicators only, no technical indicators
    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=True,
    )

    models.categorical.save_model(
        path='logreg/pct_macro',
        metric="opp_cost_SMA",
        model_fn=LogisticRegression,
        model_wrapper=models.categorical.LogRegWrapper,
        future=future,
        X_vars=X_vars,
        y_var="LONG_SHORT",
    )

# Model Evaluation

In [None]:
# For every future, read the "logreg/pct_macro" tuning CSV, pick the row with
# the smallest value of `metric`, keep its non-missing hyperparameters, and
# join the result with the future -> industry mapping.
metric = "opp_cost_SMA"
path = "logreg/pct_macro"
param_columns = ["C", "class_weight", "penalty", "solver"]

future_results = {}
for future in utils.futuresList:
    metrics_df = pd.read_csv(f'model_metrics/categorical/{path}/{future}.csv')

    # Series.min() skips NaN, whereas the builtin min() can return NaN when a
    # metric is missing and then the equality filter below matches no row.
    # NOTE(review): the tuning cells sort opp_cost_SMA in descending order while
    # this selects the minimum -- confirm which direction is "best".
    best_value = metrics_df[metric].min()
    best_metric = metrics_df.loc[metrics_df[metric] == best_value].reset_index(drop=True)

    # retrieve optimal parameters corresponding to best metric (first row on ties),
    # skipping the metric columns and any parameter stored as a missing value
    params_dict = {}
    for col in metrics_df.columns:
        if col.startswith(('opp_cost', 'accuracy')):
            continue
        value = best_metric[col][0]
        if pd.isna(value):
            continue
        params_dict[col] = value

    future_results[future] = params_dict

# Label columns by name rather than by position: a parameter that is missing
# for some (or all) futures would otherwise shift or break the column labels.
future_results_df = (
    pd.DataFrame.from_dict(future_results, orient="index")
    .reindex(columns=param_columns)
    .rename_axis("Ticker")
    .reset_index()
)

future_industry_df = pd.read_csv("utils/future_industry_mapping.csv")

future_combined_df = pd.merge(future_industry_df, future_results_df, on="Ticker")
future_combined_df = future_combined_df.sort_values(["Type", "UnitedStates"])

In [None]:
# Raise the row display limit to 100, then show the combined table.
pd.options.display.max_rows = 100
future_combined_df

In [None]:
# Print each feature name next to its fitted coefficient (rounded to 4 d.p.)
# for the saved final pct_macro models of the selected futures.
best_futures = ["F_LB", "F_LQ"]
for future in best_futures:
    print(future)

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # model files that were produced by this project.
    model_path = f'saved_models/categorical/logreg/final/pct_macro/{future}.p'
    with open(model_path, 'rb') as model_file:
        lr_wrapper = pickle.load(model_file)

    X_vars = utils.generate_X_vars(
        future,
        linearise=False,
        tech_indicators=[],
        macro_indicators=True,
    )

    # pair feature names with the first row of coef_, in order
    for feature_name, coefficient in zip(X_vars, lr_wrapper.model.coef_[0]):
        print(feature_name, round(coefficient, 4))

    print("--------------------------------")