In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from clda import ClassificationModel

In [2]:
import sys 

sys.path.append('../../../')

from bin.main import get_path 
from main import Manager 
from bin.utils.tools import encode_orders
from stockData import StockData
from backtest.strategies import Policy 

m = Manager(get_path('../../../'))
p = Policy(get_path('../../../'), verbose = False)


2025-05-05 23:02:47,010 - bin.options.optgd.db_connect - INFO - Options DB Initialized
2025-05-05 23:02:47,013 - INFO - PriceDB Initialized successfully at 2025-05-05 23:02:47.013147
2025-05-05 23:02:47,013 - bin.price.db_connect - INFO - PriceDB Initialized successfully at 2025-05-05 23:02:47.013147
2025-05-05 23:02:47,013 - INFO - Established 3 database connections
2025-05-05 23:02:47,013 - bin.price.db_connect - INFO - Established 3 database connections
2025-05-05 23:02:47,087 - bin.alerts.options_alerts - INFO - Notifications instance initialized.
2025-05-05 23:02:47,096 - backtest.simulator - INFO - MarketSim initialized with verbose=False


In [5]:
def get_sd(stock, manager = m):
    sd = StockData(stock=stock, manager=manager, cache_dir="../../../data_cache")
    sd.clear_cache(disk=True, stock_specific=False)
    df = sd.get_features().replace(0, np.nan).dropna()
    return df 


def get_stock_data(stock, manager = m):
    df = get_sd(stock, manager)
    df['target'] = df['close'].pct_change().shift(-1)
    df = df.dropna()
    x = df.drop(columns=["close", "open", "high", "low","target"])
    y = df["target"]
    X_new = x.tail(1)
    X = x.dropna()
    y = pd.Series(np.where(y > 0, 1, 0), name='target', index = y.index)
    # Select 15 random features
    selected_features = np.random.choice(x.columns, size=15, replace=False)
    x = x[selected_features]
    print(f'df shape: {df.shape} X shape: {X.shape}, y shape: {y.shape}, X_new shape: {X_new.shape}')
    return x, y, X_new


def fit_models(stock, manager = m):
    # Get stock data
    X, y, X_new = get_stock_data(stock, manager)

    # Initialize and run the model
    model = ClassificationModel(
        X=X,
        y=y,
        numerical_cols=X.columns.tolist(),
        verbose = 0, 
        time_series=True
    )
    model.preprocess_data()
    model.train_models()

    # Display results
    results = model.get_results()
    print("\nModel Performance Results:")
    # print(results)

    next_prediction = model.predict_new_data(X_new)
    return model

In [6]:
def get_orders(models, stock = 'spy'):
    o = {}

    for x in models.keys():
        preds = models[x]
        orders = encode_orders(predictions = preds.values, test_index=preds.index, stock = stock, shares = 10, name = x)
        o[x] = orders

    orders = []; names = []
    for x in o.keys():
        orders.append(o[x])
        names.append(x)
    
    return orders, names 

In [8]:
def evaluate_orders(orders, names, policy = p):
    res = policy.eval_multiple_orders(
        orders = orders,
        names = names, 
        sv = 10000, 
        commission = 1.0, 
        impact = 0.0005
    )

    sim_results = p.list_eval.copy()
    more_stats = []
    for key in sim_results.keys():
        more_stats.append(p._qs(name = key, portvals=sim_results[key]['portfolio']).T )
    more_stats = pd.concat(more_stats, axis=1)
    return res, more_stats




In [9]:
from bin.utils.tools import pretty_print
def main(stock, manager = m, policy = p):
    # Fit models
    model = fit_models(stock, manager)
    models = model.model_predictions
    # Get orders
    orders, names = get_orders(models, stock)
    # Evaluate orders
    res, more_stats = evaluate_orders(orders, names, policy)
    # Print results
    pretty_print(res)
    # pretty_print(more_stats)
    return model, res, more_stats

In [10]:
model, res ,more_stats = main("iwm", m, p)


2025-05-05 23:05:35,144 - clda - INFO - ClassificationModel initialized with X shape: (832, 15)
2025-05-05 23:05:35,144 - clda - INFO - Starting data preprocessing


df shape: (832, 78) X shape: (832, 73), y shape: (832,), X_new shape: (1, 73)


2025-05-05 23:05:36,509 - backtest.simulator - INFO - Final value: $10,058.62, Holdings: {'iwm': 50.0, 'Cash': 34.62175651550365}
2025-05-05 23:05:36,543 - backtest.simulator - INFO - Final value: $10,186.27, Holdings: {'iwm': 50.0, 'Cash': 162.26702919006289}
2025-05-05 23:05:36,574 - backtest.simulator - INFO - Final value: $10,138.29, Holdings: {'iwm': 50.0, 'Cash': 114.29322517395008}
2025-05-05 23:05:36,608 - backtest.simulator - INFO - Final value: $10,180.93, Holdings: {'iwm': 50.0, 'Cash': 156.92576736450224}
2025-05-05 23:05:36,643 - backtest.simulator - INFO - Final value: $10,184.49, Holdings: {'iwm': 50.0, 'Cash': 160.4869647979731}
2025-05-05 23:05:36,674 - backtest.simulator - INFO - Final value: $10,043.53, Holdings: {'iwm': 50.0, 'Cash': 19.530749206543078}



Model Performance Results:


2025-05-05 23:05:36,708 - backtest.simulator - INFO - Final value: $10,259.76, Holdings: {'iwm': 50.0, 'Cash': 235.76170494079565}
2025-05-05 23:05:36,732 - backtest.simulator - INFO - Final value: $10,377.09, Holdings: {'iwm': 0.0, 'Cash': 10377.08529663086}
2025-05-05 23:05:36,798 - backtest.simulator - INFO - Final value: $13,078.15, Holdings: {'iwm': 50.0, 'Cash': 3054.1488647460938}


Unnamed: 0,Stock,Days,StartDate,EndDate,StartBalance,EndBalance,Return,Commission,Impact
LDA,iwm,259.0,2024-04-22,2025-05-02,10000.0,10058.62,0.59,1.0,0.0
QDA,iwm,259.0,2024-04-22,2025-05-02,10000.0,10186.27,1.86,1.0,0.0
Naive Bayes,iwm,259.0,2024-04-22,2025-05-02,10000.0,10138.29,1.38,1.0,0.0
KNN,iwm,259.0,2024-04-22,2025-05-02,10000.0,10180.93,1.81,1.0,0.0
PCA KNN,iwm,259.0,2024-04-22,2025-05-02,10000.0,10184.49,1.84,1.0,0.0
Logistic Regression,iwm,259.0,2024-04-22,2025-05-02,10000.0,10043.53,0.44,1.0,0.0
Neural Network,iwm,259.0,2024-04-22,2025-05-02,10000.0,10259.76,2.6,1.0,0.0
Buy and Hold,iwm,259.0,2024-04-22,2025-05-02,10000.0,10377.09,3.77,0.0,0.0
Optimal Policy,iwm,259.0,2024-04-22,2025-05-02,10000.0,13078.15,30.78,0.0,0.0


In [11]:
orders, names = get_orders(model.model_predictions, stock = "iwm")

In [14]:
import pickle
d = {x:y for x, y in zip(names, orders)}
with open("orders.pkl", "wb") as f:
    pickle.dump(d, f)