In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from clda import ClassificationModel

In [2]:
import sys 

sys.path.append('../../../')

from bin.main import get_path 
from main import Manager 
from bin.utils.tools import encode_orders
from stockData import StockData
from backtest.strategies import Policy 

m = Manager(get_path('../../../'))
p = Policy(get_path('../../../'), verbose = False)


2025-05-05 23:25:27,281 - bin.options.optgd.db_connect - INFO - Options DB Initialized
2025-05-05 23:25:27,291 - INFO - PriceDB Initialized successfully at 2025-05-05 23:25:27.291039
2025-05-05 23:25:27,291 - bin.price.db_connect - INFO - PriceDB Initialized successfully at 2025-05-05 23:25:27.291039
2025-05-05 23:25:27,291 - INFO - Established 3 database connections
2025-05-05 23:25:27,291 - bin.price.db_connect - INFO - Established 3 database connections
2025-05-05 23:25:27,357 - bin.alerts.options_alerts - INFO - Notifications instance initialized.
2025-05-05 23:25:27,370 - backtest.simulator - INFO - MarketSim initialized with verbose=False


In [3]:
def get_sd(stock, manager = m):
    sd = StockData(stock=stock, manager=manager, cache_dir="../../../data_cache")
    sd.clear_cache(disk=True, stock_specific=False)
    df = sd.get_features().replace(0, np.nan).dropna()
    return df 


def get_stock_data(stock, manager = m):
    df = get_sd(stock, manager)
    df['target'] = df['close'].pct_change().shift(-1)
    df = df.dropna()
    x = df.drop(columns=["close", "open", "high", "low","target"])
    y = df["target"]
    X_new = x.tail(1)
    X = x.dropna()
    y = pd.Series(np.where(y > 0, 1, 0), name='target', index = y.index)
    # Select 15 random features
    selected_features = np.random.choice(x.columns, size=15, replace=False)
    x = x[selected_features]
    print(f'df shape: {df.shape} X shape: {X.shape}, y shape: {y.shape}, X_new shape: {X_new.shape}')
    return x, y, X_new


def fit_models(stock, manager = m):
    # Get stock data
    X, y, X_new = get_stock_data(stock, manager)

    # Initialize and run the model
    model = ClassificationModel(
        X=X,
        y=y,
        numerical_cols=X.columns.tolist(),
        verbose = 0, 
        time_series=True
    )
    model.preprocess_data()
    model.train_models()

    # Display results
    results = model.get_results()
    print("\nModel Performance Results:")
    # print(results)

    next_prediction = model.predict_new_data(X_new)
    return model

In [10]:
def get_orders(models, stock = 'spy'):
    o = {}

    for x in models.keys():
        preds = models[x]
        orders = encode_orders(predictions = preds.values, test_index=preds.index, stock = stock, shares = 100, name = x)
        o[x] = orders

    orders = []; names = []
    for x in o.keys():
        orders.append(o[x])
        names.append(x)
    
    return orders, names 

In [11]:
def evaluate_orders(orders, names, policy = p):
    res = policy.eval_multiple_orders(
        orders = orders,
        names = names, 
        sv = 100_000, 
        commission = 1.0, 
        impact = 0.0005
    )

    sim_results = p.list_eval.copy()
    more_stats = []
    for key in sim_results.keys():
        more_stats.append(p._qs(name = key, portvals=sim_results[key]['portfolio']).T )
    more_stats = pd.concat(more_stats, axis=1)
    return res, more_stats




In [12]:
from bin.utils.tools import pretty_print
def main(stock, manager = m, policy = p):
    # Fit models
    model = fit_models(stock, manager)
    models = model.model_predictions
    # Get orders
    orders, names = get_orders(models, stock)
    # Evaluate orders
    res, more_stats = evaluate_orders(orders, names, policy)
    # Print results
    pretty_print(res)
    # pretty_print(more_stats)
    return model, res, more_stats

In [13]:
model, res ,more_stats = main("iwm", m, p)


2025-05-05 23:36:32,845 - clda - INFO - ClassificationModel initialized with X shape: (832, 15)
2025-05-05 23:36:32,846 - clda - INFO - Starting data preprocessing


df shape: (832, 78) X shape: (832, 73), y shape: (832,), X_new shape: (1, 73)


2025-05-05 23:36:34,232 - backtest.simulator - INFO - Final value: $100,944.78, Holdings: {'iwm': 500.0, 'Cash': 704.7821670532394}
2025-05-05 23:36:34,266 - backtest.simulator - INFO - Final value: $100,584.65, Holdings: {'iwm': 500.0, 'Cash': 344.64888229369535}
2025-05-05 23:36:34,298 - backtest.simulator - INFO - Final value: $102,642.80, Holdings: {'iwm': 500.0, 'Cash': 2402.800246429455}
2025-05-05 23:36:34,331 - backtest.simulator - INFO - Final value: $102,204.42, Holdings: {'iwm': 500.0, 'Cash': 1964.4219871521054}
2025-05-05 23:36:34,362 - backtest.simulator - INFO - Final value: $101,982.85, Holdings: {'iwm': 500.0, 'Cash': 1742.8482429504402}
2025-05-05 23:36:34,391 - backtest.simulator - INFO - Final value: $102,776.35, Holdings: {'iwm': 200.0, 'Cash': 62680.34995040894}



Model Performance Results:


2025-05-05 23:36:34,423 - backtest.simulator - INFO - Final value: $101,397.26, Holdings: {'iwm': 500.0, 'Cash': 1157.2665969848567}
2025-05-05 23:36:34,447 - backtest.simulator - INFO - Final value: $103,785.64, Holdings: {'iwm': 0.0, 'Cash': 103785.640625}
2025-05-05 23:36:34,513 - backtest.simulator - INFO - Final value: $130,781.49, Holdings: {'iwm': 500.0, 'Cash': 30541.488647460938}


Unnamed: 0,Stock,Days,StartDate,EndDate,StartBalance,EndBalance,Return,Commission,Impact
LDA,iwm,259.0,2024-04-22,2025-05-02,100000.0,100944.78,0.94,1.0,0.0
QDA,iwm,259.0,2024-04-22,2025-05-02,100000.0,100584.65,0.58,1.0,0.0
Naive Bayes,iwm,259.0,2024-04-22,2025-05-02,100000.0,102642.8,2.64,1.0,0.0
KNN,iwm,259.0,2024-04-22,2025-05-02,100000.0,102204.42,2.2,1.0,0.0
PCA KNN,iwm,259.0,2024-04-22,2025-05-02,100000.0,101982.85,1.98,1.0,0.0
Logistic Regression,iwm,259.0,2024-04-22,2025-05-02,100000.0,102776.35,2.78,1.0,0.0
Neural Network,iwm,259.0,2024-04-22,2025-05-02,100000.0,101397.26,1.4,1.0,0.0
Buy and Hold,iwm,259.0,2024-04-22,2025-05-02,100000.0,103785.64,3.79,0.0,0.0
Optimal Policy,iwm,259.0,2024-04-22,2025-05-02,100000.0,130781.49,30.78,0.0,0.0


In [14]:
orders, names = get_orders(model.model_predictions, stock = "iwm")

In [15]:
import pickle
d = {x:y for x, y in zip(names, orders)}
with open("orders.pkl", "wb") as f:
    pickle.dump(d, f)