In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from clda import ClassificationModel

In [2]:
import sys 

sys.path.append('../../../')

from bin.main import get_path 
from main import Manager 
from bin.utils.tools import encode_orders
from stockData import StockData
from backtest.strategies import Policy 

m = Manager(get_path('../../../'))
p = Policy(get_path('../../../'), verbose = False)


2025-05-04 00:13:08,285 - bin.options.optgd.db_connect - INFO - Options DB Initialized
2025-05-04 00:13:08,287 - INFO - PriceDB Initialized successfully at 2025-05-04 00:13:08.287168
2025-05-04 00:13:08,287 - bin.price.db_connect - INFO - PriceDB Initialized successfully at 2025-05-04 00:13:08.287168
2025-05-04 00:13:08,287 - INFO - Established 3 database connections
2025-05-04 00:13:08,287 - bin.price.db_connect - INFO - Established 3 database connections
2025-05-04 00:13:08,333 - bin.alerts.options_alerts - INFO - Notifications instance initialized.
2025-05-04 00:13:08,339 - backtest.simulator - INFO - MarketSim initialized with verbose=False


In [None]:
def get_sd(stock, manager = m):
    sd = StockData(stock=stock, manager=manager, cache_dir="../../../data_cache")
    sd.clear_cache(disk=True, stock_specific=False)
    df = sd.get_features()
    return df 
def get_stock_data(stock, manager = m):
    df = get_sd(stock, manager)
    df['target'] = df['close'].pct_change().shift(-1)
    df = df.dropna()
    x = df.drop(columns=["close", "open", "high", "low","target"])
    y = df["target"]
    X_new = x.tail(1)
    X = x.dropna()
    y = pd.Series(np.where(y > 0, 1, 0), name='target', index = y.index)
    # Select 15 random features
    selected_features = np.random.choice(x.columns, size=15, replace=False)
    x = x[selected_features]
    print(f'df shape: {df.shape} X shape: {X.shape}, y shape: {y.shape}, X_new shape: {X_new.shape}')
    return x, y, X_new


def fit_models(stock, manager = m):
    # Get stock data
    X, y, X_new = get_stock_data(stock, manager)

    # Initialize and run the model
    model = ClassificationModel(
        X=X,
        y=y,
        numerical_cols=X.columns.tolist(),
        verbose = 0, 
        time_series=True
    )
    model.preprocess_data()
    model.train_models()

    # Display results
    results = model.get_results()
    print("\nModel Performance Results:")
    # print(results)

    next_prediction = model.predict_new_data(X_new)
    return model

In [11]:
def get_orders(models, stock = 'spy'):
    o = {}

    for x in models.keys():
        preds = models[x]
        orders = encode_orders(predictions = preds.values, test_index=preds.index, stock = stock, shares = 10, name = x)
        o[x] = orders

    orders = []; names = []
    for x in o.keys():
        orders.append(o[x])
        names.append(x)
    
    return orders, names 

In [12]:
def evaluate_orders(orders, names, policy = p):
    res = policy.eval_multiple_orders(
        orders = orders,
        names = names, 
        sv = 10000, 
        commission = 1.0, 
        impact = 0.0005
    )

    sim_results = p.list_eval.copy()
    more_stats = []
    for key in sim_results.keys():
        more_stats.append(p._qs(name = key, portvals=sim_results[key]['portfolio']).T )
    more_stats = pd.concat(more_stats, axis=1)
    return res, more_stats




In [13]:
from bin.utils.tools import pretty_print
def main(stock, manager = m, policy = p):
    # Fit models
    model = fit_models(stock, manager)
    models = model.model_predictions
    # Get orders
    orders, names = get_orders(models, stock)
    # Evaluate orders
    res, more_stats = evaluate_orders(orders, names, policy)
    # Print results
    pretty_print(res)
    # pretty_print(more_stats)
    return model

In [17]:
model = main("amc", m, p)


2025-05-04 00:15:24,788 - clda - INFO - ClassificationModel initialized with X shape: (779, 15)
2025-05-04 00:15:24,788 - clda - INFO - Starting data preprocessing


df shape: (779, 78) X shape: (779, 73), y shape: (779,), X_new shape: (1, 73)


2025-05-04 00:15:26,390 - backtest.simulator - INFO - Portfolio value computation completed, final value: $9,217.25



Model Performance Results:


2025-05-04 00:15:26,542 - backtest.simulator - INFO - Portfolio value computation completed, final value: $6,323.13
2025-05-04 00:15:26,680 - backtest.simulator - INFO - Portfolio value computation completed, final value: $6,393.81
2025-05-04 00:15:26,788 - backtest.simulator - INFO - Portfolio value computation completed, final value: $8,770.80
2025-05-04 00:15:26,890 - backtest.simulator - INFO - Portfolio value computation completed, final value: $9,114.76
2025-05-04 00:15:27,039 - backtest.simulator - INFO - Portfolio value computation completed, final value: $6,323.71
2025-05-04 00:15:27,171 - backtest.simulator - INFO - Portfolio value computation completed, final value: $7,045.04
2025-05-04 00:15:27,261 - backtest.simulator - INFO - Portfolio value computation completed, final value: $5,521.45
2025-05-04 00:15:27,537 - backtest.simulator - INFO - Portfolio value computation completed, final value: $10,209.30


Unnamed: 0,Stock,Days,StartDate,EndDate,StartBalance,EndBalance,Return,Commission,Impact
LDA,amc,234.0,2024-05-24,2025-05-01,10000.0,9217.25,-7.83,1.0,0.0
QDA,amc,234.0,2024-05-24,2025-05-01,10000.0,6323.13,-36.77,1.0,0.0
Naive Bayes,amc,234.0,2024-05-24,2025-05-01,10000.0,6393.81,-36.06,1.0,0.0
KNN,amc,234.0,2024-05-24,2025-05-01,10000.0,8770.8,-12.29,1.0,0.0
PCA KNN,amc,234.0,2024-05-24,2025-05-01,10000.0,9114.76,-8.85,1.0,0.0
Logistic Regression,amc,234.0,2024-05-24,2025-05-01,10000.0,6323.71,-36.76,1.0,0.0
Neural Network,amc,234.0,2024-05-24,2025-05-01,10000.0,7045.04,-29.55,1.0,0.0
Buy and Hold,amc,234.0,2024-05-24,2025-05-01,10000.0,5521.45,-44.79,0.0,0.0
Optimal Policy,amc,234.0,2024-05-24,2025-05-01,10000.0,10209.3,2.09,0.0,0.0


In [19]:
get_stock_data("amc", m)[0]

df shape: (779, 78) X shape: (779, 73), y shape: (779,), X_new shape: (1, 73)


Unnamed: 0_level_0,call_iv_chng,total_oi,total_oi_chng,call_iv,call_oi_pct_chng,call_vol,reversion_10D,put_vol_chng,SMA6D,BB_28D,total_vol_chng,SMA96D,call_vol_pct,lows_10D,ADX_28D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-03-24,0.000000,3291960,0.0,0.902535,0.000000,1008378,1.541234,0.0,176.800001,0.826938,0.0,242.260417,0.712445,129.000000,83.797119
2022-03-25,0.101813,1382278,-1909682.0,1.004348,-0.068725,88054,1.232094,-337020.0,185.216667,0.770873,-1257344.0,240.025000,0.557191,129.000000,83.886110
2022-03-28,0.134388,899634,-482644.0,1.138736,-0.018451,19588,2.410663,-28218.0,207.766665,1.946285,-96684.0,238.386458,0.319293,131.699997,83.971923
2022-03-29,-0.573065,816960,-82674.0,0.565671,0.071456,36314,1.748676,-4056.0,230.399996,1.537370,12670.0,237.293750,0.490610,142.300003,84.054671
2022-03-30,0.093045,933896,116936.0,0.658716,-0.067283,19582,0.859000,4546.0,242.766660,0.938831,-12186.0,235.980208,0.316697,148.600006,84.134464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-25,0.000000,2154724,-67594.0,0.000000,0.032570,44553,-0.780669,-3485.0,2.720000,-0.503420,-6938.0,3.369792,1.765043,2.650000,90.504664
2025-04-28,0.000000,2093040,-61684.0,0.000000,-0.019875,79701,-0.539057,295.0,2.710000,-0.436147,22414.0,3.343958,1.809503,2.650000,90.772520
2025-04-29,0.000000,2151734,58694.0,0.000000,0.003949,55014,-0.815608,-778.0,2.708333,-0.540216,-57698.0,3.322604,1.745883,2.650000,91.030810
2025-04-30,0.000000,2185700,33966.0,0.833791,0.002142,61285,-1.188813,4851.0,2.700000,-0.536732,-10278.0,3.303438,1.560068,2.530000,91.279875


In [16]:
model.models

{'LDA': LinearDiscriminantAnalysis(),
 'QDA': QuadraticDiscriminantAnalysis(),
 'Naive Bayes': GaussianNB(),
 'KNN': KNeighborsClassifier(n_neighbors=6),
 'PCA KNN': KNeighborsClassifier(n_neighbors=4),
 'Logistic Regression': LogisticRegressionCV(class_weight='balanced', cv=5),
 'Neural Network': MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=5000)}