In [23]:
import sys
import os
# Append the parent of the current working directory to the import path so the
# project-local imports further down in this cell can be resolved.
# NOTE(review): presumably the notebook is launched from a folder one level
# below the project root — confirm. Each re-run of this cell appends the same
# entry again.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import pandas as pd
from datetime import datetime, timezone
import logging

# Root logger setup: INFO and above, one console stream handler,
# records rendered as "timestamp - severity - message".
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

%reload_ext autoreload
%autoreload 2
from data.raw.retrievers.alpaca_markets_retriever import AlpacaMarketsRetriever
from config.constants import *
from data.processed.dataset_creation import DatasetCreator
from data.processed.indicators import *
from data.processed.targets import Balanced3ClassClassification
from data.processed.normalization import ZScoreOverWindowNormalizer, ZScoreNormalizer, MinMaxNormalizer
from data.processed.missing_values_handling import DummyMissingValuesHandler
from data.processed.dataset_pytorch import DatasetPytorch
from modeling.trainer import Trainer
from observability.mlflow_integration import log_experiment

from config.experiments.cur_experiment import config

In [None]:
# Fetch bar data for the symbol(s) and start/end range given in the experiment
# config.
# NOTE(review): presumably this calls the Alpaca Markets API over the network —
# confirm credentials/caching behaviour in AlpacaMarketsRetriever.
retriever = AlpacaMarketsRetriever()

retrieval_result = retriever.bars(
    symbol_or_symbols=config.data_config.symbol_or_symbols, 
    start=config.data_config.start, 
    end=config.data_config.end)

In [None]:
# Configure the dataset builder entirely from the experiment config: features,
# target, normalizer, missing-value handler, the last date belonging to the
# training set, the input sequence length and whether sequences are flattened.
dataset_creator = DatasetCreator(
    features=config.data_config.features,
    target=config.data_config.target,
    normalizer=config.data_config.normalizer,
    missing_values_handler=config.data_config.missing_values_handler,
    train_set_last_date=config.data_config.train_set_last_date, 
    in_seq_len=config.data_config.in_seq_len,
    flatten_sequence=config.data_config.flatten_sequence
)

# Turn the retrieved bars into train/test inputs and targets
# (presumably NumPy arrays, given the method name — confirm).
X_train, y_train, X_test, y_test = dataset_creator.create_dataset_numpy(retrieval_result)
# Last expression of the cell: displays the four shapes as a quick sanity check.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Training batches follow the shuffle setting from the experiment config.
train_loader = DatasetPytorch(X_train, y_train).as_dataloader(
    batch_size=config.data_config.batch_size,
    shuffle=config.data_config.shuffle
)
# Evaluation batches are never shuffled: the shuffle flag in the config is a
# training concern, and a fixed order keeps batch composition (and anything
# computed per batch) identical from one evaluation pass to the next.
test_loader = DatasetPytorch(X_test, y_test).as_dataloader(
    batch_size=config.data_config.batch_size,
    shuffle=False
)

In [None]:
# Take the model object defined by the experiment config; the bare name on the
# last line makes the notebook display its repr.
model = config.model_config.model
model

In [None]:
# Wire the model, data loaders and all training settings from the experiment
# config into a Trainer.
# NOTE(review): the test loader is passed as `val_loader`, so there is no
# separate validation split here. If Trainer uses validation metrics for
# checkpointing, early stopping or LR scheduling, the reported test scores will
# be optimistic — confirm.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=test_loader,
    loss_fn=config.train_config.loss_fn,
    optimizer=config.train_config.optimizer,
    scheduler=config.train_config.scheduler,
    num_epochs=config.train_config.num_epochs,
    device=config.train_config.device,
    metrics=config.train_config.metrics,
    save_path=config.train_config.save_path
)

In [None]:
# Run training. `train()` returns two values: the model (rebinding `model`)
# and a history object; both are passed to `log_experiment` later on.
model, history = trainer.train()

In [24]:
# Record the experiment: config, trained model and training history.
# The input sample is the inputs element (index 0) of the first batch drawn
# from the training loader, moved to the trainer's device.
# NOTE(review): presumably used to infer the model signature for MLflow —
# confirm in observability.mlflow_integration.
log_experiment(
    config=config, 
    model=model, 
    history=history,
    input_data_sample=next(iter(train_loader))[0].to(trainer.device))

2025/06/17 19:12:42 INFO mlflow.tracking.fluent: Experiment with name 'Cur Experiment_1750180362.4239957' does not exist. Creating a new experiment.
Registered model 'Cur Model' already exists. Creating a new version of this model...
2025/06/17 19:12:51 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Cur Model, version 2
Created version '2' of model 'Cur Model'.


🏃 View run righteous-wolf-266 at: http://127.0.0.1:8080/#/experiments/649602426792884185/runs/712940f7dce44b3a9b9a8daef219deb0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/649602426792884185


In [None]:
import itertools 
from sklearn.base import clone
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.datasets import make_classification
import lightgbm as lgb


def evaluate_model_accuracy(model, param_grid=None, train_data=None, test_data=None):
    """Fit an estimator and report its best accuracy on the evaluation split.

    Without ``param_grid`` the given ``model`` is fitted as-is (in place) and
    scored once. With a grid, every combination of the listed values is applied
    to a fresh ``clone`` of ``model``, each clone is fitted and scored, and the
    best-scoring combination is kept (first one wins on ties).

    NOTE(review): candidates are ranked by accuracy on the evaluation split
    itself, so the reported "best" accuracy of a grid search is optimistic.

    Args:
        model: Estimator exposing ``fit``/``predict``; with a grid it must also
            support ``clone`` and ``set_params``.
        param_grid: Optional mapping of parameter name to an iterable of
            candidate values. ``None`` or an empty mapping means "no search".
        train_data: Optional ``(X, y)`` pair used for fitting. Defaults to the
            notebook-level ``X_train``/``y_train``.
        test_data: Optional ``(X, y)`` pair used for scoring. Defaults to the
            notebook-level ``X_test``/``y_test``.

    Returns:
        tuple: ``(best_accuracy, best_params)``. ``best_params`` is ``None``
        when no grid was searched or the grid produced no combinations.
    """
    # Explicit data wins; otherwise fall back to the notebook globals so that
    # existing calls keep working unchanged.
    X_fit, y_fit = train_data if train_data is not None else (X_train, y_train)
    X_eval, y_eval = test_data if test_data is not None else (X_test, y_test)

    def score(candidate):
        """Fit ``candidate`` on the training pair and return its accuracy."""
        candidate = candidate.fit(X_fit, y_fit)
        return accuracy_score(y_eval, candidate.predict(X_eval))

    # ``None`` (rather than 0) marks "nothing evaluated yet", so a candidate
    # scoring exactly 0.0 is still recorded together with its parameters.
    best_accuracy = None
    best_params = None
    if param_grid:
        names, value_lists = zip(*param_grid.items())
        for combo in itertools.product(*value_lists):
            params = dict(zip(names, combo))
            accuracy = score(clone(model).set_params(**params))
            if best_accuracy is None or accuracy > best_accuracy:
                best_accuracy, best_params = accuracy, params
    else:
        best_accuracy = score(model)

    if best_accuracy is None:
        # The grid contained an empty value list, so nothing was evaluated.
        best_accuracy = 0

    print(f'Best accuracy: {best_accuracy}, bestparams: {best_params}')
    return best_accuracy, best_params

# LightGBM baseline: a 3-class multiclass classifier with fixed hyperparameters,
# evaluated once (no parameter grid) via evaluate_model_accuracy.
# NOTE(review): num_class=3 presumably mirrors the three-class target used
# above — confirm it matches config.data_config.target.
# NOTE(review): evaluate_model_accuracy fits on the notebook-level
# X_train/y_train; if those are sequences rather than 2-D feature matrices
# (see flatten_sequence in the config) the fit would need flattened input —
# confirm.
lgb_model = lgb.LGBMClassifier(
    n_estimators=1000, 
    learning_rate=0.001,
    max_depth=5,
    num_leaves=31, 
    objective='multiclass', 
    num_class=3
    )
evaluate_model_accuracy(lgb_model)