## Read Data from Postgres

In [1]:
import pandas as pd
import numpy as np
import warnings
import joblib
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

from catboost import CatBoostRegressor
import optuna
import lightgbm as lgb
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import numpy as np

from dotenv import load_dotenv
load_dotenv()

import os
import joblib
import sys
sys.path.append('../scripts')

from process import process_data, split_dataframe_by_period, read_data

models_directory = "../models"

os.makedirs(models_directory, exist_ok=True)
print(f"Models will be saved in: {os.path.abspath(models_directory)}")

Models will be saved in: /Users/cagrigoksel/PycharmProjects/Stock-Prediction-Term-Assignment/models


In [2]:
df = read_data("historical_stock_data")
train_df, test_df, validation_df, prediction_df = process_data(df, split_periods=True)
df = read_data("historical_stock_data")

In [3]:
train_df.shape

(3719, 77)

In [4]:
print(train_df.shape)
print(test_df.shape)
print(validation_df.shape)
print(prediction_df.shape)

(3719, 77)
(1413, 77)
(468, 77)
(468, 77)


In [5]:
unique_stocks = train_df['stock_name'].unique().tolist()
train_df.columns

Index(['date', 'stock_name', 'open', 'high', 'low', 'close', 'adj_close',
       'volume', 'weekly_price_diff', 'range', 'gap', 'volatility',
       'volume_ma', 'volume_ratio', 'rsi_2w', 'normalized_rsi_2w',
       'momentum_4w', 'momentum_8w', 'ema12', 'ema26', 'macd', 'signal_line',
       'MACD_Distance', 'macd_short', 'macd_long', 'macd_diff', 'low14',
       'high14', '%K', '%D', 'Stochastic_K_D_Distance', 'obv', 'vwap',
       'VWAP_Distance', 'hl', 'hc', 'lc', 'tr', 'atr', 'ATR_Percentage',
       'Pivot', 'Pivot_Distance', 'Support_Distance', 'Resistance_Distance',
       'weekly_return', 'next_weekly_return', 'adj_close_lag_1w',
       'volume_lag_1w', 'weekly_return_lag_1w', 'adj_close_lag_2w',
       'volume_lag_2w', 'weekly_return_lag_2w', 'adj_close_lag_3w',
       'volume_lag_3w', 'weekly_return_lag_3w', 'adj_close_lag_4w',
       'volume_lag_4w', 'weekly_return_lag_4w', 'price_to_range_ratio',
       'volume_change', 'adj_close_change', 'momentum_2w',
       'weekly_ret

In [6]:
df.dtypes

date          datetime64[ns]
open                 float64
high                 float64
low                  float64
close                float64
adj_close            float64
volume                 int64
stock_name            object
dtype: object

In [7]:
features = ['adj_close',
       'volume', 'weekly_price_diff', 'range', 'gap', 'volatility',
       'volume_ma', 'volume_ratio', 'rsi_2w', 'normalized_rsi_2w',
       'momentum_4w', 'momentum_8w', 'ema12', 'ema26', 'macd', 'signal_line',
       'MACD_Distance', 'macd_short', 'macd_long', 'macd_diff', 'low14',
       'high14', '%K', '%D', 'Stochastic_K_D_Distance', 'obv', 'vwap',
       'VWAP_Distance', 'hl', 'hc', 'lc', 'tr', 'atr', 'ATR_Percentage',
       'Pivot', 'Pivot_Distance', 'Support_Distance', 'Resistance_Distance',
       'weekly_return', 'adj_close_lag_1w',
       'volume_lag_1w', 'weekly_return_lag_1w', 'adj_close_lag_2w',
       'volume_lag_2w', 'weekly_return_lag_2w', 'adj_close_lag_3w',
       'volume_lag_3w', 'weekly_return_lag_3w', 'adj_close_lag_4w',
       'volume_lag_4w', 'weekly_return_lag_4w', 'price_to_range_ratio',
       'volume_change', 'adj_close_change', 'momentum_2w',
       'weekly_return_lag_5w', 'weekly_return_lag_6w', 'weekly_return_lag_7w',
       'weekly_return_lag_8w', 'rolling_mean_4w', 'rolling_std_4w',
       'bollinger_upper', 'bollinger_lower', 'bollinger_bandwidth',
       'rolling_skew_4w', 'rolling_kurt_4w', 'sharpe_ratio', 'day_of_week',
       'week_of_year', 'month']

In [8]:
def backtest_top3_with_sharpe(
    predictions_df, data, unique_stocks, initial_balance=100000, action_fee=0.001,
    risk_free_rate=0.0, take_profit=0.04):
    
    total_cash = initial_balance
    holdings = {stock: 0 for stock in unique_stocks}
    purchase_prices = {stock: 0 for stock in unique_stocks}
    actions = []  # List to store actions
    trade_returns = []  # List to store trade returns

    predictions_df = predictions_df.sort_values(by='date')

    for date in predictions_df['date'].unique():
        daily_predictions = predictions_df[predictions_df['date'] == date]
        top_3_stocks = daily_predictions.nlargest(5, 'predicted_return')['stock_name'].tolist()

        for stock in unique_stocks:
            stock_data = data[data['stock_name'] == stock].reset_index(drop=True)
            current_day_data = stock_data[stock_data['date'] == date]

            if len(current_day_data) == 0 or stock not in top_3_stocks:
                continue

            current_price = current_day_data.iloc[0]['adj_close']
            next_day_data = stock_data[stock_data['date'] > date].iloc[:1]
            if next_day_data.empty:
                continue

            next_open_price = next_day_data.iloc[0]['open']

            # Buy logic
            if stock in top_3_stocks and holdings[stock] == 0:
                investment_amount = total_cash / 3
                shares_to_buy = max(1, investment_amount // next_open_price)

                if shares_to_buy > 0:
                    total_cash -= shares_to_buy * next_open_price * (1 + action_fee)
                    holdings[stock] += shares_to_buy
                    purchase_prices[stock] = next_open_price
                    actions.append({
                        'signal_date': date,
                        'trade_date': next_day_data.iloc[0]['date'],
                        'stock': stock,
                        'action': 'buy',
                        'price': next_open_price,
                        'shares': shares_to_buy,
                        'total_cash': total_cash,
                        'portfolio_value': total_cash + sum(
                            holdings[s] * stock_data[stock_data['date'] <= date].iloc[-1]['close']
                            for s in unique_stocks
                        ),
                        'profit_percentage': 0  # No profit percentage on buy
                    })

            # Sell logic with take profit and stop loss
            if holdings[stock] > 0:
                # Calculate return
                trade_return = (next_open_price - purchase_prices[stock]) / purchase_prices[stock]

                # Check thresholds
                if trade_return >= take_profit:
                    total_cash += holdings[stock] * next_open_price * (1 - action_fee)
                    trade_returns.append(trade_return)
                    actions.append({
                        'signal_date': date,
                        'trade_date': next_day_data.iloc[0]['date'],
                        'stock': stock,
                        'action': 'sell',
                        'price': next_open_price,
                        'shares': holdings[stock],
                        'total_cash': total_cash,
                        'portfolio_value': total_cash + sum(
                            holdings[s] * stock_data[stock_data['date'] <= date].iloc[-1]['close']
                            for s in unique_stocks
                        ),
                        'profit_percentage': trade_return * 100
                    })
                    holdings[stock] = 0  # Reset holdings

    # Convert actions to a DataFrame
    actions_df = pd.DataFrame(actions)

    # Calculate final portfolio value
    final_portfolio_value = total_cash + sum(
        holdings[stock] * data[data['stock_name'] == stock].iloc[-1]['close']
        for stock in unique_stocks
    )
    net_profit = final_portfolio_value - initial_balance
    roi = (net_profit / initial_balance) * 100

    # Calculate Sharpe Ratio
    if trade_returns:
        average_return = np.mean(trade_returns)
        std_dev_return = np.std(trade_returns)
        sharpe_ratio = (average_return - risk_free_rate) / (std_dev_return + 1e-8)
    else:
        sharpe_ratio = 0

    return final_portfolio_value, net_profit, roi, sharpe_ratio, actions_df


In [9]:
def generic_objective(trial, model_type, optimize_metric):
    """
    Generic objective function to optimize ROI or Sharpe Ratio for different model types.
    
    Args:
        trial: Optuna trial object.
        model_type: One of 'lightgbm', 'xgboost', 'catboost'.
        optimize_metric: 'roi' or 'sharpe_ratio'.
    
    Returns:
        Negative of the selected metric (to maximize).
    """
    # Fix the trailing comma issue
    X_train = train_df[features]  # Ensure this is a DataFrame
    X_test = test_df[features]  # Ensure this is a DataFrame
    y_train = train_df['next_weekly_return']
    y_test = test_df['next_weekly_return']

    selected_features = []
    for feature in features:
        if trial.suggest_categorical(feature, (True, False)):
            selected_features.append(feature)

    if model_type == "lightgbm":
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'max_depth': trial.suggest_int('max_depth', 3, 15),
            'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 10),  
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 10),  
            'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.1, 1.0),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.1, 1.0),
            'objective': 'regression',
            'metric': 'rmse',
            'verbosity': -1,
            'random_state': 42
        }

        train_data = lgb.Dataset(X_train[selected_features], label=y_train)
        valid_data = lgb.Dataset(X_test[selected_features], label=y_test, reference=train_data)
        callbacks = [lgb.early_stopping(stopping_rounds=100, verbose=False)]
        model = lgb.train(params, train_data, valid_sets=[valid_data], num_boost_round=1000, callbacks=callbacks)

        predictions = model.predict(X_test[selected_features])

    elif model_type == "xgboost":
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'max_depth': trial.suggest_int('max_depth', 3, 15),
            'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 10),  
            'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 10),  
            'subsample': trial.suggest_uniform('subsample', 0.1, 1.0),
            'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.1, 1.0),
            'objective': 'reg:squarederror',
            'eval_metric': 'rmse',
            'random_state': 42
        }

        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]

        train_data = xgb.DMatrix(X_train_selected, label=y_train)
        test_data = xgb.DMatrix(X_test_selected, label=y_test)

        model = xgb.train(
            params,
            train_data,
            num_boost_round=1000,
            evals=[(test_data, 'validation')],
            early_stopping_rounds=100,
            verbose_eval=False
        )

        predictions = model.predict(test_data)

    elif model_type == "catboost":
        params = {
            'iterations': trial.suggest_int('iterations', 100, 1000),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
            'depth': trial.suggest_int('depth', 3, 15),
            'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-3, 10),
            'bagging_temperature': trial.suggest_uniform('bagging_temperature', 0, 1),
            'random_strength': trial.suggest_loguniform('random_strength', 1e-3, 10),
            'verbose': 0,
            'loss_function': 'RMSE',
            'random_seed': 42
        }

        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]

        model = CatBoostRegressor(**params)
        model.fit(X_train_selected, y_train, eval_set=(X_test_selected, y_test), early_stopping_rounds=100, verbose=False)

        predictions = model.predict(X_test_selected)

    else:
        raise ValueError("Invalid model_type. Choose from 'lightgbm', 'xgboost', 'catboost'.")

    # Generate predictions and backtest
    predictions_df = pd.DataFrame({
        'date': test_df['date'],
        'stock_name': test_df['stock_name'],
        'predicted_return': predictions
    })

    unique_stocks = test_df['stock_name'].unique().tolist()
    _, _, roi, sharpe_ratio, _ = backtest_top3_with_sharpe(predictions_df, df, unique_stocks)

    if optimize_metric == "roi":
        return -roi  # Minimize negative ROI to maximize ROI
    elif optimize_metric == "sharpe_ratio":
        return -sharpe_ratio  # Minimize negative Sharpe Ratio to maximize Sharpe Ratio
    else:
        raise ValueError("Invalid optimize_metric. Choose from 'roi', 'sharpe_ratio'.")

In [10]:
model_types = ["lightgbm", "xgboost", "catboost"]
results = {}

for model_type in model_types:
    print(f"Optimizing {model_type} for ROI...")
    roi_study = optuna.create_study(direction='minimize')
    roi_study.optimize(lambda trial: generic_objective(trial, model_type, "roi"), n_trials=10)
    best_roi_params = roi_study.best_params

    print(f"Optimizing {model_type} for Sharpe Ratio...")
    sharpe_study = optuna.create_study(direction='minimize')
    sharpe_study.optimize(lambda trial: generic_objective(trial, model_type, "sharpe_ratio"), n_trials=10)
    best_sharpe_params = sharpe_study.best_params

    results[model_type] = {
        "best_roi_params": best_roi_params,
        "best_sharpe_params": best_sharpe_params
    }

[I 2025-01-10 12:43:05,206] A new study created in memory with name: no-name-30a033ce-f319-44a0-a52f-d5553a134a84


Optimizing lightgbm for ROI...


[I 2025-01-10 12:43:09,302] Trial 0 finished with value: -281.39424316727275 and parameters: {'adj_close': False, 'volume': False, 'weekly_price_diff': False, 'range': True, 'gap': True, 'volatility': False, 'volume_ma': True, 'volume_ratio': True, 'rsi_2w': True, 'normalized_rsi_2w': True, 'momentum_4w': False, 'momentum_8w': False, 'ema12': True, 'ema26': True, 'macd': True, 'signal_line': True, 'MACD_Distance': True, 'macd_short': True, 'macd_long': True, 'macd_diff': True, 'low14': True, 'high14': False, '%K': True, '%D': True, 'Stochastic_K_D_Distance': True, 'obv': False, 'vwap': True, 'VWAP_Distance': False, 'hl': True, 'hc': False, 'lc': True, 'tr': False, 'atr': True, 'ATR_Percentage': False, 'Pivot': False, 'Pivot_Distance': False, 'Support_Distance': False, 'Resistance_Distance': True, 'weekly_return': True, 'adj_close_lag_1w': False, 'volume_lag_1w': False, 'weekly_return_lag_1w': True, 'adj_close_lag_2w': False, 'volume_lag_2w': True, 'weekly_return_lag_2w': False, 'adj_cl

Optimizing lightgbm for Sharpe Ratio...


[I 2025-01-10 12:43:39,432] Trial 0 finished with value: -1.1566919595213303 and parameters: {'adj_close': True, 'volume': False, 'weekly_price_diff': False, 'range': True, 'gap': False, 'volatility': True, 'volume_ma': True, 'volume_ratio': True, 'rsi_2w': False, 'normalized_rsi_2w': True, 'momentum_4w': True, 'momentum_8w': True, 'ema12': True, 'ema26': False, 'macd': True, 'signal_line': False, 'MACD_Distance': False, 'macd_short': False, 'macd_long': True, 'macd_diff': False, 'low14': True, 'high14': False, '%K': True, '%D': True, 'Stochastic_K_D_Distance': True, 'obv': False, 'vwap': True, 'VWAP_Distance': False, 'hl': True, 'hc': False, 'lc': True, 'tr': False, 'atr': False, 'ATR_Percentage': True, 'Pivot': True, 'Pivot_Distance': False, 'Support_Distance': False, 'Resistance_Distance': False, 'weekly_return': False, 'adj_close_lag_1w': False, 'volume_lag_1w': True, 'weekly_return_lag_1w': True, 'adj_close_lag_2w': True, 'volume_lag_2w': False, 'weekly_return_lag_2w': False, 'adj

Optimizing xgboost for ROI...


[I 2025-01-10 12:44:07,985] Trial 0 finished with value: -429.2709107704255 and parameters: {'adj_close': False, 'volume': True, 'weekly_price_diff': False, 'range': True, 'gap': True, 'volatility': True, 'volume_ma': True, 'volume_ratio': True, 'rsi_2w': False, 'normalized_rsi_2w': False, 'momentum_4w': True, 'momentum_8w': False, 'ema12': True, 'ema26': True, 'macd': True, 'signal_line': False, 'MACD_Distance': True, 'macd_short': True, 'macd_long': False, 'macd_diff': False, 'low14': True, 'high14': True, '%K': False, '%D': False, 'Stochastic_K_D_Distance': True, 'obv': False, 'vwap': True, 'VWAP_Distance': False, 'hl': True, 'hc': False, 'lc': False, 'tr': True, 'atr': True, 'ATR_Percentage': True, 'Pivot': True, 'Pivot_Distance': True, 'Support_Distance': False, 'Resistance_Distance': False, 'weekly_return': True, 'adj_close_lag_1w': True, 'volume_lag_1w': False, 'weekly_return_lag_1w': True, 'adj_close_lag_2w': False, 'volume_lag_2w': False, 'weekly_return_lag_2w': False, 'adj_cl

Optimizing xgboost for Sharpe Ratio...


[I 2025-01-10 12:44:37,682] Trial 0 finished with value: -0.971284084770769 and parameters: {'adj_close': True, 'volume': True, 'weekly_price_diff': False, 'range': False, 'gap': False, 'volatility': False, 'volume_ma': True, 'volume_ratio': False, 'rsi_2w': False, 'normalized_rsi_2w': False, 'momentum_4w': True, 'momentum_8w': False, 'ema12': True, 'ema26': False, 'macd': True, 'signal_line': True, 'MACD_Distance': False, 'macd_short': False, 'macd_long': True, 'macd_diff': True, 'low14': False, 'high14': False, '%K': False, '%D': True, 'Stochastic_K_D_Distance': False, 'obv': True, 'vwap': False, 'VWAP_Distance': True, 'hl': False, 'hc': False, 'lc': False, 'tr': False, 'atr': False, 'ATR_Percentage': True, 'Pivot': False, 'Pivot_Distance': True, 'Support_Distance': True, 'Resistance_Distance': True, 'weekly_return': False, 'adj_close_lag_1w': False, 'volume_lag_1w': True, 'weekly_return_lag_1w': False, 'adj_close_lag_2w': False, 'volume_lag_2w': True, 'weekly_return_lag_2w': True, '

Optimizing catboost for ROI...


[I 2025-01-10 12:45:10,406] Trial 0 finished with value: -416.0777867521822 and parameters: {'adj_close': True, 'volume': True, 'weekly_price_diff': True, 'range': True, 'gap': True, 'volatility': False, 'volume_ma': True, 'volume_ratio': True, 'rsi_2w': False, 'normalized_rsi_2w': False, 'momentum_4w': False, 'momentum_8w': True, 'ema12': False, 'ema26': False, 'macd': False, 'signal_line': True, 'MACD_Distance': False, 'macd_short': True, 'macd_long': True, 'macd_diff': True, 'low14': False, 'high14': False, '%K': False, '%D': True, 'Stochastic_K_D_Distance': False, 'obv': False, 'vwap': False, 'VWAP_Distance': False, 'hl': False, 'hc': True, 'lc': False, 'tr': False, 'atr': False, 'ATR_Percentage': False, 'Pivot': False, 'Pivot_Distance': False, 'Support_Distance': True, 'Resistance_Distance': False, 'weekly_return': True, 'adj_close_lag_1w': True, 'volume_lag_1w': False, 'weekly_return_lag_1w': False, 'adj_close_lag_2w': True, 'volume_lag_2w': False, 'weekly_return_lag_2w': True, '

Optimizing catboost for Sharpe Ratio...


[I 2025-01-10 12:47:35,699] Trial 0 finished with value: -1.2730808409025052 and parameters: {'adj_close': False, 'volume': True, 'weekly_price_diff': True, 'range': False, 'gap': False, 'volatility': False, 'volume_ma': False, 'volume_ratio': False, 'rsi_2w': True, 'normalized_rsi_2w': True, 'momentum_4w': True, 'momentum_8w': False, 'ema12': True, 'ema26': False, 'macd': False, 'signal_line': False, 'MACD_Distance': True, 'macd_short': False, 'macd_long': False, 'macd_diff': False, 'low14': False, 'high14': False, '%K': True, '%D': False, 'Stochastic_K_D_Distance': True, 'obv': False, 'vwap': False, 'VWAP_Distance': False, 'hl': False, 'hc': True, 'lc': False, 'tr': True, 'atr': False, 'ATR_Percentage': True, 'Pivot': True, 'Pivot_Distance': False, 'Support_Distance': True, 'Resistance_Distance': False, 'weekly_return': False, 'adj_close_lag_1w': True, 'volume_lag_1w': True, 'weekly_return_lag_1w': True, 'adj_close_lag_2w': True, 'volume_lag_2w': False, 'weekly_return_lag_2w': True, 

In [11]:
for model_type, params in results.items():
    # Save the best features for ROI
    roi_features = [feature for feature in features if params['best_roi_params'].get(feature)]
    X_train_roi = train_df[roi_features]
    y_train_roi = train_df['next_weekly_return']
    X_test_roi = test_df[roi_features]
    y_test_roi = test_df['next_weekly_return']

    # Extract hyperparameters for ROI model
    roi_hyperparams = {k: v for k, v in params['best_roi_params'].items() if k not in features}

    # Train and evaluate the ROI model
    if model_type == "lightgbm":
        roi_model = lgb.train(roi_hyperparams, lgb.Dataset(X_train_roi, label=y_train_roi), num_boost_round=1000)
        predictions = roi_model.predict(X_test_roi)
    elif model_type == "xgboost":
        train_data = xgb.DMatrix(X_train_roi, label=y_train_roi)
        test_data = xgb.DMatrix(X_test_roi)
        roi_model = xgb.train(roi_hyperparams, train_data, num_boost_round=1000, verbose_eval=False)
        predictions = roi_model.predict(test_data)
    elif model_type == "catboost":
        roi_model = CatBoostRegressor(**roi_hyperparams)  # Pass only hyperparameters
        roi_model.fit(X_train_roi, y_train_roi, eval_set=(X_test_roi, y_test_roi), early_stopping_rounds=100, verbose=False)
        predictions = roi_model.predict(X_test_roi)

    # Save the ROI model and features
    joblib.dump(roi_model, os.path.join(models_directory, f"{model_type}_roi_model.pkl"))
    joblib.dump(roi_features, os.path.join(models_directory, f"{model_type}_roi_features.pkl"))

    # Calculate and print metrics for ROI model
    rmse = np.sqrt(mean_squared_error(y_test_roi, predictions))
    mae = mean_absolute_error(y_test_roi, predictions)
    predictions_df = pd.DataFrame({
        'date': test_df['date'],
        'stock_name': test_df['stock_name'],
        'predicted_return': predictions
    })
    unique_stocks = test_df['stock_name'].unique().tolist()
    final_portfolio_value, net_profit, roi, sharpe_ratio, _ = backtest_top3_with_sharpe(predictions_df, test_df, unique_stocks)

    print(f"{model_type} ROI Model Metrics:")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  ROI: {roi:.2f}%")
    print(f"  Sharpe Ratio: {sharpe_ratio:.4f}")
    print(f"  Net Profit: {net_profit:.2f}")

    # Save the best features for Sharpe Ratio
    sharpe_features = [feature for feature in features if params['best_sharpe_params'].get(feature)]
    X_train_sharpe = train_df[sharpe_features]
    y_train_sharpe = train_df['next_weekly_return']
    X_test_sharpe = test_df[sharpe_features]
    y_test_sharpe = test_df['next_weekly_return']

    # Extract hyperparameters for Sharpe Ratio model
    sharpe_hyperparams = {k: v for k, v in params['best_sharpe_params'].items() if k not in features}

    # Train and evaluate the Sharpe Ratio model
    if model_type == "lightgbm":
        sharpe_model = lgb.train(sharpe_hyperparams, lgb.Dataset(X_train_sharpe, label=y_train_sharpe), num_boost_round=1000)
        predictions = sharpe_model.predict(X_test_sharpe)
    elif model_type == "xgboost":
        train_data = xgb.DMatrix(X_train_sharpe, label=y_train_sharpe)
        test_data = xgb.DMatrix(X_test_sharpe)
        sharpe_model = xgb.train(sharpe_hyperparams, train_data, num_boost_round=1000, verbose_eval=False)
        predictions = sharpe_model.predict(test_data)
    elif model_type == "catboost":
        sharpe_model = CatBoostRegressor(**sharpe_hyperparams)
        sharpe_model.fit(X_train_sharpe, y_train_sharpe, eval_set=(X_test_sharpe, y_test_sharpe), early_stopping_rounds=100, verbose=False)
        predictions = sharpe_model.predict(X_test_sharpe)

    # Save the Sharpe Ratio model and features
    joblib.dump(sharpe_model, os.path.join(models_directory, f"{model_type}_sharpe_model.pkl"))
    joblib.dump(sharpe_features, os.path.join(models_directory, f"{model_type}_sharpe_features.pkl"))

    # Calculate and print metrics for Sharpe Ratio model
    rmse = np.sqrt(mean_squared_error(y_test_sharpe, predictions))
    mae = mean_absolute_error(y_test_sharpe, predictions)
    predictions_df = pd.DataFrame({
        'date': test_df['date'],
        'stock_name': test_df['stock_name'],
        'predicted_return': predictions
    })
    unique_stocks = test_df['stock_name'].unique().tolist()
    final_portfolio_value, net_profit, roi, sharpe_ratio, _ = backtest_top3_with_sharpe(predictions_df, test_df, unique_stocks)

    print(f"{model_type} Sharpe Ratio Model Metrics:")
    print(f"  RMSE: {rmse:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  ROI: {roi:.2f}%")
    print(f"  Sharpe Ratio: {sharpe_ratio:.4f}")
    print(f"  Net Profit: {net_profit:.2f}")


lightgbm ROI Model Metrics:
  RMSE: 0.0567
  MAE: 0.0403
  ROI: 46.14%
  Sharpe Ratio: 1.5428
  Net Profit: 46139.69
lightgbm Sharpe Ratio Model Metrics:
  RMSE: 0.0560
  MAE: 0.0399
  ROI: 61.35%
  Sharpe Ratio: 1.1377
  Net Profit: 61352.75
xgboost ROI Model Metrics:
  RMSE: 0.0548
  MAE: 0.0384
  ROI: 75.35%
  Sharpe Ratio: 1.2173
  Net Profit: 75346.19
xgboost Sharpe Ratio Model Metrics:
  RMSE: 0.0583
  MAE: 0.0417
  ROI: 38.82%
  Sharpe Ratio: 1.0935
  Net Profit: 38823.16
catboost ROI Model Metrics:
  RMSE: 0.0539
  MAE: 0.0377
  ROI: 31.12%
  Sharpe Ratio: 0.4462
  Net Profit: 31119.26
catboost Sharpe Ratio Model Metrics:
  RMSE: 0.0539
  MAE: 0.0377
  ROI: 36.53%
  Sharpe Ratio: 1.3917
  Net Profit: 36527.85
