In [19]:
# Necessary imports
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import QuantileTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score, matthews_corrcoef
from datetime import datetime

# Clean the raw frame, split it chronologically and optionally quantile-scale the features
def preprocessing(df_raw, stop_loss=0, scale=True):
    """
    Clean the raw data, split it by date and optionally scale the features.

    Parameters:
    -----------
    df_raw : pandas.DataFrame
        Raw data with a 'Date' column, feature columns and one or more of the
        label columns 'Y', 'Y_2', 'Y_3', 'Y_4', 'Y_5' holding 'SELL'/'BUY'.
        'Close' and 'Stock' columns are dropped when present. The input frame
        is copied, not modified.
    stop_loss : int, default=0
        Selects the label column: 0 -> 'Y', 2 -> 'Y_2', 3 -> 'Y_3',
        4 -> 'Y_4', 5 -> 'Y_5'.
    scale : bool, default=True
        When True, fit a QuantileTransformer (normal output) on the training
        features only and apply it to all three splits.

    Returns:
    --------
    tuple
        (train, valid, test, scaler). Each frame holds the feature columns
        followed by 'Date' and the selected label column. `scaler` is the
        fitted QuantileTransformer, or None when scale is False.

    Raises:
    -------
    ValueError
        If stop_loss is not one of 0, 2, 3, 4, 5, or if any split is empty.
    KeyError
        If the selected label column is not present in the data.
    """
    target_by_stop_loss = {0: 'Y', 2: 'Y_2', 3: 'Y_3', 4: 'Y_4', 5: 'Y_5'}
    try:
        target_col = target_by_stop_loss[stop_loss]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unsupported stop_loss {stop_loss!r}; expected one of 0, 2, 3, 4, 5."
        ) from None

    df = df_raw.copy()
    df['Date'] = pd.to_datetime(df['Date'])

    # Price and ticker columns are not model inputs; drop them before dropna so
    # that missing values in them do not discard otherwise complete rows.
    df = df.drop(columns=['Close', 'Stock'], errors='ignore')
    df = df.dropna()

    target_cols_exist = [col for col in target_by_stop_loss.values() if col in df.columns]
    if target_cols_exist:
        df[target_cols_exist] = df[target_cols_exist].apply(
            lambda labels: labels.map({'SELL': 0, 'BUY': 1})
        )

    # Fixed chronological split: 2020-2022 train, 2023 validation, 2024+ test
    train_df = df[(df['Date'] >= '2020-01-01') & (df['Date'] <= '2022-12-31')]
    valid_df = df[(df['Date'] >= '2023-01-01') & (df['Date'] <= '2023-12-31')]
    test_df = df[(df['Date'] >= '2024-01-01')]

    for split_name, split_df in (('Training', train_df),
                                 ('Validation', valid_df),
                                 ('Test', test_df)):
        if len(split_df) == 0:
            raise ValueError(f"{split_name} data is empty. Check date range.")

    # Fail with a clear message instead of an opaque pandas indexing error
    if target_col not in df.columns:
        raise KeyError(
            f"Target column '{target_col}' (stop_loss={stop_loss}) is not present in the data."
        )

    # Everything that is neither the date nor a label is a feature (order preserved)
    feature_cols = [col for col in df.columns
                    if col != 'Date' and col not in target_cols_exist]
    final_cols = feature_cols + ['Date', target_col]

    if not scale:
        return train_df[final_cols], valid_df[final_cols], test_df[final_cols], None

    # Fit the scaler on training data only to avoid leaking validation/test statistics
    train_data = train_df[feature_cols].values.astype(np.float64)

    # Add small noise to prevent quantization issues.
    # NOTE(review): the noise scale is *inversely* proportional to the feature
    # std (1e-3 / std), so low-variance features receive the largest noise.
    # Kept as in the original - confirm this is intended.
    stds = np.std(train_data, axis=0, keepdims=True)
    noise_std = 1e-3 / np.maximum(stds, 1e-3)
    # Dedicated seeded generator so the fitted scaler is reproducible between
    # runs (the scaler itself already uses random_state=1004).
    rng = np.random.default_rng(1004)
    train_data_noisy = train_data + noise_std * rng.standard_normal(train_data.shape)

    scaler = QuantileTransformer(output_distribution='normal', random_state=1004)
    scaler.fit(train_data_noisy)

    # Transform the original (noise-free) features of every split
    scaled_splits = []
    for split_df in (train_df, valid_df, test_df):
        split_scaled = split_df.copy()
        split_scaled[feature_cols] = scaler.transform(split_df[feature_cols].values)
        scaled_splits.append(split_scaled[final_cols])
    train_final, valid_final, test_final = scaled_splits

    return train_final, valid_final, test_final, scaler

# Keep only the k most (or least) important features in every split
def select_features(train_df, valid_df, test_df, feature_importance_df,
                    feature_type='global', top_or_bottom='top', k=10, target_col='Y'):
    """
    Restrict the three splits to k features chosen by importance ranking.

    Parameters:
    -----------
    train_df, valid_df, test_df : pandas.DataFrame
        Frames containing the feature columns plus 'Date' and `target_col`
    feature_importance_df : pandas.DataFrame
        Ranking table with 'feature_name' and 'importance' columns
    feature_type : str, default='global'
        Label of the importance source; only used in the printed summary
    top_or_bottom : str, default='top'
        'top' picks the highest-importance features, 'bottom' the lowest
        (case-insensitive)
    k : int, default=10
        Number of features to keep
    target_col : str, default='Y'
        Name of the label column to carry over

    Returns:
    --------
    tuple
        (train_df, valid_df, test_df) reduced to the chosen features,
        'Date' and `target_col`, in that column order

    Raises:
    -------
    ValueError
        If top_or_bottom is neither 'top' nor 'bottom'
    """
    direction = top_or_bottom.lower()
    if direction not in ('top', 'bottom'):
        raise ValueError("top_or_bottom must be either 'top' or 'bottom'")

    # 'bottom' means least important first, i.e. an ascending sort
    lowest_first = direction == 'bottom'
    ranking = feature_importance_df.sort_values("importance", ascending=lowest_first)
    chosen = ranking["feature_name"].head(k).tolist()

    keep = chosen + ['Date', target_col]
    reduced = tuple(frame[keep] for frame in (train_df, valid_df, test_df))

    # Echo the selection so notebook output documents what was used
    print(f"Selected {top_or_bottom} {k} features based on {feature_type} importance:")
    for rank, name in enumerate(chosen, start=1):
        print(f"{rank}. {name}")

    return reduced

# Compute, print and return the classification metrics for one dataset
def print_model_metrics(y_true, y_pred, dataset="Dataset"):
    """
    Evaluate predictions, print a short report and return the scores.

    Parameters:
    -----------
    y_true : array-like
        True labels
    y_pred : array-like
        Predicted labels
    dataset : str, default="Dataset"
        Name shown in the report header

    Returns:
    --------
    dict
        Scores under the keys 'accuracy', 'macro_f1', 'weighted_f1', 'mcc'
    """
    # Compute everything first so nothing is printed if a metric call fails
    metrics = {
        'accuracy': accuracy_score(y_true, y_pred),
        'macro_f1': f1_score(y_true, y_pred, average='macro'),
        'weighted_f1': f1_score(y_true, y_pred, average='weighted'),
        'mcc': matthews_corrcoef(y_true, y_pred),
    }

    print(f"\n{dataset} Metrics:")
    report_rows = (
        ("ACC", 'accuracy'),
        ("Macro F1", 'macro_f1'),
        ("Weighted F1", 'weighted_f1'),
        ("MCC", 'mcc'),
    )
    for label, key in report_rows:
        print(f"{label}: {metrics[key]:.4f}")

    return metrics

# Helper: replay the predicted signals as an all-in / all-out trading strategy
def _simulate_signal_trading(trading_df, initial_capital, trading_cost_pct):
    """
    Simulate trading on signal transitions and record daily portfolio values.

    The first row only supplies the "previous signal"; valuation and trading
    start from the second row. A position is opened when the signal goes
    0 -> 1 and closed when it goes 1 -> 0, so a series that starts at 1 stays
    in cash until the next 0 -> 1 transition.

    Parameters:
    -----------
    trading_df : pandas.DataFrame
        Frame with 'Date', 'Close' and 'Signal' columns, one row per day
    initial_capital : float
        Starting cash
    trading_cost_pct : float
        Proportional transaction cost (0.001 = 0.1%)

    Returns:
    --------
    tuple
        (portfolio_df, trade_log): a DataFrame with one row per simulated day
        ('Date', 'Portfolio_Value', 'Cash', 'Stock_Value', 'Price') and the
        list of trade dictionaries
    """
    cash = initial_capital
    positions = 0  # Number of shares held
    portfolio_values = []
    trade_log = []

    # Materialize the columns once instead of building a row Series per lookup
    rows = list(zip(trading_df['Date'], trading_df['Close'], trading_df['Signal']))

    for (_, _, prev_signal), (date, price, signal) in zip(rows, rows[1:]):
        if signal == 1 and prev_signal == 0 and cash > 0:  # Buy signal
            # The fee is charged on the whole cash balance before sizing the order
            transaction_cost = cash * trading_cost_pct
            shares_to_buy = int((cash - transaction_cost) // price)  # whole shares only
            if shares_to_buy > 0:
                cost = shares_to_buy * price
                total_cost = cost + transaction_cost
                positions += shares_to_buy
                cash -= total_cost
                trade_log.append({
                    'Date': date,
                    'Action': 'BUY',
                    'Price': price,
                    'Shares': shares_to_buy,
                    'Cost/Proceeds': cost,
                    'Transaction Fee': transaction_cost,
                    'Total Cost': total_cost,
                    'Positions': positions,
                    'Cash': cash
                })

        elif signal == 0 and prev_signal == 1 and positions > 0:  # Sell signal
            gross_proceeds = positions * price
            transaction_cost = gross_proceeds * trading_cost_pct
            net_proceeds = gross_proceeds - transaction_cost
            cash += net_proceeds
            trade_log.append({
                'Date': date,
                'Action': 'SELL',
                'Price': price,
                'Shares': positions,
                'Cost/Proceeds': gross_proceeds,
                'Transaction Fee': transaction_cost,
                'Total Proceeds': net_proceeds,
                'Positions': 0,
                'Cash': cash
            })
            positions = 0

        # Mark the portfolio to market at the day's close
        stock_value = positions * price
        portfolio_values.append({
            'Date': date,
            'Portfolio_Value': cash + stock_value,
            'Cash': cash,
            'Stock_Value': stock_value,
            'Price': price
        })

    return pd.DataFrame(portfolio_values), trade_log


# Helper: derive return and risk metrics from a simulated portfolio
def _summarize_portfolio(portfolio_df, trade_log, initial_capital):
    """
    Compute performance metrics for a simulated portfolio.

    Adds 'Daily_Return', 'Cumulative_Max' and 'Drawdown' columns to
    `portfolio_df` in place when it is non-empty.

    Returns:
    --------
    dict
        'total_return', 'annualized_return', 'max_drawdown' (all in percent),
        'sharpe_ratio', 'final_value' and 'transaction_costs'. All zero (with
        final_value equal to initial_capital) when there is no data.
    """
    if len(portfolio_df) == 0:
        return {
            'total_return': 0,
            'annualized_return': 0,
            'sharpe_ratio': 0,
            'max_drawdown': 0,
            'final_value': initial_capital,
            'transaction_costs': 0
        }

    # to_datetime accepts both Timestamps and date strings
    start_date = pd.to_datetime(portfolio_df['Date'].iloc[0])
    end_date = pd.to_datetime(portfolio_df['Date'].iloc[-1])
    total_days = (end_date - start_date).days

    end_value = portfolio_df['Portfolio_Value'].iloc[-1]
    growth = end_value / initial_capital

    total_return = (growth - 1) * 100
    # max(..., 1) guards against a zero-day span
    annualized_return = (growth ** (365 / max(total_days, 1)) - 1) * 100

    # Sharpe ratio, annualized with 252 periods, no risk-free rate subtracted
    portfolio_df['Daily_Return'] = portfolio_df['Portfolio_Value'].pct_change()
    daily_std = portfolio_df['Daily_Return'].std()
    if len(portfolio_df) > 1 and daily_std > 0:
        sharpe_ratio = np.sqrt(252) * (portfolio_df['Daily_Return'].mean() / daily_std)
    else:
        sharpe_ratio = 0

    # Maximum drawdown relative to the running peak
    portfolio_df['Cumulative_Max'] = portfolio_df['Portfolio_Value'].cummax()
    portfolio_df['Drawdown'] = (portfolio_df['Portfolio_Value'] / portfolio_df['Cumulative_Max'] - 1)
    max_drawdown = portfolio_df['Drawdown'].min() * 100

    total_fees = sum(trade['Transaction Fee'] for trade in trade_log)

    return {
        'total_return': total_return,
        'annualized_return': annualized_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown,
        'final_value': end_value,
        'transaction_costs': total_fees
    }


# Function to train model and calculate strategy performance
def train_and_evaluate_model(train_df, valid_df, test_df, strategy_name='Model', target_col='Y',
                             trading_cost_pct=0.001, price_csv_path="./dataset/TSLA.csv"):
    """
    Train a logistic regression model and evaluate it as a trading strategy.

    The model is fitted on the training split, scored on the validation and
    test splits, and its test-set predictions are then replayed as buy/sell
    signals against the closing prices read from `price_csv_path`.

    Parameters:
    -----------
    train_df, valid_df, test_df : pandas.DataFrame
        The preprocessed dataframes with selected features, plus 'Date' and
        `target_col`
    strategy_name : str, default='Model'
        Name of the strategy for reporting
    target_col : str, default='Y'
        The target column name
    trading_cost_pct : float, default=0.001
        Transaction cost as a percentage (0.001 = 0.1%)
    price_csv_path : str, default="./dataset/TSLA.csv"
        CSV with 'Date' and 'Close' columns; its first column is discarded

    Returns:
    --------
    dict
        'strategy_name', 'model_metrics' (validation/test), 'performance',
        the fitted 'log_model', and the 'trading_df', 'portfolio_df' and
        'trade_log' produced by the simulation
    """
    # Split features and target
    non_feature_cols = ['Date', target_col]
    X_train, y_train = train_df.drop(non_feature_cols, axis=1), train_df[target_col]
    X_valid, y_valid = valid_df.drop(non_feature_cols, axis=1), valid_df[target_col]
    X_test, y_test = test_df.drop(non_feature_cols, axis=1), test_df[target_col]

    # Initialize and train model
    log_model = LogisticRegression(random_state=42, max_iter=1000, solver='lbfgs')
    log_model.fit(X_train, y_train)

    # Evaluate on validation set
    y_valid_pred = log_model.predict(X_valid)
    print(f"\n{strategy_name} Validation Set Evaluation:")
    valid_metrics = print_model_metrics(y_valid, y_valid_pred, dataset="Validation Set")

    # Generate predictions on the test set
    y_test_pred = log_model.predict(X_test)
    print(f"\n{strategy_name} Test Set Evaluation:")
    test_metrics = print_model_metrics(y_test, y_test_pred, dataset="Test Set")

    # Load closing prices; copy the column subset so the Date conversion below
    # writes to an independent frame rather than a slice
    prices = pd.read_csv(price_csv_path).iloc[:, 1:]
    prices = prices[["Date", "Close"]].copy()
    prices['Date'] = pd.to_datetime(prices['Date'])

    # Pair every test date with its predicted signal, then attach the price
    signals_df = pd.DataFrame({
        'Date': test_df['Date'].reset_index(drop=True),
        'Signal': y_test_pred
    })
    trading_df = pd.merge(prices, signals_df, on='Date', how='inner')

    initial_capital = 10000  # Initial capital $10,000
    portfolio_df, trade_log = _simulate_signal_trading(trading_df, initial_capital, trading_cost_pct)
    performance = _summarize_portfolio(portfolio_df, trade_log, initial_capital)

    # Trade log summary
    if trade_log:
        buy_count = sum(1 for trade in trade_log if trade['Action'] == 'BUY')
        sell_count = sum(1 for trade in trade_log if trade['Action'] == 'SELL')
        print(f"\n===== {strategy_name} Trade Log Summary =====")
        print(f"Total trades: {len(trade_log)}")
        print(f"Buy trades: {buy_count}")
        print(f"Sell trades: {sell_count}")
        print(f"Total Transaction Costs: ${performance['transaction_costs']:.2f}")
    else:
        print(f"\n{strategy_name}: No trades were executed.")

    # Return performance metrics and data
    return {
        'strategy_name': strategy_name,
        'model_metrics': {
            'validation': valid_metrics,
            'test': test_metrics
        },
        'performance': performance,
        'log_model': log_model,
        'trading_df': trading_df,
        'portfolio_df': portfolio_df,
        'trade_log': trade_log
    }

# Benchmark: buy on the first day, hold, and liquidate on the last day
def calculate_buy_hold(trading_df, initial_capital=10000, trading_cost_pct=0.001):
    """
    Compute performance metrics for a Buy and Hold benchmark.

    Whole shares are bought at the first 'Close' (after deducting a fee on the
    full initial capital) and sold at the last 'Close' (fee on the proceeds).
    The daily 'Portfolio_Value' series is marked to market and does not
    include the final selling fee, whereas 'final_value' does.

    Parameters:
    -----------
    trading_df : pandas.DataFrame
        DataFrame with 'Date' and 'Close' columns
    initial_capital : float, default=10000
        Initial investment amount
    trading_cost_pct : float, default=0.001
        Transaction cost as a percentage

    Returns:
    --------
    dict
        'strategy_name', 'performance' (total/annualized return, Sharpe ratio,
        max drawdown, final value, transaction costs) and 'portfolio_df'
    """
    # Nothing to trade on: report a flat result
    if len(trading_df) == 0:
        flat_performance = {
            'total_return': 0,
            'annualized_return': 0,
            'sharpe_ratio': 0,
            'max_drawdown': 0,
            'final_value': initial_capital,
            'transaction_costs': 0
        }
        return {
            'strategy_name': 'Buy and Hold',
            'performance': flat_performance,
            'portfolio_df': pd.DataFrame()
        }

    entry_price = trading_df.iloc[0]['Close']
    exit_price = trading_df.iloc[-1]['Close']

    # Entry: pay the fee on the whole capital, then buy as many whole shares as fit
    entry_fee = initial_capital * trading_cost_pct
    n_shares = int((initial_capital - entry_fee) // entry_price)
    idle_cash = initial_capital - (n_shares * entry_price) - entry_fee

    # Exit: sell everything at the last close, paying the fee on the proceeds
    gross_exit = n_shares * exit_price
    exit_fee = gross_exit * trading_cost_pct
    final_value = gross_exit - exit_fee + idle_cash

    total_return = (final_value / initial_capital - 1) * 100

    # Daily mark-to-market value of the fixed position
    daily_rows = [
        {
            'Date': day,
            'Portfolio_Value': idle_cash + (n_shares * close),
            'Price': close
        }
        for day, close in zip(trading_df['Date'], trading_df['Close'])
    ]
    buy_hold_df = pd.DataFrame(daily_rows)

    # Annualized return over the calendar span of the data
    start_date = buy_hold_df['Date'].iloc[0]
    end_date = buy_hold_df['Date'].iloc[-1]
    if isinstance(start_date, str):
        start_date = datetime.strptime(str(start_date), '%Y-%m-%d')
        end_date = datetime.strptime(str(end_date), '%Y-%m-%d')
    span_days = (end_date - start_date).days
    annualized_return = ((final_value / initial_capital) ** (365 / max(span_days, 1)) - 1) * 100

    # Sharpe ratio, annualized with 252 periods
    buy_hold_df['Daily_Return'] = buy_hold_df['Portfolio_Value'].pct_change()
    has_variation = len(buy_hold_df) > 1 and buy_hold_df['Daily_Return'].std() > 0
    if has_variation:
        sharpe_ratio = np.sqrt(252) * (buy_hold_df['Daily_Return'].mean() / buy_hold_df['Daily_Return'].std())
    else:
        sharpe_ratio = 0

    # Maximum drawdown relative to the running peak
    buy_hold_df['Cumulative_Max'] = buy_hold_df['Portfolio_Value'].cummax()
    buy_hold_df['Drawdown'] = (buy_hold_df['Portfolio_Value'] / buy_hold_df['Cumulative_Max'] - 1)
    max_drawdown = buy_hold_df['Drawdown'].min() * 100

    total_fees = entry_fee + exit_fee

    # Console summary
    print("\n===== Buy and Hold Strategy Summary =====")
    print(f"Shares purchased: {n_shares}")
    print(f"Initial transaction cost: ${entry_fee:.2f}")
    print(f"Final transaction cost: ${exit_fee:.2f}")
    print(f"Total transaction costs: ${total_fees:.2f}")
    print(f"Final value: ${final_value:.2f}")
    print(f"Total return: {total_return:.2f}%")

    performance = {
        'total_return': total_return,
        'annualized_return': annualized_return,
        'sharpe_ratio': sharpe_ratio,
        'max_drawdown': max_drawdown,
        'final_value': final_value,
        'transaction_costs': total_fees
    }
    return {
        'strategy_name': 'Buy and Hold',
        'performance': performance,
        'portfolio_df': buy_hold_df
    }

# Print side-by-side comparison tables for a list of strategy results
def compare_strategies(strategies_data):
    """
    Print a performance table and, where available, a model-metrics table.

    Parameters:
    -----------
    strategies_data : list
        Strategy result dictionaries. Each needs 'strategy_name' and
        'performance'; entries that also carry 'model_metrics' appear in the
        second table (test-set scores only).
    """
    # One column per strategy, one row per metric
    performance_data = {
        'Metric': ['Initial Capital', 'Final Value', 'Total Return (%)', 'Annualized Return (%)',
                  'Sharpe Ratio', 'Maximum Drawdown (%)', 'Transaction Costs']
    }
    for entry in strategies_data:
        column_name = entry['strategy_name']
        perf = entry['performance']
        performance_data[column_name] = [
            '$10,000.00',  # Initial capital is always $10,000
            f"${perf['final_value']:,.2f}",
            f"{perf['total_return']:.2f}%",
            f"{perf['annualized_return']:.2f}%",
            f"{perf['sharpe_ratio']:.4f}",
            f"{perf['max_drawdown']:.2f}%",
            f"${perf['transaction_costs']:.2f}"
        ]

    print("\n===== Portfolio Performance Summary =====")
    print(pd.DataFrame(performance_data).to_string(index=False))

    # Second table: only strategies backed by a model report classification scores
    model_metrics_data = {'Metric': ['Accuracy', 'Macro F1', 'Weighted F1', 'MCC']}
    metric_keys = ('accuracy', 'macro_f1', 'weighted_f1', 'mcc')
    model_backed = [entry for entry in strategies_data if 'model_metrics' in entry]

    for entry in model_backed:
        column_name = entry['strategy_name']
        scores = entry['model_metrics']['test']
        model_metrics_data[column_name] = [f"{scores[key]:.4f}" for key in metric_keys]

    if model_backed:
        print("\n===== Model Metrics (Test Set) =====")
        print(pd.DataFrame(model_metrics_data).to_string(index=False))

# End-to-end driver: top-k vs bottom-k features vs Buy and Hold
def run_trading_system_comparison(feature_type='global', k=10, stop_loss=0, trading_cost_pct=0.001):
    """
    Run the pipeline with the top-k and bottom-k features and compare results.

    Loads the TSLA dataset and the chosen feature-importance ranking,
    preprocesses with scaling, trains one model per feature subset, computes a
    Buy and Hold benchmark on the same price data and prints the comparison.

    Parameters:
    -----------
    feature_type : str, default='global'
        Importance ranking to load: 'global' or 'leaves' (case-insensitive)
    k : int, default=10
        Number of features to select
    stop_loss : int, default=0
        Selects the target column: 0 -> 'Y', 2..5 -> 'Y_2'..'Y_5'
    trading_cost_pct : float, default=0.001
        Transaction cost as a percentage (0.001 = 0.1%)

    Returns:
    --------
    dict
        Results under the keys 'top_features', 'bottom_features', 'buy_hold'

    Raises:
    -------
    ValueError
        If feature_type or stop_loss is not one of the supported values
    """
    # Load data
    raw = pd.read_csv("./dataset/TSLA.csv").iloc[:, 1:]

    # Load the feature-importance ranking that matches feature_type
    importance_files = {
        'global': "./select_features/TSLA200_global.csv",
        'leaves': "./select_features/TSLA200_leaves.csv",
    }
    importance_kind = feature_type.lower()
    if importance_kind not in importance_files:
        raise ValueError("feature_type must be either 'global' or 'leaves'")
    feature_importance_df = pd.read_csv(importance_files[importance_kind])

    # Resolve the target column for the requested stop-loss level
    for level, column in ((0, 'Y'), (2, 'Y_2'), (3, 'Y_3'), (4, 'Y_4'), (5, 'Y_5')):
        if stop_loss == level:
            target_col = column
            break
    else:
        raise ValueError('Invalid stop_loss value')

    # Preprocess data (the fitted scaler is not needed here)
    train_df, valid_df, test_df, _ = preprocessing(raw, stop_loss=stop_loss, scale=True)

    # Same procedure for both ends of the ranking: select features, then train and trade
    runs = (
        ('top', "\n=== Using Top Features ===", f'Top {k} Features', 'top_features'),
        ('bottom', "\n=== Using Bottom Features ===", f'Bottom {k} Features', 'bottom_features'),
    )
    results = {}
    for direction, banner, label, result_key in runs:
        print(banner)
        subset = select_features(
            train_df, valid_df, test_df,
            feature_importance_df=feature_importance_df,
            feature_type=feature_type,
            top_or_bottom=direction,
            k=k,
            target_col=target_col
        )
        results[result_key] = train_and_evaluate_model(
            *subset,
            strategy_name=label,
            target_col=target_col,
            trading_cost_pct=trading_cost_pct
        )

    # Benchmark on the same dates and prices the top-feature strategy traded on
    results['buy_hold'] = calculate_buy_hold(
        results['top_features']['trading_df'],
        initial_capital=10000,
        trading_cost_pct=trading_cost_pct
    )

    # Compare all strategies
    compare_strategies([results['top_features'], results['bottom_features'], results['buy_hold']])

    return results

# Example: Run with default parameters
# results = run_trading_system_comparison()

In [20]:
# Compare top-10 vs bottom-10 'global' features on the 'Y' target (stop_loss=0) with a 0.2% cost per trade
performance = run_trading_system_comparison(stop_loss=0, feature_type='global', k=10, trading_cost_pct=0.002)




=== Using Top Features ===
Selected top 10 features based on global importance:
1. CloseStd_3
2. Volume_SMA_21
3. Volume_EMA_24
4. CloseStd_2
5. RSI_8
6. RSI_27
7. Volume_SMA_11
8. StoK_11
9. CCI_28
10. EMA_6

Top 10 Features Validation Set Evaluation:

Validation Set Metrics:
ACC: 0.5280
Macro F1: 0.5280
Weighted F1: 0.5283
MCC: 0.0642

Top 10 Features Test Set Evaluation:

Test Set Metrics:
ACC: 0.5448
Macro F1: 0.5309
Weighted F1: 0.5301
MCC: 0.0999

===== Top 10 Features Trade Log Summary =====
Total trades: 28
Buy trades: 14
Sell trades: 14
Total Transaction Costs: $644.81

=== Using Bottom Features ===
Selected bottom 10 features based on global importance:
1. Volume_SMA_9
2. WR_14
3. BBup_18
4. RSI_21
5. CloseStd_30
6. StoK_6
7. StoD_3
8. BBup_19
9. SMA_6
10. SMA_15

Bottom 10 Features Validation Set Evaluation:

Validation Set Metrics:
ACC: 0.5480
Macro F1: 0.5250
Weighted F1: 0.5350
MCC: 0.0674

Bottom 10 Features Test Set Evaluation:

Test Set Metrics:
ACC: 0.5233
Macro F1: 