# CCI Oscillator Optimizer

In [None]:
"""
Main script to run hyperparameter optimization and sensitivity analysis
for the CCI Oscillator strategy using the portfolio-based evaluation framework.
"""

import json
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.mean_reversion.cci_oscillator_strat import CCIStrategy
    from src.optimizer.strategy_optimizer import StrategyOptimizer
    from src.optimizer.sensitivity_analyzer import SensitivityAnalyzer
    from src.database.config import DatabaseConfig
except ImportError as e:
    print("Error importing modules. Make sure the script is run from the project root")
    print("or the 'src' directory is in the Python path.")
    print(f"Import Error: {e}")
    sys.exit(1)

In [None]:
# single stock

# Logging Configuration
# Configure root logging once: INFO level, timestamped "time - name - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration
MLFLOW_TRACKING_URI = "file:./mlruns"  # Store MLflow data locally in ./mlruns
# Captured once at start-up; used below to build the experiment and run names.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Data Configuration
# NOTE(review): "../" is resolved against the current working directory, so this
# assumes the process starts one level below the project root — confirm.
TICKER_FILE_PATH = "../data/ticker.xlsx" # Excel workbook with a "Security Name" column
MAX_TICKERS = None # Limit tickers for faster testing, set to None to use all

# Backtest Period
# Window ends today and starts 4*365 days earlier (leap days are not accounted for).
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Optimization Settings (passed straight through to StrategyOptimizer)
CV_FOLDS = 5
MAX_EVALS = 50  # Number of hyperparameter sets to evaluate
OPTIMIZATION_METRIC = 'harmonic_mean' # Portfolio metric to maximize (minus penalty)
N_JOBS = -1 # Use all available CPU cores for fold evaluation within optimizer

# Sensitivity Analysis Settings (passed to SensitivityAnalyzer when enabled)
RUN_SENSITIVITY = False # Set to True to run the sensitivity analysis after optimization
NUMERIC_PERTURBATION = 0.15 # +/- 15% for sensitivity
SENS_SAMPLES_PER_PARAM = 5
SENS_RANDOM_SAMPLES = 20

# --- Define Search Space for CCI Oscillator ---

# NOTE(review): the remarks below mention short_period/long_period; confirm that the
# CCI search space actually has these parameters before relying on them.
# Hyperopt doesn't easily enforce short_period < long_period directly during sampling.
# The optimizer will evaluate invalid combinations, and they will likely fail or perform poorly.
# Strategy itself raises ValueError if short >= long during initialization.

from src.optimizer.search_space import cci_oscillator_strat_search_space

# --- Helper Functions ---

def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the "Security Name" column of an Excel file.

    Symbols are stripped of surrounding whitespace, upper-cased and
    de-duplicated, keeping the order in which they first appear. Missing and
    blank cells are ignored.

    Args:
        file_path: Path to the workbook, read with ``pd.read_excel``.
        max_tickers: When truthy, only the first ``max_tickers`` symbols are
            kept; ``None`` (or 0) keeps all of them.

    Returns:
        A non-empty list of unique ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the "Security Name" column is missing or no usable
            symbol remains.
        Exception: Any other error raised while reading or processing the
            file is logged and re-raised unchanged.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalise locally. The previous per-row yfinance lookup
        # (``Ticker(name).info``) fetched the exchange over the network and then
        # discarded it, so it only added latency and a way for loading to fail.
        symbols = (
            tickers_df["Security Name"]
            .dropna()  # a missing cell would otherwise become the symbol "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        symbols = symbols[symbols != ""]
        ticker_list = symbols.drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---

if __name__ == "__main__":
    # Driver: set up MLflow, load inputs, run the optimisation and, when
    # enabled, the sensitivity analysis. Any setup failure exits with code 1.
    logger.info("--- Starting CCI Oscillator Optimization Script ---")

    # Point MLflow at the configured tracking store.
    try:
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
        logger.info(f"MLflow tracking URI set to: {MLFLOW_TRACKING_URI}")
    except Exception as exc:
        logger.error(f"Failed to set MLflow tracking URI: {exc}")
        sys.exit(1)

    # Reuse the timestamped experiment if it exists, otherwise create it,
    # then make it the active experiment for subsequent runs.
    experiment_name = f"CCI_Oscillator_{RUN_TIMESTAMP}"
    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is not None:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing MLflow experiment: {experiment_name} with ID: {experiment_id}")
        else:
            experiment_id = mlflow.create_experiment(experiment_name)
            logger.info(f"Created new MLflow experiment: {experiment_name} with ID: {experiment_id}")
        mlflow.set_experiment(experiment_name)
    except Exception as exc:
        logger.error(f"Failed to create or set MLflow experiment: {exc}")
        sys.exit(1)

    # Ticker universe (load_tickers has already logged the underlying cause on failure).
    try:
        tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
    except Exception:
        logger.error("Failed to load tickers. Exiting.")
        sys.exit(1)

    # Database configuration.
    try:
        db_config = DatabaseConfig.default()
        logger.info("Database configuration loaded.")
    except Exception as exc:
        logger.error(f"Failed to load database configuration: {exc}")
        sys.exit(1)

    # --- Optimization ---
    # Defaults stay in place if the optimisation raises, so the sensitivity
    # step below can tell that there is nothing to analyse.
    optimizer, best_params = None, {}
    portfolio_performance_report, param_history_report = pd.DataFrame(), pd.DataFrame()

    logger.info(f"Initializing StrategyOptimizer for {CCIStrategy.__name__}")
    try:
        optimizer = StrategyOptimizer(
            strategy_class=CCIStrategy,
            db_config=db_config,
            search_space=cci_oscillator_strat_search_space,
            tickers=tickers_to_run,
            start_date=START_DATE,
            end_date=END_DATE,
            cv_folds=CV_FOLDS,
            max_evals=MAX_EVALS,
            optimization_metric=OPTIMIZATION_METRIC,
            run_name=experiment_name,  # same text as the experiment name
            n_jobs=N_JOBS,
        )

        logger.info("Starting hyperparameter optimization...")
        best_params, portfolio_performance_report, param_history_report = optimizer.run_optimization()

        if best_params:
            logger.info("--- Optimization Results ---")
            logger.info(f"Best Parameters found:\n{json.dumps(best_params, indent=2)}")
            logger.info(f"\nBest Portfolio Performance Report:\n{portfolio_performance_report.to_string()}")
            logger.info(f"\nParameter History saved (see MLflow artifacts or CSV file). Head:\n{param_history_report.head().to_string()}")
        else:
            logger.error("Optimization did not yield valid results. Best parameters not found.")

    except Exception as exc:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception(f"An error occurred during optimization: {exc}")
        # Close a run the optimizer may have left open.
        if mlflow.active_run():
            mlflow.end_run("FAILED")

    # --- Sensitivity Analysis (optional) ---
    if RUN_SENSITIVITY:
        if optimizer and best_params:
            logger.info("\n--- Starting Sensitivity Analysis ---")
            try:
                analyzer = SensitivityAnalyzer(
                    strategy_optimizer=optimizer,
                    base_params=best_params,
                    numeric_perturbation=NUMERIC_PERTURBATION,
                    num_samples_per_param=SENS_SAMPLES_PER_PARAM,
                    num_random_samples=SENS_RANDOM_SAMPLES,
                    parallel=True,
                )

                sensitivity_results_df, parameter_impact_df = analyzer.run()

                if not sensitivity_results_df.empty:
                    logger.info("--- Sensitivity Analysis Results ---")
                    logger.info(f"Sensitivity Results saved (see MLflow artifacts or CSV file). Head:\n{sensitivity_results_df.head().to_string()}")
                    logger.info(f"\nParameter Impact Report (Correlation):\n{parameter_impact_df.to_string()}")
                else:
                    logger.warning("Sensitivity analysis did not produce results.")

            except Exception as exc:
                logger.exception(f"An error occurred during sensitivity analysis: {exc}")
                if mlflow.active_run():
                    mlflow.end_run("FAILED")
        else:
            logger.warning("Skipping sensitivity analysis because optimization failed or produced no best parameters.")

    # Optional cleanup, left disabled: should not be needed if the modules
    # manage their runs with 'with mlflow.start_run()'.
    # try:
    #     while mlflow.active_run():
    #         logger.info(f"Ending lingering MLflow run: {mlflow.active_run().info.run_id}")
    #         mlflow.end_run()
    # except Exception:
    #      pass # Ignore errors during cleanup

    logger.info("--- Script Finished ---")

# Disparity Index

In [None]:
"""
Main script to run hyperparameter optimization and sensitivity analysis
for the Disparity Index strategy using the portfolio-based evaluation framework.
"""
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.mean_reversion.disparity_index_strat import DisparityIndexStrategy
    from src.optimizer.strategy_optimizer import StrategyOptimizer
    from src.optimizer.sensitivity_analyzer import SensitivityAnalyzer
    from src.database.config import DatabaseConfig
except ImportError as e:
    print("Error importing modules. Make sure the script is run from the project root")
    print("or the 'src' directory is in the Python path.")
    print(f"Import Error: {e}")
    sys.exit(1)

In [None]:
# single stock

# Logging Configuration
# Configure root logging once: INFO level, timestamped "time - name - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration
MLFLOW_TRACKING_URI = "file:./mlruns"  # Store MLflow data locally in ./mlruns
# Captured once at start-up; used below to build the experiment and run names.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Data Configuration
# NOTE(review): "../" is resolved against the current working directory, so this
# assumes the process starts one level below the project root — confirm.
TICKER_FILE_PATH = "../data/ticker.xlsx" # Excel workbook with a "Security Name" column
MAX_TICKERS = None # Limit tickers for faster testing, set to None to use all

# Backtest Period
# Window ends today and starts 4*365 days earlier (leap days are not accounted for).
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Optimization Settings (passed straight through to StrategyOptimizer)
CV_FOLDS = 5
MAX_EVALS = 50  # Number of hyperparameter sets to evaluate
OPTIMIZATION_METRIC = 'harmonic_mean' # Portfolio metric to maximize (minus penalty)
N_JOBS = -1 # Use all available CPU cores for fold evaluation within optimizer

# Sensitivity Analysis Settings (passed to SensitivityAnalyzer when enabled)
RUN_SENSITIVITY = False # Set to True to run the sensitivity analysis after optimization
NUMERIC_PERTURBATION = 0.15 # +/- 15% for sensitivity
SENS_SAMPLES_PER_PARAM = 5
SENS_RANDOM_SAMPLES = 20

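# --- Define Search Space for Disparity Index ---

# NOTE(review): the remarks below mention short_period/long_period; confirm that the
# Disparity Index search space actually has these parameters before relying on them.
# Hyperopt doesn't easily enforce short_period < long_period directly during sampling.
# The optimizer will evaluate invalid combinations, and they will likely fail or perform poorly.
# Strategy itself raises ValueError if short >= long during initialization.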

from src.optimizer.search_space import disparity_index_strat_search_space

# --- Helper Functions ---

def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the "Security Name" column of an Excel file.

    Symbols are stripped of surrounding whitespace, upper-cased and
    de-duplicated, keeping the order in which they first appear. Missing and
    blank cells are ignored.

    Args:
        file_path: Path to the workbook, read with ``pd.read_excel``.
        max_tickers: When truthy, only the first ``max_tickers`` symbols are
            kept; ``None`` (or 0) keeps all of them.

    Returns:
        A non-empty list of unique ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the "Security Name" column is missing or no usable
            symbol remains.
        Exception: Any other error raised while reading or processing the
            file is logged and re-raised unchanged.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalise locally. The previous per-row yfinance lookup
        # (``Ticker(name).info``) fetched the exchange over the network and then
        # discarded it, so it only added latency and a way for loading to fail.
        symbols = (
            tickers_df["Security Name"]
            .dropna()  # a missing cell would otherwise become the symbol "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        symbols = symbols[symbols != ""]
        ticker_list = symbols.drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---

if __name__ == "__main__":
    # Driver: set up MLflow, load inputs, run the optimisation and, when
    # enabled, the sensitivity analysis. Any setup failure exits with code 1.

    # This script's import section omits `json`, yet the results are logged
    # with json.dumps below. Import it here so a successful optimisation is
    # not reported as failed by a NameError caught in the broad handler.
    import json

    logger.info("--- Starting Disparity Index Optimization Script ---")

    # Point MLflow at the configured tracking store.
    try:
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
        logger.info(f"MLflow tracking URI set to: {MLFLOW_TRACKING_URI}")
    except Exception as e:
        logger.error(f"Failed to set MLflow tracking URI: {e}")
        sys.exit(1)

    # Reuse the timestamped experiment if it exists, otherwise create it,
    # then make it the active experiment for subsequent runs.
    experiment_name = f"Disparity_Index_{RUN_TIMESTAMP}"
    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
            logger.info(f"Created new MLflow experiment: {experiment_name} with ID: {experiment_id}")
        else:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing MLflow experiment: {experiment_name} with ID: {experiment_id}")

        mlflow.set_experiment(experiment_name)
    except Exception as e:
        logger.error(f"Failed to create or set MLflow experiment: {e}")
        sys.exit(1)

    # Ticker universe (load_tickers has already logged the underlying cause on failure).
    try:
        tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
    except Exception:
        logger.error("Failed to load tickers. Exiting.")
        sys.exit(1)

    # Database configuration.
    try:
        db_config = DatabaseConfig.default()
        logger.info("Database configuration loaded.")
    except Exception as e:
        logger.error(f"Failed to load database configuration: {e}")
        sys.exit(1)

    # --- Run Optimization ---
    # Defaults stay in place if the optimisation raises, so the sensitivity
    # step below can tell that there is nothing to analyse.
    optimizer = None
    best_params = {}
    portfolio_performance_report = pd.DataFrame()
    param_history_report = pd.DataFrame()

    logger.info(f"Initializing StrategyOptimizer for {DisparityIndexStrategy.__name__}")
    try:
        optimizer = StrategyOptimizer(
            strategy_class=DisparityIndexStrategy,
            db_config=db_config,
            search_space=disparity_index_strat_search_space,
            tickers=tickers_to_run,
            start_date=START_DATE,
            end_date=END_DATE,
            cv_folds=CV_FOLDS,
            max_evals=MAX_EVALS,
            optimization_metric=OPTIMIZATION_METRIC,
            # Use the experiment name so run and experiment labels match
            # (the run was previously labelled "Disparity_index_...").
            run_name=experiment_name,
            n_jobs=N_JOBS
            # risk_thresholds can be customized here if needed, otherwise defaults are used
        )

        logger.info("Starting hyperparameter optimization...")
        best_params, portfolio_performance_report, param_history_report = optimizer.run_optimization()

        if not best_params:
            logger.error("Optimization did not yield valid results. Best parameters not found.")
        else:
            logger.info("--- Optimization Results ---")
            logger.info(f"Best Parameters found:\n{json.dumps(best_params, indent=2)}")
            logger.info(f"\nBest Portfolio Performance Report:\n{portfolio_performance_report.to_string()}")
            logger.info(f"\nParameter History saved (see MLflow artifacts or CSV file). Head:\n{param_history_report.head().to_string()}")

    except Exception as e:
        logger.error(f"An error occurred during optimization: {e}", exc_info=True)
        # Close a run the optimizer may have left open.
        if mlflow.active_run():
            mlflow.end_run("FAILED")

    # --- Run Sensitivity Analysis (Optional) ---
    if RUN_SENSITIVITY and optimizer and best_params:
        logger.info("\n--- Starting Sensitivity Analysis ---")
        try:
            analyzer = SensitivityAnalyzer(
                strategy_optimizer=optimizer,
                base_params=best_params,
                numeric_perturbation=NUMERIC_PERTURBATION,
                num_samples_per_param=SENS_SAMPLES_PER_PARAM,
                num_random_samples=SENS_RANDOM_SAMPLES,
                parallel=True
            )

            sensitivity_results_df, parameter_impact_df = analyzer.run()

            if sensitivity_results_df.empty:
                logger.warning("Sensitivity analysis did not produce results.")
            else:
                logger.info("--- Sensitivity Analysis Results ---")
                logger.info(f"Sensitivity Results saved (see MLflow artifacts or CSV file). Head:\n{sensitivity_results_df.head().to_string()}")
                logger.info(f"\nParameter Impact Report (Correlation):\n{parameter_impact_df.to_string()}")

        except Exception as e:
            logger.error(f"An error occurred during sensitivity analysis: {e}", exc_info=True)
            if mlflow.active_run():
                mlflow.end_run("FAILED")  # End sensitivity run if it crashed

    elif RUN_SENSITIVITY:
        # Reached only when the optimizer or its best parameters are missing.
        logger.warning("Skipping sensitivity analysis because optimization failed or produced no best parameters.")

    # Optional cleanup, left disabled: should not be needed if the modules
    # manage their runs with 'with mlflow.start_run()'.
    # try:
    #     while mlflow.active_run():
    #         logger.info(f"Ending lingering MLflow run: {mlflow.active_run().info.run_id}")
    #         mlflow.end_run()
    # except Exception:
    #      pass # Ignore errors during cleanup

    logger.info("--- Script Finished ---")

# Relative Strength Index

In [None]:
"""
Main script to run hyperparameter optimization and sensitivity analysis
for the Relative Strength Index strategy using the portfolio-based evaluation framework.
"""

import json
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.mean_reversion.relative_strength_index_strat import RSIStrategy
    from src.optimizer.strategy_optimizer import StrategyOptimizer
    from src.optimizer.sensitivity_analyzer import SensitivityAnalyzer
    from src.database.config import DatabaseConfig
except ImportError as e:
    print("Error importing modules. Make sure the script is run from the project root")
    print("or the 'src' directory is in the Python path.")
    print(f"Import Error: {e}")
    sys.exit(1)

In [None]:
# single stock

# Logging Configuration
# Configure root logging once: INFO level, timestamped "time - name - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration
MLFLOW_TRACKING_URI = "file:./mlruns"  # Store MLflow data locally in ./mlruns
# Captured once at start-up; used below to build the experiment name.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Data Configuration
# NOTE(review): "../" is resolved against the current working directory, so this
# assumes the process starts one level below the project root — confirm.
TICKER_FILE_PATH = "../data/ticker.xlsx" # Excel workbook with a "Security Name" column
MAX_TICKERS = None # Limit tickers for faster testing, set to None to use all


# Backtest Period
# Window ends today and starts 4*365 days earlier (leap days are not accounted for).
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Optimization Settings (passed straight through to StrategyOptimizer)
CV_FOLDS = 3
MAX_EVALS = 50  # Number of hyperparameter sets to evaluate
OPTIMIZATION_METRIC = 'harmonic_mean' # Portfolio metric to maximize (minus penalty)
N_JOBS = -1 # Use all available CPU cores for fold evaluation within optimizer

# Sensitivity Analysis Settings (passed to SensitivityAnalyzer when enabled)
RUN_SENSITIVITY = False # Set to True to run the sensitivity analysis after optimization
NUMERIC_PERTURBATION = 0.15 # +/- 15% for sensitivity
SENS_SAMPLES_PER_PARAM = 5
SENS_RANDOM_SAMPLES = 20

# --- Define Search Space for Relative Strength Index ---

# NOTE(review): the remarks below mention short_period/long_period; confirm that the
# RSI search space actually has these parameters before relying on them.
# Hyperopt doesn't easily enforce short_period < long_period directly during sampling.
# The optimizer will evaluate invalid combinations, and they will likely fail or perform poorly.
# Strategy itself raises ValueError if short >= long during initialization.

from src.optimizer.search_space import relative_strength_index_strat_search_space

# --- Helper Functions ---

def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the "Security Name" column of an Excel file.

    Symbols are stripped of surrounding whitespace, upper-cased and
    de-duplicated, keeping the order in which they first appear. Missing and
    blank cells are ignored.

    Args:
        file_path: Path to the workbook, read with ``pd.read_excel``.
        max_tickers: When truthy, only the first ``max_tickers`` symbols are
            kept; ``None`` (or 0) keeps all of them.

    Returns:
        A non-empty list of unique ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the "Security Name" column is missing or no usable
            symbol remains.
        Exception: Any other error raised while reading or processing the
            file is logged and re-raised unchanged.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalise locally. The previous per-row yfinance lookup
        # (``Ticker(name).info``) fetched the exchange over the network and then
        # discarded it, so it only added latency and a way for loading to fail.
        symbols = (
            tickers_df["Security Name"]
            .dropna()  # a missing cell would otherwise become the symbol "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        symbols = symbols[symbols != ""]
        ticker_list = symbols.drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---

if __name__ == "__main__":
    # Driver: set up MLflow, load inputs, run the optimisation and, when
    # enabled, the sensitivity analysis. Any setup failure exits with code 1.

    # The banner used to announce the Disparity Index script; this cell
    # optimises RSIStrategy.
    logger.info("--- Starting Relative Strength Index Optimization Script ---")

    # Point MLflow at the configured tracking store.
    try:
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
        logger.info(f"MLflow tracking URI set to: {MLFLOW_TRACKING_URI}")
    except Exception as e:
        logger.error(f"Failed to set MLflow tracking URI: {e}")
        sys.exit(1)

    # Reuse the timestamped experiment if it exists, otherwise create it,
    # then make it the active experiment for subsequent runs.
    experiment_name = f"Relative_Strength_index_{RUN_TIMESTAMP}"
    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
            logger.info(f"Created new MLflow experiment: {experiment_name} with ID: {experiment_id}")
        else:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing MLflow experiment: {experiment_name} with ID: {experiment_id}")

        mlflow.set_experiment(experiment_name)
    except Exception as e:
        logger.error(f"Failed to create or set MLflow experiment: {e}")
        sys.exit(1)

    # Ticker universe (load_tickers has already logged the underlying cause on failure).
    try:
        tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
    except Exception:
        logger.error("Failed to load tickers. Exiting.")
        sys.exit(1)

    # Database configuration.
    try:
        db_config = DatabaseConfig.default()
        logger.info("Database configuration loaded.")
    except Exception as e:
        logger.error(f"Failed to load database configuration: {e}")
        sys.exit(1)

    # --- Run Optimization ---
    # Defaults stay in place if the optimisation raises, so the sensitivity
    # step below can tell that there is nothing to analyse.
    optimizer = None
    best_params = {}
    portfolio_performance_report = pd.DataFrame()
    param_history_report = pd.DataFrame()

    logger.info(f"Initializing StrategyOptimizer for {RSIStrategy.__name__}")
    try:
        optimizer = StrategyOptimizer(
            strategy_class=RSIStrategy,
            db_config=db_config,
            search_space=relative_strength_index_strat_search_space,
            tickers=tickers_to_run,
            start_date=START_DATE,
            end_date=END_DATE,
            cv_folds=CV_FOLDS,
            max_evals=MAX_EVALS,
            optimization_metric=OPTIMIZATION_METRIC,
            # Was "Disparity_index_...", which mislabelled RSI runs in MLflow;
            # use the experiment name so both labels agree.
            run_name=experiment_name,
            n_jobs=N_JOBS
            # risk_thresholds can be customized here if needed, otherwise defaults are used
        )

        logger.info("Starting hyperparameter optimization...")
        best_params, portfolio_performance_report, param_history_report = optimizer.run_optimization()

        if not best_params:
            logger.error("Optimization did not yield valid results. Best parameters not found.")
        else:
            logger.info("--- Optimization Results ---")
            logger.info(f"Best Parameters found:\n{json.dumps(best_params, indent=2)}")
            logger.info(f"\nBest Portfolio Performance Report:\n{portfolio_performance_report.to_string()}")
            logger.info(f"\nParameter History saved (see MLflow artifacts or CSV file). Head:\n{param_history_report.head().to_string()}")

    except Exception as e:
        logger.error(f"An error occurred during optimization: {e}", exc_info=True)
        # Close a run the optimizer may have left open.
        if mlflow.active_run():
            mlflow.end_run("FAILED")

    # --- Run Sensitivity Analysis (Optional) ---
    if RUN_SENSITIVITY and optimizer and best_params:
        logger.info("\n--- Starting Sensitivity Analysis ---")
        try:
            analyzer = SensitivityAnalyzer(
                strategy_optimizer=optimizer,
                base_params=best_params,
                numeric_perturbation=NUMERIC_PERTURBATION,
                num_samples_per_param=SENS_SAMPLES_PER_PARAM,
                num_random_samples=SENS_RANDOM_SAMPLES,
                parallel=True
            )

            sensitivity_results_df, parameter_impact_df = analyzer.run()

            if sensitivity_results_df.empty:
                logger.warning("Sensitivity analysis did not produce results.")
            else:
                logger.info("--- Sensitivity Analysis Results ---")
                logger.info(f"Sensitivity Results saved (see MLflow artifacts or CSV file). Head:\n{sensitivity_results_df.head().to_string()}")
                logger.info(f"\nParameter Impact Report (Correlation):\n{parameter_impact_df.to_string()}")

        except Exception as e:
            logger.error(f"An error occurred during sensitivity analysis: {e}", exc_info=True)
            if mlflow.active_run():
                mlflow.end_run("FAILED")  # End sensitivity run if it crashed

    elif RUN_SENSITIVITY:
        # Reached only when the optimizer or its best parameters are missing.
        logger.warning("Skipping sensitivity analysis because optimization failed or produced no best parameters.")

    # Optional cleanup, left disabled: should not be needed if the modules
    # manage their runs with 'with mlflow.start_run()'.
    # try:
    #     while mlflow.active_run():
    #         logger.info(f"Ending lingering MLflow run: {mlflow.active_run().info.run_id}")
    #         mlflow.end_run()
    # except Exception:
    #      pass # Ignore errors during cleanup

    logger.info("--- Script Finished ---")

# Stochastic Oscillator

In [None]:
"""
Main script to run hyperparameter optimization and sensitivity analysis
for the Stochastic Oscillator strategy using the portfolio-based evaluation framework.
"""

import json
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.mean_reversion.stochastic_oscillator_strat import StochasticStrategy
    from src.optimizer.strategy_optimizer import StrategyOptimizer
    from src.optimizer.sensitivity_analyzer import SensitivityAnalyzer
    from src.database.config import DatabaseConfig
except ImportError as e:
    print("Error importing modules. Make sure the script is run from the project root")
    print("or the 'src' directory is in the Python path.")
    print(f"Import Error: {e}")
    sys.exit(1)

In [None]:
# single stock

# Logging Configuration
# Configure root logging once: INFO level, timestamped "time - name - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration
MLFLOW_TRACKING_URI = "file:./mlruns"  # Store MLflow data locally in ./mlruns
# Captured once at start-up as a "YYYYmmdd_HHMMSS" string.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Data Configuration
# NOTE(review): "../" is resolved against the current working directory, so this
# assumes the process starts one level below the project root — confirm.
TICKER_FILE_PATH = "../data/ticker.xlsx" # Excel workbook with a "Security Name" column
MAX_TICKERS = 10 # Only the first 10 tickers are used; set to None to use all

# Backtest Period
# Window ends today and starts 4*365 days earlier (leap days are not accounted for).
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Optimization Settings
CV_FOLDS = 5
MAX_EVALS = 50  # Number of hyperparameter sets to evaluate
OPTIMIZATION_METRIC = 'harmonic_mean' # Portfolio metric to maximize (minus penalty)
N_JOBS = -1 # Use all available CPU cores for fold evaluation within optimizer

# Sensitivity Analysis Settings
RUN_SENSITIVITY = False # Set to False to skip sensitivity analysis
NUMERIC_PERTURBATION = 0.15 # +/- 15% for sensitivity
SENS_SAMPLES_PER_PARAM = 5
SENS_RANDOM_SAMPLES = 20

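# --- Define Search Space for Stochastic Oscillator ---

# NOTE(review): the remarks below mention short_period/long_period; confirm that the
# Stochastic Oscillator search space actually has these parameters before relying on them.
# Hyperopt doesn't easily enforce short_period < long_period directly during sampling.
# The optimizer will evaluate invalid combinations, and they will likely fail or perform poorly.
# Strategy itself raises ValueError if short >= long during initialization.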

from src.optimizer.search_space import stochastic_oscillator_strat_search_space

# --- Helper Functions ---

def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the 'Security Name' column of an Excel file.

    Names are stripped, upper-cased and de-duplicated in first-seen order.
    Empty cells are ignored.

    Args:
        file_path: Path of the Excel workbook that lists the tickers.
        max_tickers: If truthy, keep at most this many tickers; ``None``
            (or 0) keeps all of them.

    Returns:
        The normalized ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the 'Security Name' column is missing or no ticker
            remains after cleaning.
        Exception: Any other read/processing error is logged and re-raised.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalize the names directly. The earlier per-row yfinance lookup
        # fetched the exchange but never used it, which cost one network
        # request per ticker and aborted the whole load on any lookup error.
        names = (
            tickers_df["Security Name"]
            .dropna()  # empty cells would otherwise become the bogus ticker "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        ticker_list = names[names != ""].drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---

if __name__ == "__main__":
    # Entry point: configure MLflow, load tickers and the database config, run
    # the hyperparameter optimization and, if enabled, the sensitivity analysis.
    logger.info("--- Starting Stochastic Oscillator Optimization Script ---")

    # Setup MLflow
    try:
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
        logger.info(f"MLflow tracking URI set to: {MLFLOW_TRACKING_URI}")
    except Exception as e:
        logger.error(f"Failed to set MLflow tracking URI: {e}")
        sys.exit(1)

    # Create the MLflow experiment if it doesn't exist, then make it active
    try:
        experiment_name = f"stochastic_oscillator_index_{RUN_TIMESTAMP}"
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
            logger.info(f"Created new MLflow experiment: {experiment_name} with ID: {experiment_id}")
        else:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing MLflow experiment: {experiment_name} with ID: {experiment_id}")

        # Set the experiment for subsequent runs
        mlflow.set_experiment(experiment_name)
    except Exception as e:
        logger.error(f"Failed to create or set MLflow experiment: {e}")
        sys.exit(1)

    # Load Tickers (load_tickers already logs the underlying error)
    try:
        tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
    except Exception:
        logger.error("Failed to load tickers. Exiting.")
        sys.exit(1)

    # Database Config
    try:
        db_config = DatabaseConfig.default()
        # Optional: Add a check here to ensure DB connection is valid if possible
        logger.info("Database configuration loaded.")
    except Exception as e:
        logger.error(f"Failed to load database configuration: {e}")
        sys.exit(1)

    # --- Run Optimization ---
    # Defaults let the sensitivity step below detect a failed optimization.
    optimizer = None
    best_params = {}
    portfolio_performance_report = pd.DataFrame()
    param_history_report = pd.DataFrame()

    logger.info(f"Initializing StrategyOptimizer for {StochasticStrategy.__name__}")
    try:
        optimizer = StrategyOptimizer(
            strategy_class=StochasticStrategy,
            db_config=db_config,
            search_space=stochastic_oscillator_strat_search_space,
            tickers=tickers_to_run,
            start_date=START_DATE,
            end_date=END_DATE,
            cv_folds=CV_FOLDS,
            max_evals=MAX_EVALS,
            optimization_metric=OPTIMIZATION_METRIC,
            run_name=f"Stochastic_Oscillator_{RUN_TIMESTAMP}",
            n_jobs=N_JOBS
            # risk_thresholds can be customized here if needed, otherwise defaults are used
        )

        logger.info("Starting hyperparameter optimization...")
        best_params, portfolio_performance_report, param_history_report = optimizer.run_optimization()

        if not best_params:
            logger.error("Optimization did not yield valid results. Best parameters not found.")
        else:
            logger.info("--- Optimization Results ---")
            # default=str: best_params may presumably hold non-JSON-native values
            # (e.g. NumPy scalars); a serialization error here must not make a
            # successful optimization be reported as failed.
            logger.info(f"Best Parameters found:\n{json.dumps(best_params, indent=2, default=str)}")
            logger.info(f"\nBest Portfolio Performance Report:\n{portfolio_performance_report.to_string()}")
            logger.info(f"\nParameter History saved (see MLflow artifacts or CSV file). Head:\n{param_history_report.head().to_string()}")

    except Exception as e:
        logger.error(f"An error occurred during optimization: {e}", exc_info=True)
        # Attempt to end MLflow run if it was started by the optimizer
        if mlflow.active_run():
            mlflow.end_run("FAILED")

    # --- Run Sensitivity Analysis (Optional) ---
    if RUN_SENSITIVITY and optimizer and best_params:
        logger.info("\n--- Starting Sensitivity Analysis ---")
        try:
            analyzer = SensitivityAnalyzer(
                strategy_optimizer=optimizer, # Reuse optimizer for its config and evaluation cache
                base_params=best_params,
                numeric_perturbation=NUMERIC_PERTURBATION,
                num_samples_per_param=SENS_SAMPLES_PER_PARAM,
                num_random_samples=SENS_RANDOM_SAMPLES,
                parallel=True # Relies on optimizer's internal parallelization/caching
            )

            sensitivity_results_df, parameter_impact_df = analyzer.run()

            if sensitivity_results_df.empty:
                logger.warning("Sensitivity analysis did not produce results.")
            else:
                logger.info("--- Sensitivity Analysis Results ---")
                logger.info(f"Sensitivity Results saved (see MLflow artifacts or CSV file). Head:\n{sensitivity_results_df.head().to_string()}")
                logger.info(f"\nParameter Impact Report (Correlation):\n{parameter_impact_df.to_string()}")

        except Exception as e:
            logger.error(f"An error occurred during sensitivity analysis: {e}", exc_info=True)
            if mlflow.active_run():
                mlflow.end_run("FAILED") # End sensitivity run if it crashed

    elif RUN_SENSITIVITY:
        # Reached only when the optimizer or its best parameters are missing.
        logger.warning("Skipping sensitivity analysis because optimization failed or produced no best parameters.")

    logger.info("--- Script Finished ---")

# Williams Percent R

In [None]:
"""
Main script to run hyperparameter optimization and sensitivity analysis
for the Williams %R strategy using the portfolio-based evaluation framework.
"""

import json
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.mean_reversion.williams_percent_r_start import WilliamsRStrategy
    from src.optimizer.strategy_optimizer import StrategyOptimizer
    from src.optimizer.sensitivity_analyzer import SensitivityAnalyzer
    from src.database.config import DatabaseConfig
except ImportError as e:
    print("Error importing modules. Make sure the script is run from the project root")
    print("or the 'src' directory is in the Python path.")
    print(f"Import Error: {e}")
    sys.exit(1)

In [None]:
# single stock

# Logging Configuration
# Root logger: INFO level, timestamped "time - logger - level - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration
MLFLOW_TRACKING_URI = "file:./mlruns"  # Store MLflow data locally in ./mlruns
# Captured once at cell execution; reused in the experiment and run names below.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Data Configuration
TICKER_FILE_PATH = "../data/ticker.xlsx" # Relative path: resolved against the current working directory
MAX_TICKERS = None # Set to an int to limit tickers for faster testing; None uses all

# Backtest Period
# Rolling window: the last 4*365 days up to today, as YYYY-MM-DD strings.
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Optimization Settings
CV_FOLDS = 5
MAX_EVALS = 50  # Number of hyperparameter sets to evaluate
OPTIMIZATION_METRIC = 'harmonic_mean' # Portfolio metric to maximize (minus penalty)
N_JOBS = -1 # Use all available CPU cores for fold evaluation within optimizer

# Sensitivity Analysis Settings
RUN_SENSITIVITY = False # False skips the sensitivity analysis step; set to True to run it
NUMERIC_PERTURBATION = 0.15 # +/- 15% for sensitivity
SENS_SAMPLES_PER_PARAM = 5
SENS_RANDOM_SAMPLES = 20

# --- Define Search Space for the Williams %R strategy ---

# NOTE(review): the remarks below mention short_period/long_period and look
# copied from another optimizer script - confirm they apply to this search space.
# Note: Hyperopt doesn't easily enforce short_period < long_period directly during sampling.
# The optimizer will evaluate invalid combinations, and they will likely fail or perform poorly.
# Strategy itself raises ValueError if short >= long during initialization.

from src.optimizer.search_space import williams_percent_r_strat_search_space

# --- Helper Functions ---

def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the 'Security Name' column of an Excel file.

    Names are stripped, upper-cased and de-duplicated in first-seen order.
    Empty cells are ignored.

    Args:
        file_path: Path of the Excel workbook that lists the tickers.
        max_tickers: If truthy, keep at most this many tickers; ``None``
            (or 0) keeps all of them.

    Returns:
        The normalized ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the 'Security Name' column is missing or no ticker
            remains after cleaning.
        Exception: Any other read/processing error is logged and re-raised.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalize the names directly. The earlier per-row yfinance lookup
        # fetched the exchange but never used it, which cost one network
        # request per ticker and aborted the whole load on any lookup error.
        names = (
            tickers_df["Security Name"]
            .dropna()  # empty cells would otherwise become the bogus ticker "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        ticker_list = names[names != ""].drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---

if __name__ == "__main__":
    # Entry point: configure MLflow, load tickers and the database config, run
    # the hyperparameter optimization and, if enabled, the sensitivity analysis.
    logger.info("--- Starting Williams Percent R Index Optimization Script ---")

    # Setup MLflow
    try:
        mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
        logger.info(f"MLflow tracking URI set to: {MLFLOW_TRACKING_URI}")
    except Exception as e:
        logger.error(f"Failed to set MLflow tracking URI: {e}")
        sys.exit(1)

    # Create the MLflow experiment if it doesn't exist, then make it active
    try:
        experiment_name = f"Williams Percent R_index_{RUN_TIMESTAMP}"
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
            logger.info(f"Created new MLflow experiment: {experiment_name} with ID: {experiment_id}")
        else:
            experiment_id = experiment.experiment_id
            logger.info(f"Using existing MLflow experiment: {experiment_name} with ID: {experiment_id}")

        # Set the experiment for subsequent runs
        mlflow.set_experiment(experiment_name)
    except Exception as e:
        logger.error(f"Failed to create or set MLflow experiment: {e}")
        sys.exit(1)

    # Load Tickers (load_tickers already logs the underlying error)
    try:
        tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
    except Exception:
        logger.error("Failed to load tickers. Exiting.")
        sys.exit(1)

    # Database Config
    try:
        db_config = DatabaseConfig.default()
        # Optional: Add a check here to ensure DB connection is valid if possible
        logger.info("Database configuration loaded.")
    except Exception as e:
        logger.error(f"Failed to load database configuration: {e}")
        sys.exit(1)

    # --- Run Optimization ---
    # Defaults let the sensitivity step below detect a failed optimization.
    optimizer = None
    best_params = {}
    portfolio_performance_report = pd.DataFrame()
    param_history_report = pd.DataFrame()

    logger.info(f"Initializing StrategyOptimizer for {WilliamsRStrategy.__name__}")
    try:
        optimizer = StrategyOptimizer(
            strategy_class=WilliamsRStrategy,
            db_config=db_config,
            search_space=williams_percent_r_strat_search_space,
            tickers=tickers_to_run,
            start_date=START_DATE,
            end_date=END_DATE,
            cv_folds=CV_FOLDS,
            max_evals=MAX_EVALS,
            optimization_metric=OPTIMIZATION_METRIC,
            # Spelling matches the experiment name above ("Percent").
            run_name=f"Williams Percent R_{RUN_TIMESTAMP}",
            n_jobs=N_JOBS
            # risk_thresholds can be customized here if needed, otherwise defaults are used
        )

        logger.info("Starting hyperparameter optimization...")
        best_params, portfolio_performance_report, param_history_report = optimizer.run_optimization()

        if not best_params:
            logger.error("Optimization did not yield valid results. Best parameters not found.")
        else:
            logger.info("--- Optimization Results ---")
            # default=str: best_params may presumably hold non-JSON-native values
            # (e.g. NumPy scalars); a serialization error here must not make a
            # successful optimization be reported as failed.
            logger.info(f"Best Parameters found:\n{json.dumps(best_params, indent=2, default=str)}")
            logger.info(f"\nBest Portfolio Performance Report:\n{portfolio_performance_report.to_string()}")
            logger.info(f"\nParameter History saved (see MLflow artifacts or CSV file). Head:\n{param_history_report.head().to_string()}")

    except Exception as e:
        logger.error(f"An error occurred during optimization: {e}", exc_info=True)
        # Attempt to end MLflow run if it was started by the optimizer
        if mlflow.active_run():
            mlflow.end_run("FAILED")

    # --- Run Sensitivity Analysis (Optional) ---
    if RUN_SENSITIVITY and optimizer and best_params:
        logger.info("\n--- Starting Sensitivity Analysis ---")
        try:
            analyzer = SensitivityAnalyzer(
                strategy_optimizer=optimizer, # Reuse optimizer for its config and evaluation cache
                base_params=best_params,
                numeric_perturbation=NUMERIC_PERTURBATION,
                num_samples_per_param=SENS_SAMPLES_PER_PARAM,
                num_random_samples=SENS_RANDOM_SAMPLES,
                parallel=True # Relies on optimizer's internal parallelization/caching
            )

            sensitivity_results_df, parameter_impact_df = analyzer.run()

            if sensitivity_results_df.empty:
                logger.warning("Sensitivity analysis did not produce results.")
            else:
                logger.info("--- Sensitivity Analysis Results ---")
                logger.info(f"Sensitivity Results saved (see MLflow artifacts or CSV file). Head:\n{sensitivity_results_df.head().to_string()}")
                logger.info(f"\nParameter Impact Report (Correlation):\n{parameter_impact_df.to_string()}")

        except Exception as e:
            logger.error(f"An error occurred during sensitivity analysis: {e}", exc_info=True)
            if mlflow.active_run():
                mlflow.end_run("FAILED") # End sensitivity run if it crashed

    elif RUN_SENSITIVITY:
        # Reached only when the optimizer or its best parameters are missing.
        logger.warning("Skipping sensitivity analysis because optimization failed or produced no best parameters.")

    logger.info("--- Script Finished ---")

# Buy and Hold

In [None]:
# trading_system/scripts/evaluate_bnh_benchmark.py

import json
import logging
import os
import sys
from datetime import datetime, timedelta
from typing import List, Optional

import mlflow
import pandas as pd
import yfinance as yf

# --- Adjust path to import from src ---
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

try:
    from src.strategies.buy_and_hold_baseline import BuyAndHoldStrategy
    from src.database.config import DatabaseConfig
    from src.optimizer.performance_evaluator import PerformanceEvaluator, MetricsDict
except ImportError as e:
    print(f"Error importing modules. Ensure your PYTHONPATH is set correctly or run from project root.")
    print(f"Current sys.path: {sys.path}")
    print(f"Import Error: {e}")
    sys.exit(1)

# --- Configuration ---

# Logging Configuration
# Root logger: INFO level, timestamped "time - logger - level - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# MLflow Configuration (Optional for direct benchmark evaluation, but can be useful for tracking)
MLFLOW_TRACKING_URI = "file:./mlruns"
# Second-resolution stamp used in the MLflow run name and the output CSV filename.
RUN_TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
# Date-only suffix, so evaluations made on the same day share one experiment name.
MLFLOW_EXPERIMENT_NAME = f"Benchmark_Evaluations_{datetime.now().strftime('%Y%m%d')}"
RUN_MLFLOW = True # Set to False if you don't want to log this evaluation to MLflow

# Data Configuration
TICKER_FILE_PATH = "../data/ticker.xlsx" # Relative path: resolved against the current working directory
MAX_TICKERS = 10 # Limit tickers for faster testing, set to None to use all
# Alternatively, define a specific list:
# TICKERS_TO_EVALUATE = ["RELIANCE.BO", "INFY.BO"] # Overrides TICKER_FILE_PATH if not None
TICKERS_TO_EVALUATE = None # Set to a list to use specific tickers, otherwise loads from file

# Backtest Period
# Rolling window: the last 4*365 days up to today, as YYYY-MM-DD strings.
START_DATE = (datetime.now() - timedelta(days=4*365)).strftime("%Y-%m-%d")
END_DATE = datetime.now().strftime("%Y-%m-%d")

# Buy and Hold Strategy Parameters for RiskManager
# For a "purer" buy-and-hold benchmark, minimize impact of RiskManager's stops/targets.
# Set stop_loss_pct and take_profit_pct to values that are unlikely to be hit.
# Slippage and transaction costs are still good to include for realism.
BENCHMARK_STRATEGY_PARAMS = {
    'stop_loss_pct': 1.0,          # Effectively no stop loss (100% loss from entry)
    'take_profit_pct': 100.0,      # Effectively no take profit (10,000% gain from entry)
    'trailing_stop_pct': 0.0,      # Disable trailing stop
    'slippage_pct': 0.001,         # Example slippage
    'transaction_cost_pct': 0.001, # Example transaction cost
    'data_lookback': 252           # For BaseStrategy's get_historical_prices if no date range given
}


# --- Helper Functions (Copied from your example for consistency) ---
def load_tickers(file_path: str, max_tickers: Optional[int] = None) -> List[str]:
    """Load ticker symbols from the 'Security Name' column of an Excel file.

    Names are stripped, upper-cased and de-duplicated in first-seen order.
    Empty cells are ignored.

    Args:
        file_path: Path of the Excel workbook that lists the tickers.
        max_tickers: If truthy, keep at most this many tickers; ``None``
            (or 0) keeps all of them.

    Returns:
        The normalized ticker symbols.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the 'Security Name' column is missing or no ticker
            remains after cleaning.
        Exception: Any other read/processing error is logged and re-raised.
    """
    logger.info(f"Loading tickers from: {file_path}")
    try:
        tickers_df = pd.read_excel(file_path)
        # Basic validation
        if "Security Name" not in tickers_df.columns:
            raise ValueError("Ticker file missing required columns: 'Security Name'")

        # Normalize the names directly. The earlier per-row yfinance lookup
        # fetched the exchange but never used it, which cost one network
        # request per ticker and aborted the whole load on any lookup error.
        names = (
            tickers_df["Security Name"]
            .dropna()  # empty cells would otherwise become the bogus ticker "NAN"
            .astype(str)
            .str.strip()
            .str.upper()
        )
        ticker_list = names[names != ""].drop_duplicates().tolist()

        logger.info(f"Loaded {len(ticker_list)} unique tickers.")
        if max_tickers and len(ticker_list) > max_tickers:
            logger.warning(f"Limiting tickers to {max_tickers} for this run.")
            ticker_list = ticker_list[:max_tickers]

        if not ticker_list:
            raise ValueError("No tickers loaded.")

        return ticker_list

    except FileNotFoundError:
        logger.error(f"Ticker file not found at: {file_path}")
        raise
    except Exception as e:
        logger.error(f"Error processing ticker file: {e}")
        raise

# --- Main Execution ---
def _evaluate_benchmark(tickers_to_run, db_config, active_mlflow_run) -> None:
    """Run the buy-and-hold strategy, compute portfolio metrics and report them.

    Metrics are logged, written to a timestamped CSV in the working directory
    and, when ``active_mlflow_run`` is set, recorded in MLflow.

    Args:
        tickers_to_run: Tickers handed to ``BuyAndHoldStrategy.generate_signals``.
        db_config: Database configuration used to build the strategy.
        active_mlflow_run: The started MLflow run, or ``None`` when MLflow
            logging is disabled or unavailable.

    Raises:
        SystemExit: If the strategy returns an empty frame or a frame whose
            layout cannot be split per ticker (the MLflow run, if any, is
            ended as FAILED first).
    """
    logger.info(f"Initializing {BuyAndHoldStrategy.__name__} with params: {json.dumps(BENCHMARK_STRATEGY_PARAMS, indent=2)}")
    bnh_strategy = BuyAndHoldStrategy(db_config=db_config, params=BENCHMARK_STRATEGY_PARAMS)

    logger.info(f"Generating signals for BuyAndHoldStrategy from {START_DATE} to {END_DATE} for {len(tickers_to_run)} tickers.")
    df_bnh_results = bnh_strategy.generate_signals(
        ticker=tickers_to_run,
        start_date=START_DATE,
        end_date=END_DATE,
        initial_position=0,
        latest_only=False
    )

    if df_bnh_results.empty:
        logger.error("BuyAndHoldStrategy generated an empty DataFrame. Cannot calculate metrics.")
        if active_mlflow_run:
            mlflow.log_metric("evaluation_status", 0)
            mlflow.end_run("FAILED")
        sys.exit(1)

    logger.info(f"BuyAndHoldStrategy generated {len(df_bnh_results)} signal rows.")

    # Split the combined result into one frame per ticker for the evaluator.
    signals_dict_for_evaluator: dict[str, pd.DataFrame] = {}
    if isinstance(df_bnh_results.index, pd.MultiIndex):
        for ticker_name, group_df in df_bnh_results.groupby(level='ticker'):
            signals_dict_for_evaluator[ticker_name] = group_df.droplevel('ticker')
    elif isinstance(tickers_to_run, str) or len(tickers_to_run) == 1:
        ticker_name = tickers_to_run[0] if isinstance(tickers_to_run, list) else tickers_to_run
        signals_dict_for_evaluator[ticker_name] = df_bnh_results
    else:
        logger.error("Unexpected format for df_bnh_results. Cannot prepare for PerformanceEvaluator.")
        if active_mlflow_run:
            mlflow.log_metric("evaluation_status", 0)
            mlflow.end_run("FAILED")
        sys.exit(1)

    logger.info("Calculating portfolio metrics for the Buy and Hold benchmark...")
    portfolio_metrics: MetricsDict = PerformanceEvaluator.compute_portfolio_metrics(
        signals_dict=signals_dict_for_evaluator
    )

    logger.info("\n--- Buy and Hold Benchmark Portfolio Performance Metrics ---")
    if not portfolio_metrics:
        logger.warning("No portfolio metrics were calculated.")
        if active_mlflow_run:
            mlflow.log_metric("evaluation_status", 0)
        return

    for metric, value in portfolio_metrics.items():
        # Choose the display format from the metric name.
        if any(sub in metric for sub in ["return", "pct", "rate", "drawdown"]):
            display_value = f"{value:.4%}"
        elif "duration" in metric:
            display_value = f"{value:.2f} days"
        else:
            display_value = f"{value:.4f}"
        logger.info(f"{metric.replace('_', ' ').title()}: {display_value}")
        if active_mlflow_run and pd.notna(value):
            mlflow.log_metric(f"portfolio_{metric}", value)
    if active_mlflow_run:
        mlflow.log_metric("evaluation_status", 1)

    # Persist the metrics together with the run context as a one-row CSV.
    results_df = pd.DataFrame([portfolio_metrics])
    results_df['tickers_evaluated_count'] = len(tickers_to_run)
    results_df['start_date'] = START_DATE
    results_df['end_date'] = END_DATE
    for p_name, p_val in BENCHMARK_STRATEGY_PARAMS.items():
        results_df[f'param_{p_name}'] = str(p_val)

    results_filename = f"benchmark_bnh_metrics_{RUN_TIMESTAMP}.csv"
    results_df.to_csv(results_filename, index=False)
    logger.info(f"Benchmark metrics saved to {results_filename}")
    if active_mlflow_run:
        try:
            mlflow.log_artifact(results_filename)
        except Exception as e:
            logger.error(f"Failed to log artifact {results_filename} to MLflow: {e}")


# --- Main Execution ---
def main():
    """Evaluate the buy-and-hold benchmark and report its portfolio metrics.

    Sets up MLflow (best effort: a setup failure disables MLflow logging by
    setting the module-level ``RUN_MLFLOW`` to False), resolves the ticker list
    and database configuration, then runs the evaluation. A started MLflow run
    is always ended, as FAILED when the evaluation raises.

    Raises:
        SystemExit: If tickers or the database configuration cannot be
            loaded, or the strategy output is unusable.
    """
    global RUN_MLFLOW

    logger.info("--- Starting Buy and Hold Benchmark Evaluation Script ---")

    # Setup MLflow (Optional)
    if RUN_MLFLOW:
        try:
            mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
            experiment = mlflow.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
            if experiment is None:
                experiment_id = mlflow.create_experiment(MLFLOW_EXPERIMENT_NAME)
                logger.info(f"Created new MLflow experiment: {MLFLOW_EXPERIMENT_NAME} with ID: {experiment_id}")
            else:
                experiment_id = experiment.experiment_id
                logger.info(f"Using existing MLflow experiment: {MLFLOW_EXPERIMENT_NAME} with ID: {experiment_id}")
            mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)
        except Exception as e:
            logger.error(f"Failed to setup MLflow: {e}. Continuing without MLflow logging.")
            RUN_MLFLOW = False

    # Load Tickers or use defined list
    if TICKERS_TO_EVALUATE:
        tickers_to_run = TICKERS_TO_EVALUATE
        logger.info(f"Using predefined ticker list: {tickers_to_run}")
    else:
        try:
            tickers_to_run = load_tickers(TICKER_FILE_PATH, MAX_TICKERS)
        except Exception:
            logger.error("Failed to load tickers. Exiting.")
            sys.exit(1)

    # Database Config
    try:
        db_config = DatabaseConfig.default()
        logger.info("Database configuration loaded.")
    except Exception as e:
        logger.error(f"Failed to load database configuration: {e}")
        sys.exit(1)

    # Start MLflow Run (if enabled)
    active_mlflow_run = None
    if RUN_MLFLOW:
        try:
            active_mlflow_run = mlflow.start_run(run_name=f"BuyAndHold_Eval_{RUN_TIMESTAMP}")
            mlflow.log_param("strategy_class", BuyAndHoldStrategy.__name__)
            mlflow.log_param("tickers_evaluated_count", len(tickers_to_run))
            mlflow.log_param("tickers_sample", ", ".join(tickers_to_run[:5]) + ('...' if len(tickers_to_run) > 5 else ''))
            mlflow.log_param("start_date", START_DATE)
            mlflow.log_param("end_date", END_DATE)
            mlflow.log_params({f"param_{k}": v for k, v in BENCHMARK_STRATEGY_PARAMS.items()})
        except Exception as e:
            logger.error(f"Failed to start MLflow run: {e}")
            # The run may have started before a log_param call failed; close
            # it so it is not left active for the rest of the session.
            if active_mlflow_run is not None:
                mlflow.end_run("FAILED")
            active_mlflow_run = None

    try:
        _evaluate_benchmark(tickers_to_run, db_config, active_mlflow_run)
    except Exception:
        # Do not leave the run dangling when strategy/evaluator code raises.
        if active_mlflow_run:
            mlflow.end_run("FAILED")
        raise

    if active_mlflow_run:
        mlflow.end_run()

    logger.info("--- Benchmark Evaluation Script Finished ---")

if __name__ == "__main__":
    main()