# Backtesting code

In [5]:
!pip install backtrader

Collecting backtrader
  Downloading backtrader-1.9.78.123-py2.py3-none-any.whl.metadata (6.8 kB)
Downloading backtrader-1.9.78.123-py2.py3-none-any.whl (419 kB)
Installing collected packages: backtrader
Successfully installed backtrader-1.9.78.123


In [None]:
from datetime import datetime
import backtrader as bt
import yfinance as yf
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')

# Define your strategy
class AlphaFactorStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.02),  # Example buy threshold
        ('sell_threshold', -0.01), # Example sell threshold
    )

    def __init__(self):
        self.alpha_factor = {}

    def next(self):
        for data in self.datas:
            ticker = data._name
            current_date = self.data.datetime.date(0)
            alpha_value = alpha_dict.get((current_date, ticker), None)

            if alpha_value is not None:
                if alpha_value > self.p.buy_threshold and not self.getposition(data).size > 0:
                    self.buy(data)
                elif alpha_value < self.p.sell_threshold and not self.getposition(data).size < 0:
                    self.sell(data)

# Create a cerebro instance
cerebro = bt.Cerebro()

# Load your alpha factor dataset
alpha_data = pd.read_csv('hang_seng_index_stocks_with_factors.csv', parse_dates=['Date'])

# Identify all potential alpha factor columns
excluded_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
alpha_factor_columns = [col for col in alpha_data.columns if col not in excluded_columns]

# Iterate through each alpha factor column and run a backtest
for alpha_factor_column in alpha_factor_columns:
    print(f"\n--- Backtest for Alpha Factor: {alpha_factor_column} ---")

    # Create a new cerebro instance for each alpha factor
    cerebro = bt.Cerebro()

    # Create a dictionary to store alpha factors indexed by date and ticker for the current alpha factor
    alpha_dict = {}
    for index, row in alpha_data.iterrows():
        date = row['Date'].date()
        ticker = row['Ticker']
        alpha_value = row[alpha_factor_column]
        alpha_dict[(date, ticker)] = alpha_value

    # Add data for each stock
    tickers = alpha_data['Ticker'].unique()
    for ticker in tickers:
        ticker_data = alpha_data[alpha_data['Ticker'] == ticker]

        data = bt.feeds.PandasData(
            dataname=ticker_data,
            datetime='Date',
            close='Close',
            open='Open',
            high='High',
            low='Low',
            volume='Volume'
        )
        cerebro.adddata(data, name=ticker)

    # Add the strategy with the defined thresholds
    cerebro.addstrategy(AlphaFactorStrategy, buy_threshold=0.02, sell_threshold=-0.01) # You can adjust these thresholds

    # Add the analyzers
    cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='annual_return')
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe_ratio')
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade_analyzer') # Optional: For more detailed trade analysis

    # Set initial cash
    cerebro.broker.setcash(100000)

    # Run the backtest
    results = cerebro.run()

    # Print the final portfolio value
    print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

    # Print the analysis results
    if results and results[0].analyzers:
        print('\n--- Backtest Performance ---')
        # Annual Return
        annual_return = results[0].analyzers.annual_return.get_analysis()
        print('\nAnnual Returns:')
        for year, ret in annual_return.items():
            print(f'Year: {year}, Return: {ret:.4f}')

        # Sharpe Ratio
        sharpe_ratio = results[0].analyzers.sharpe_ratio.get_analysis()
        print(f'\nSharpe Ratio: {sharpe_ratio["sharperatio"]:.4f}')

        # Max Drawdown
        drawdown = results[0].analyzers.drawdown.get_analysis()
        print(f'\nMaximum Drawdown:')
        print(f'  Max: {drawdown["max"]["drawdown"]:.2f}%')
        print(f'  Duration: {drawdown["max"]["len"]} days')

        # Optional: Print trade analysis
        trade_analysis = results[0].analyzers.trade_analyzer.get_analysis()
        if trade_analysis['total']['total'] > 0:
            print('\n--- Trade Analysis ---')
            print(f"Total Trades: {trade_analysis['total']['total']}")
            print(f"Winning Trades: {trade_analysis['won']['total']}" if 'won' in trade_analysis else "0")
            print(f"Losing Trades: {trade_analysis['lost']['total']}" if 'lost' in trade_analysis else "0")

            won_pnl_net_total = trade_analysis.get('won', {}).get('pnl', {}).get('net', {}).get('total', 0)
            total_pnl_net_total = trade_analysis.get('total', {}).get('pnl', {}).get('net', {}).get('total', 0)

            if total_pnl_net_total != 0:
                win_rate = (won_pnl_net_total / total_pnl_net_total) * 100
                print(f"Win Rate: {win_rate:.2f}%")
            else:
                print("Win Rate: N/A")

            print(f"Average Profit: {trade_analysis.get('won', {}).get('pnl', {}).get('net', {}).get('average', 0):.2f}")
            print(f"Average Loss: {trade_analysis.get('lost', {}).get('pnl', {}).get('net', {}).get('average', 0):.2f}")


--- Backtest for Alpha Factor: Price_Momentum ---
Final Portfolio Value: 99292.04

--- Backtest Performance ---

Annual Returns:
Year: 2020, Return: 0.0000
Year: 2021, Return: -0.0066
Year: 2022, Return: 0.0034
Year: 2023, Return: -0.0066
Year: 2024, Return: 0.0017
Year: 2025, Return: 0.0010

Sharpe Ratio: -2.8137

Maximum Drawdown:
  Max: 1.52%
  Duration: 741 days

--- Trade Analysis ---
Total Trades: 5780
Winning Trades: 2334
Losing Trades: 3372
Win Rate: N/A
Average Profit: 0.00
Average Loss: 0.00

--- Backtest for Alpha Factor: Volume_Momentum ---
Final Portfolio Value: 100356.09

--- Backtest Performance ---

Annual Returns:
Year: 2020, Return: 0.0000
Year: 2021, Return: -0.0020
Year: 2022, Return: 0.0049
Year: 2023, Return: -0.0041
Year: 2024, Return: -0.0006
Year: 2025, Return: 0.0053

Sharpe Ratio: -2.7232

Maximum Drawdown:
  Max: 1.31%
  Duration: 648 days

--- Trade Analysis ---
Total Trades: 13696
Winning Trades: 6783
Losing Trades: 6842
Win Rate: N/A
Average Profit: 0.00

In [None]:
from datetime import datetime
import backtrader as bt
import yfinance as yf
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')

# Define your strategy with buy and sell thresholds as parameters
class AlphaFactorStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.02),
        ('sell_threshold', -0.01),
    )

    def __init__(self):
        pass  # Removed unused self.alpha_factor

    def next(self):
        for data in self.datas:
            ticker = data._name
            current_date = self.data.datetime.date(0)

            # Access the alpha_dict directly, which was populated from the CSV
            alpha_value = alpha_dict.get((current_date, ticker), None)

            if alpha_value is not None:
                if alpha_value > self.p.buy_threshold and not self.getposition(data).size > 0:
                    self.buy(data)
                elif alpha_value < self.p.sell_threshold and not self.getposition(data).size < 0:
                    self.sell(data)

try:
    # Load your alpha factor dataset
    alpha_data = pd.read_csv('hang_seng_index_stocks_with_factors.csv', parse_dates=['Date'])
except FileNotFoundError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' is empty.")
    exit()
except Exception as e:
    print(f"An error occurred while loading the CSV file: {e}")
    exit()

# Identify all potential alpha factor columns
excluded_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
alpha_factor_columns = [col for col in alpha_data.columns if col not in excluded_columns]

# Iterate through each alpha factor column and run a backtest
for alpha_factor_column in alpha_factor_columns:
    print(f"\n--- Optimization for Alpha Factor: {alpha_factor_column} ---")

    # Create a new cerebro instance for each alpha factor
    cerebro = bt.Cerebro()

    # Create a dictionary to store alpha factors indexed by date and ticker for the current alpha factor
    alpha_dict = {}
    for index, row in alpha_data.iterrows():
        date = row['Date'].date()
        ticker = row['Ticker']
        alpha_value = row[alpha_factor_column]
        alpha_dict[(date, ticker)] = alpha_value

    # Add data for each stock
    tickers = alpha_data['Ticker'].unique()
    for ticker in tickers:
        ticker_data = alpha_data[alpha_data['Ticker'] == ticker]

        data = bt.feeds.PandasData(
            dataname=ticker_data,
            datetime='Date',
            close='Close',
            open='Open',
            high='High',
            low='Low',
            volume='Volume'
        )
        cerebro.adddata(data, name=ticker)

    # Add the strategy for optimization - Use optstrategy instead of addstrategy
    cerebro.optstrategy(AlphaFactorStrategy,
                        buy_threshold=range(1, 5, 1),  # Example range: 0.01, 0.02, 0.03, 0.04
                        sell_threshold=range(-4, 0, 1)) # Example range: -0.04, -0.03, -0.02, -0.01
    
    # Set the commission - Example: 0.1% per trade
    cerebro.broker.setcommission(commission=0.001)

    # Set initial cash
    cerebro.broker.setcash(100000)

    # Add the analyzers
    cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='annual_return')
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe_ratio', riskfreerate=0.02)  # Example 2% annual risk-free rate
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade_analyzer')

    # Run the backtest
    results = cerebro.run()

    # Print the final portfolio value
    print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())

    # Print the analysis results
    if results and results[0].analyzers:
        print('\n--- Backtest Performance ---')
        # Annual Return
        annual_return = results[0].analyzers.annual_return.get_analysis()
        print('\nAnnual Returns:')
        for year, ret in annual_return.items():
            print(f'Year: {year}, Return: {ret:.4f}')

        # Sharpe Ratio
        sharpe_ratio = results[0].analyzers.sharpe_ratio.get_analysis()
        print(f'\nSharpe Ratio: {sharpe_ratio["sharperatio"]:.4f}')

        # Max Drawdown
        drawdown = results[0].analyzers.drawdown.get_analysis()
        print(f'\nMaximum Drawdown:')
        print(f'  Max: {drawdown["max"]["drawdown"]:.2f}%')
        print(f'  Duration: {drawdown["max"]["len"]} days')

        # Trade Analysis
        trade_analysis = results[0].analyzers.trade_analyzer.get_analysis()
        if trade_analysis['total']['total'] > 0:
            print('\n--- Trade Analysis ---')
            print(f"Total Trades: {trade_analysis['total']['total']}")
            print(f"Winning Trades: {trade_analysis.get('won', {}).get('total', 0)}")
            print(f"Losing Trades: {trade_analysis.get('lost', {}).get('total', 0)}")

            won_pnl_net_total = trade_analysis.get('won', {}).get('pnl', {}).get('net', {}).get('total', 0)
            total_pnl_net_total = trade_analysis.get('total', {}).get('pnl', {}).get('net', {}).get('total', 0)

            if total_pnl_net_total != 0:
                win_rate = (won_pnl_net_total / total_pnl_net_total) * 100
                print(f"Win Rate: {win_rate:.2f}%")
            else:
                print("Win Rate: N/A")

            print(f"Average Profit: {trade_analysis.get('won', {}).get('pnl', {}).get('net', {}).get('average', 0):.2f}")
            print(f"Average Loss: {trade_analysis.get('lost', {}).get('pnl', {}).get('net', {}).get('average', 0):.2f}")

    # Plot the results for the last alpha factor tested (optional - can be resource-intensive for many factors)
    if alpha_factor_column == alpha_factor_columns[-1]:
        cerebro.plot()

In [15]:
from datetime import datetime
import backtrader as bt
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')

# Define your strategy with buy and sell thresholds as parameters
class AlphaFactorStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.02),
        ('sell_threshold', -0.01),
    )

    def __init__(self):
        pass

    def next(self):
        for data in self.datas:
            ticker = data._name
            current_date = self.data.datetime.date(0)
            alpha_value = alpha_dict.get((current_date, ticker), None)

            if alpha_value is not None:
                if alpha_value > self.p.buy_threshold and not self.getposition(data).size > 0:
                    self.buy(data)
                elif alpha_value < self.p.sell_threshold and not self.getposition(data).size < 0:
                    self.sell(data)

try:
    # Load your alpha factor dataset
    alpha_data = pd.read_csv('hang_seng_index_stocks_with_factors.csv', parse_dates=['Date'])
except FileNotFoundError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' is empty.")
    exit()
except Exception as e:
    print(f"An error occurred while loading the CSV file: {e}")
    exit()

# Identify all potential alpha factor columns
excluded_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
alpha_factor_columns = [col for col in alpha_data.columns if col not in excluded_columns]

# Define the ranges for optimization
buy_threshold_range = range(1, 5, 1)  # Test values like 0.01, 0.02, 0.03, 0.04
sell_threshold_range = range(-4, 0, 1) # Test values like -0.04, -0.03, -0.02, -0.01

# Iterate through each alpha factor column and run optimization
for alpha_factor_column in alpha_factor_columns:
    print(f"\n--- Optimization for Alpha Factor: {alpha_factor_column} ---")

    # Create a new cerebro instance for each alpha factor
    cerebro = bt.Cerebro()

    # Create a dictionary to store alpha factors indexed by date and ticker for the current alpha factor
    alpha_dict = {}
    for index, row in alpha_data.iterrows():
        date = row['Date'].date()
        ticker = row['Ticker']
        alpha_value = row[alpha_factor_column]
        alpha_dict[(date, ticker)] = alpha_value

    # Add data for each stock
    tickers = alpha_data['Ticker'].unique()
    for ticker in tickers:
        ticker_data = alpha_data[alpha_data['Ticker'] == ticker]

        data = bt.feeds.PandasData(
            dataname=ticker_data,
            datetime='Date',
            close='Close',
            open='Open',
            high='High',
            low='Low',
            volume='Volume'
        )
        cerebro.adddata(data, name=ticker)

    # Add the strategy for optimization
    cerebro.optstrategy(AlphaFactorStrategy,
                        buy_threshold= buy_threshold_range,
                        sell_threshold= sell_threshold_range)

    # Set the commission
    cerebro.broker.setcommission(commission=0.001)

    # Set initial cash
    cerebro.broker.setcash(100000)

    # Add the analyzers
    cerebro.addanalyzer(bt.analyzers.AnnualReturn, _name='annual_return')
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe_ratio', riskfreerate=0.02)
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade_analyzer')

    # Run the optimization
    optimized_results = cerebro.run(maxcpus=1)

    # Analyze the optimized results to find the best Sharpe Ratio
    best_sharpe_ratio = -float('inf')
    best_annual_return = -float('inf')
    min_max_drawdown = float('inf')
    best_buy_threshold = None
    best_sell_threshold = None
    best_params = False  # Initialize best_params

    for run in optimized_results:
        first_strategy = run[0]
        sharpe_analysis = first_strategy.analyzers.sharpe_ratio.get_analysis()
        sharpe = sharpe_analysis.get('sharperatio')  # Use .get() to handle potential missing key
        annual_ret_analysis = first_strategy.analyzers.annual_return.get_analysis()
        overall_annual_return = list(annual_ret_analysis.values())[-1] if annual_ret_analysis else -float('inf')
        max_drawdown = first_strategy.analyzers.drawdown.get_analysis()['max']['drawdown']

        if sharpe is not None and sharpe > best_sharpe_ratio:
            best_sharpe_ratio = sharpe
            best_annual_return = overall_annual_return
            min_max_drawdown = max_drawdown
            best_buy_threshold = first_strategy.p.buy_threshold / 100.0
            best_sell_threshold = first_strategy.p.sell_threshold / 100.0
            best_params = True  # Set best_params to True when a best result is found
        elif sharpe is not None and sharpe == best_sharpe_ratio:
            # If Sharpe is the same, prefer higher return and lower drawdown
            if overall_annual_return > best_annual_return:
                best_annual_return = overall_annual_return
                min_max_drawdown = max_drawdown
                best_buy_threshold = first_strategy.p.buy_threshold / 100.0
                best_sell_threshold = first_strategy.p.sell_threshold / 100.0
            elif overall_annual_return == best_annual_return and max_drawdown < min_max_drawdown:
                min_max_drawdown = max_drawdown
                best_buy_threshold = first_strategy.p.buy_threshold / 100.0
                best_sell_threshold = first_strategy.p.sell_threshold / 100.0
            best_params = True

    if best_params:
        print("\n--- Best Optimization Results (Based on Sharpe Ratio) ---")
        print(f"Alpha Factor: {alpha_factor_column}")
        print(f"Best Buy Threshold: {best_buy_threshold:.4f}")
        print(f"Best Sell Threshold: {best_sell_threshold:.4f}")
        print(f"Best Sharpe Ratio: {best_sharpe_ratio:.4f}")
        print(f"Corresponding Annual Return: {best_annual_return:.4f}")
        print(f"Corresponding Max Drawdown: {min_max_drawdown:.2f}%")

# You can choose to plot the results of the best run if you want
# if best_params and alpha_factor_column == alpha_factor_columns[-1]:
#     cerebro.plot()


--- Optimization for Alpha Factor: Price_Momentum ---

--- Best Optimization Results (Based on Sharpe Ratio) ---
Alpha Factor: Price_Momentum
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -5.5027
Corresponding Annual Return: 0.0013
Corresponding Max Drawdown: 2.24%

--- Optimization for Alpha Factor: Volume_Momentum ---

--- Best Optimization Results (Based on Sharpe Ratio) ---
Alpha Factor: Volume_Momentum
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -5.2746
Corresponding Annual Return: 0.0046
Corresponding Max Drawdown: 2.18%

--- Optimization for Alpha Factor: Rate_of_Change_Momentum ---

--- Best Optimization Results (Based on Sharpe Ratio) ---
Alpha Factor: Rate_of_Change_Momentum
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -3.2896
Corresponding Annual Return: 0.0028
Corresponding Max Drawdown: 3.06%

--- Optimization for Alpha Factor: Mean_Reversion_20 ---

--- Best Optimization Results (Ba

In [20]:
from datetime import datetime
import backtrader as bt
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')

# Define your strategy with buy and sell thresholds as parameters
class AlphaFactorStrategy(bt.Strategy):
    params = (
        ('buy_threshold', 0.02),
        ('sell_threshold', -0.01),
        ('alpha_dict', None),  # Keep alpha_dict in params
    )

    def __init__(self):
        pass

    def next(self):
        for data in self.datas:
            ticker = data._name
            current_date = self.data.datetime.date(0)
            alpha_value = self.p.alpha_dict.get((current_date, ticker), None)

            if alpha_value is not None:
                if alpha_value > self.p.buy_threshold and not self.getposition(data).size > 0:
                    self.buy(data)
                elif alpha_value < self.p.sell_threshold and not self.getposition(data).size < 0:
                    self.sell(data)

try:
    # Load your alpha factor dataset
    alpha_data = pd.read_csv('hang_seng_index_stocks_with_factors.csv', parse_dates=['Date'])
except FileNotFoundError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'hang_seng_index_stocks_with_factors.csv' is empty.")
    exit()
except Exception as e:
    print(f"An error occurred while loading the CSV file: {e}")
    exit()

# Identify all potential alpha factor columns
excluded_columns = ['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']
alpha_factor_columns = [col for col in alpha_data.columns if col not in excluded_columns]

# Define the ranges for optimization
buy_threshold_range = range(1, 5, 1)  # Test values like 0.01, 0.02, 0.03, 0.04
sell_threshold_range = range(-4, 0, 1) # Test values like -0.04, -0.03, -0.02, -0.01

# Split the data into in-sample and out-of-sample sets
split_date = alpha_data['Date'].max() - pd.Timedelta(days=365)  # Use the last year as out-of-sample
in_sample_data = alpha_data[alpha_data['Date'] <= split_date].copy()
out_of_sample_data = alpha_data[alpha_data['Date'] > split_date].copy()

# Dictionary to store the best parameters for each alpha factor
best_parameters = {}

# Iterate through each alpha factor column and run optimization (in-sample)
for alpha_factor_column in alpha_factor_columns:
    print(f"\n--- In-Sample Optimization for Alpha Factor: {alpha_factor_column} ---")

    # Create a new cerebro instance for each alpha factor
    cerebro_opt = bt.Cerebro()

    # Create a dictionary to store alpha factors indexed by date and ticker for the current alpha factor (in-sample)
    alpha_dict_in_sample = {}
    for index, row in in_sample_data.iterrows():
        date = row['Date'].date()
        ticker = row['Ticker']
        alpha_value = row[alpha_factor_column]
        alpha_dict_in_sample[(date, ticker)] = alpha_value

    # Add data for each stock (in-sample)
    tickers = in_sample_data['Ticker'].unique()
    for ticker in tickers:
        ticker_data_in_sample = in_sample_data[in_sample_data['Ticker'] == ticker].copy()

        data = bt.feeds.PandasData(
            dataname=ticker_data_in_sample,
            datetime='Date',
            close='Close',
            open='Open',
            high='High',
            low='Low',
            volume='Volume'
        )
        cerebro_opt.adddata(data, name=ticker)

    # Add the strategy for optimization
    cerebro_opt.optstrategy(AlphaFactorStrategy,
                            buy_threshold=buy_threshold_range,
                            sell_threshold=sell_threshold_range,
                            alpha_dict=[alpha_dict_in_sample]  # Pass alpha_dict as a single-element list
                            )

    # Set the commission
    cerebro_opt.broker.setcommission(commission=0.001)

    # Set initial cash
    cerebro_opt.broker.setcash(100000)

    # Add the analyzers
    cerebro_opt.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe_ratio', riskfreerate=0.02)

    # Run the optimization
    optimized_results = cerebro_opt.run(maxcpus=1)

    # Analyze the optimized results to find the best Sharpe Ratio
    best_sharpe_ratio = -float('inf')
    best_buy_threshold = None
    best_sell_threshold = None

    for run in optimized_results:
        first_strategy = run[0]
        sharpe_analysis = first_strategy.analyzers.sharpe_ratio.get_analysis()
        sharpe = sharpe_analysis.get('sharperatio')

        if sharpe is not None and sharpe > best_sharpe_ratio:
            best_sharpe_ratio = sharpe
            best_buy_threshold = first_strategy.p.buy_threshold / 100.0
            best_sell_threshold = first_strategy.p.sell_threshold / 100.0

    if best_buy_threshold is not None and best_sell_threshold is not None:
        best_parameters[alpha_factor_column] = {
            'buy_threshold': best_buy_threshold,
            'sell_threshold': best_sell_threshold
        }
        print(f"\n--- Best In-Sample Parameters for {alpha_factor_column} ---")
        print(f"Best Buy Threshold: {best_buy_threshold:.4f}")
        print(f"Best Sell Threshold: {best_sell_threshold:.4f}")
        print(f"Best Sharpe Ratio: {best_sharpe_ratio:.4f}")
    else:
        print(f"\n--- No valid best parameters found for {alpha_factor_column} in the in-sample data. ---")

# --- Out-of-Sample Backtesting ---
print("\n--- Out-of-Sample Backtesting ---")
for alpha_factor_column, params in best_parameters.items():
    print(f"\n--- Out-of-Sample Test for Alpha Factor: {alpha_factor_column} ---")

    # Create a new cerebro instance for out-of-sample testing
    cerebro_test = bt.Cerebro()

    # Create a dictionary to store alpha factors indexed by date and ticker for the current alpha factor (out-of-sample)
    alpha_dict_out_of_sample = {}
    for index, row in out_of_sample_data.iterrows():
        date = row['Date'].date()
        ticker = row['Ticker']
        alpha_value = row[alpha_factor_column]
        alpha_dict_out_of_sample[(date, ticker)] = alpha_value

    # Add data for each stock (out-of-sample)
    tickers = out_of_sample_data['Ticker'].unique()
    for ticker in tickers:
        ticker_data_out_of_sample = out_of_sample_data[out_of_sample_data['Ticker'] == ticker].copy()

        data = bt.feeds.PandasData(
            dataname=ticker_data_out_of_sample,
            datetime='Date',
            close='Close',
            open='Open',
            high='High',
            low='Low',
            volume='Volume'
        )
        cerebro_test.adddata(data, name=ticker)

    # Add the strategy with the best parameters
    cerebro_test.addstrategy(AlphaFactorStrategy,
                             buy_threshold=params['buy_threshold'],
                             sell_threshold=params['sell_threshold'],
                             alpha_dict=alpha_dict_out_of_sample) # Pass alpha_dict here

    # Set the commission
    cerebro_test.broker.setcommission(commission=0.001)

    # Set initial cash
    cerebro_test.broker.setcash(100000)

    # Add the analyzers for out-of-sample testing
    cerebro_test.addanalyzer(bt.analyzers.AnnualReturn, _name='annual_return')
    cerebro_test.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe_ratio', riskfreerate=0.02)
    cerebro_test.addanalyzer(bt.analyzers.DrawDown, _name='drawdown')
    cerebro_test.addanalyzer(bt.analyzers.TradeAnalyzer, _name='trade_analyzer')
    cerebro_test.addanalyzer(bt.analyzers.Returns, _name='returns')
    cerebro_test.addanalyzer(bt.analyzers.TimeReturn, _name='time_return')

    # Run the out-of-sample backtest
    print("\n--- Running Out-of-Sample Backtest ---")
    results_test = cerebro_test.run()

    # Analyze and print out-of-sample results
    if results_test:
        first_strategy_test = results_test[0]
        print("\n--- Out-of-Sample Performance ---")
        print(f"Alpha Factor: {alpha_factor_column}")
        print(f"Best Buy Threshold (In-Sample): {params['buy_threshold']:.4f}")
        print(f"Best Sell Threshold (In-Sample): {params['sell_threshold']:.4f}")

        # Print analyzers' results
        print("\n--- Analyzers ---")
        annual_return = first_strategy_test.analyzers.annual_return.get_analysis()
        print("Annual Return:", annual_return)

        sharpe_ratio = first_strategy_test.analyzers.sharpe_ratio.get_analysis()
        print("Sharpe Ratio:", sharpe_ratio)

        drawdown = first_strategy_test.analyzers.drawdown.get_analysis()
        print("Drawdown:", drawdown)

        trade_analyzer = first_strategy_test.analyzers.trade_analyzer.get_analysis()
        print("Trade Analyzer:", trade_analyzer)

        returns = first_strategy_test.analyzers.returns.get_analysis()
        print("Returns:", returns)

        time_return = first_strategy_test.analyzers.time_return.get_analysis()
        print("Time Return:", time_return)


--- In-Sample Optimization for Alpha Factor: Price_Momentum ---

--- Best In-Sample Parameters for Price_Momentum ---
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0100
Best Sharpe Ratio: -6.4383

--- In-Sample Optimization for Alpha Factor: Volume_Momentum ---

--- Best In-Sample Parameters for Volume_Momentum ---
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -6.8043

--- In-Sample Optimization for Alpha Factor: Rate_of_Change_Momentum ---

--- Best In-Sample Parameters for Rate_of_Change_Momentum ---
Best Buy Threshold: 0.0100
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -5.5065

--- In-Sample Optimization for Alpha Factor: Mean_Reversion_20 ---

--- Best In-Sample Parameters for Mean_Reversion_20 ---
Best Buy Threshold: 0.0300
Best Sell Threshold: -0.0400
Best Sharpe Ratio: -5.3998

--- In-Sample Optimization for Alpha Factor: Z_score_Mean_Reversion ---

--- Best In-Sample Parameters for Z_score_Mean_Reversion ---
Best Buy Threshold: 0.0400
Best

# Agent for summarizing up-to-date market information

In [23]:
import json
import time
import os
from langchain_community.llms import Ollama
from ollama import Client
import pandas as pd
import re
import ast
import numpy as np
from collections import Counter
import yfinance as yf

# Assuming you have a way to set up your Ollama client
client = Client(host='http://localhost:11434')  # Adjust host if needed

def prompt_to_LLM_market_info(hsi_volatility_paths, hsi_index_ticker, max_retries=10):
    """
    Fetches market information from Ollama LLM using HSI Volatility Index and Hang Seng Index data from yfinance.

    Args:
        hsi_volatility_paths (list): List of file paths for the three HSI Volatility Index CSV files.
        hsi_index_ticker (str): Ticker symbol for the Hang Seng Index (e.g., '^HSI').
        max_retries (int): Maximum number of retries for the LLM call.

    Returns:
        str: The summarized market information from the LLM, or None if an error occurs.
    """
    try:
        # Load and combine HSI Volatility Index data
        hsi_volatility_dfs = [pd.read_csv(path) for path in hsi_volatility_paths]
        hsi_volatility_df = pd.concat(hsi_volatility_dfs, ignore_index=True)
        if 'Date' in hsi_volatility_df.columns:
            hsi_volatility_df['Date'] = pd.to_datetime(hsi_volatility_df['Date'])
            hsi_volatility_df = hsi_volatility_df.sort_values(by='Date').drop_duplicates(subset=['Date'], keep='last').reset_index(drop=True)
        else:
            print("Warning: 'Date' column not found in HSI Volatility Index data.")

        # Fetch Hang Seng Index data from yfinance
        start_date = '2020-03-24'
        end_date = '2025-03-24'
        hsi_index_df = yf.download(hsi_index_ticker, start=start_date, end=end_date)
        hsi_index_df = hsi_index_df.reset_index() # Make 'Date' a regular column

        llm = Ollama(model="llama3.2")

        prompt = f"""
        You are a financial analyst providing up-to-date market information for Hong Kong.
        Based on the following data, summarize the recent market trends and potential outlook comprehensively. 
        Provide quantitative and qualitative up-to-date market information with no disclaimers. 

        HSI Volatility Index Data (Recent):
        {hsi_volatility_df.tail(10).to_string()}

        Hang Seng Index Data (from 2020-03-24 to 2025-03-24, recent shown):
        {hsi_index_df.tail(10).to_string()}
        """

        response = llm.invoke(prompt)
        return response

    except Exception as e:
        print(f"Error during market information retrieval: {e}")
        return None

print('Now doing market information')

# --- Define file paths ---
hsi_volatility_file1 = 'HSI Volatility Historical Data_1.csv'  # Replace with your actual file path
hsi_volatility_file2 = 'HSI Volatility Historical Data_2.csv'  # Replace with your actual file path
hsi_volatility_file3 = 'HSI Volatility Historical Data_3.csv'  # Replace with your actual file path
hsi_volatility_paths = [hsi_volatility_file1, hsi_volatility_file2, hsi_volatility_file3]

hang_seng_ticker = '^HSI'  # Ticker symbol for Hang Seng Index

# --- Call the function to get market information ---
market_summary = prompt_to_LLM_market_info(hsi_volatility_paths, hang_seng_ticker)

if market_summary:
    print("\n--- Market Information Summary ---")
    print(market_summary)
else:
    print("\nCould not retrieve market information.")

  hsi_volatility_df['Date'] = pd.to_datetime(hsi_volatility_df['Date'])
[*********************100%***********************]  1 of 1 completed

Now doing market information






--- Market Information Summary ---
Based on the provided data, here is a comprehensive summary of recent market trends and potential outlook for Hong Kong:

**Hang Seng Index (HSI) Analysis**

The HSI has been experiencing volatility since its last major pullback in March 2023. The index has recovered significantly, reaching new highs in May 2024.

1. **Recent Trends**: Since its peak on May 2024, the HSI has experienced a gentle correction, with prices ranging between 25,000 and 28,000. The recent volatility data indicates a moderate increase in price swings, suggesting that investors are becoming increasingly cautious.
2. **Volatility Index (VIX)**: The VIX index measures market expectation of future volatility. As the VIX has remained relatively stable around 20-25, it suggests that investor sentiment is neutral to slightly bullish, indicating a potential return to growth.
3. **Price Levels**: Recent high prices indicate strong demand for HSI stocks, particularly in the mid-to-high