<a href="https://colab.research.google.com/github/hck717/simple-Dividend-Aristocrats-Strategy/blob/main/long_term_invest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Dividend Aristocrats Strategy

In [None]:
!pip install --upgrade yfinance

Get stock list

In [None]:
import yfinance as yf
import pandas as pd

# Seed list of Hong Kong tickers written as a 4-digit code plus the ".HK"
# suffix, the form passed to yfinance below.
# NOTE(review): presumably Hang Seng Index constituents or similar; membership
# is not verified anywhere in this notebook, so confirm before relying on it.
hsi_stocks = [
    '0001.HK', '0002.HK', '0003.HK', '0005.HK', '0011.HK', '0012.HK', '0016.HK',
    '0017.HK', '0019.HK', '0027.HK', '0083.HK', '0101.HK', '0129.HK', '0135.HK', '0142.HK',
    '0151.HK', '0168.HK', '0175.HK', '0182.HK', '0267.HK', '0288.HK', '0291.HK', '0293.HK',
    '0316.HK', '0322.HK', '0323.HK', '0330.HK', '0386.HK', '0388.HK', '0390.HK', '0489.HK',
    '0669.HK', '0688.HK', '0700.HK', '0728.HK', '0762.HK', '0777.HK', '0788.HK', '0823.HK',
    '0836.HK', '0857.HK', '0868.HK', '0881.HK', '0883.HK', '0916.HK', '0939.HK', '0941.HK',
    '0945.HK', '0960.HK', '0992.HK', '0998.HK', '1038.HK', '1044.HK', '1071.HK', '1088.HK',
    '1093.HK', '1109.HK', '1113.HK', '1177.HK', '1299.HK', '1378.HK', '1398.HK', '1810.HK',
    '1876.HK', '1928.HK', '1997.HK', '2007.HK', '2020.HK', '2269.HK', '2313.HK', '2318.HK',
    '2319.HK', '2331.HK', '2382.HK', '2388.HK', '2628.HK', '2688.HK', '3328.HK', '3988.HK',
    '0066.HK', '2638.HK'
]

def is_valid_ticker(ticker):
    """Report whether the data provider recognises ``ticker``.

    A ticker counts as valid when the ``symbol`` entry of its ``info`` mapping
    equals the requested ticker once the ``.HK`` suffix has been removed from
    both sides. Any exception raised during the lookup yields ``False``.
    """
    suffix = '.HK'
    try:
        reported = yf.Ticker(ticker).info.get('symbol', '')
        return reported.replace(suffix, '') == ticker.replace(suffix, '')
    except Exception:
        return False

# Candidate pool: start from the seed list (a set removes duplicates), then
# probe sequential 4-digit codes until enough valid tickers have been found.
hk_stocks = set(hsi_stocks)
target_count = 1000
ticker_range = range(1, 2001)  # probes 0001.HK through 2000.HK (adjust if needed)

print("Validating tickers...")
for i in ticker_range:
    if len(hk_stocks) >= target_count:
        break
    ticker = f"{i:04d}.HK"  # zero-padded 4-digit code, e.g. "0001.HK"
    # Skip known tickers first so no lookup is spent on them.
    if ticker in hk_stocks or not is_valid_ticker(ticker):
        continue
    hk_stocks.add(ticker)
    if len(hk_stocks) % 100 == 0:  # progress update every 100 tickers
        print(f"Found {len(hk_stocks)} valid stocks...")

# From here on ``hk_stocks`` is a sorted list rather than a set.
hk_stocks = sorted(hk_stocks)

# Echo the result as a copy-pasteable Python list literal.
print(f"\nFound {len(hk_stocks)} stocks. Here’s the list formatted for Python:")
print("hk_stocks = [")
for i, ticker in enumerate(hk_stocks):
    end = "" if i == len(hk_stocks) - 1 else ","
    print(f"    '{ticker}'{end}")
print("]")

# Persist the list as a single-column CSV.
df = pd.DataFrame({"Stock Code": hk_stocks})
df.to_csv("hkex_1000_stocks.csv", index=False)
print("\nSaved to 'hkex_1000_stocks.csv'")

Portfolio construction

In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

try:
    from google.colab import files
except ImportError:
    files = None

def preprocess_stocks(hk_stocks):
    """Download price history and build per-ticker screening metrics.

    Daily data for ``hk_stocks`` plus the ``^HSI`` index is requested for
    2015-01-01 through 2025-03-09. If the download raises, empty frames indexed
    by that calendar range are used instead, so no ticker has a market price
    and every ticker is filtered out.

    Args:
        hk_stocks: List of ticker symbols. May be empty.

    Returns:
        Tuple ``(cleaned_stocks, cleaned_metrics, returns, hsi_returns, prices)``:
        the tickers whose last close is not NaN, their metric rows, the daily
        percentage returns, the ``^HSI`` return series and the close prices.
    """
    end_date = datetime(2025, 3, 9)
    start_date = datetime(2015, 1, 1)
    tickers = [yf.Ticker(t) for t in hk_stocks]

    def _as_float(value, default):
        # ``info`` can map a key to None (or a non-numeric value); treat that
        # like a missing key so one absent figure does not throw away the
        # ticker's other fundamentals.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    try:
        data = yf.download(hk_stocks + ['^HSI'], start=start_date, end=end_date, progress=True, auto_adjust=False)
        prices = data['Close'].dropna(how='all')
        returns = prices.pct_change(fill_method=None).dropna(how='all')
        hsi_returns = returns.get('^HSI', pd.Series(dtype=float))
    except Exception as e:
        print(f"Error downloading data: {e}")
        calendar = pd.date_range(start_date, end_date)
        prices = pd.DataFrame(index=calendar)
        returns = pd.DataFrame(index=calendar)
        hsi_returns = pd.Series(index=calendar, dtype=float)

    metrics = pd.DataFrame(index=hk_stocks)
    for t in tickers:
        try:
            info = t.info
            metrics.loc[t.ticker, 'ROE'] = _as_float(info.get('returnOnEquity'), 0) * 100
            metrics.loc[t.ticker, 'PE_Ratio'] = _as_float(info.get('trailingPE'), np.nan)
            metrics.loc[t.ticker, 'PB_Ratio'] = _as_float(info.get('priceToBook'), np.nan)
            metrics.loc[t.ticker, 'Sector'] = info.get('sector', 'Unknown')
        except Exception as e:
            print(f"Error fetching info for {t.ticker}: {e}")
            metrics.loc[t.ticker, 'ROE'] = 0
            metrics.loc[t.ticker, 'PE_Ratio'] = np.nan
            metrics.loc[t.ticker, 'PB_Ratio'] = np.nan
            metrics.loc[t.ticker, 'Sector'] = 'Unknown'

    # The index variance is the same for every ticker; compute it once.
    hsi_var = hsi_returns.var()
    for t in hk_stocks:
        if t in returns.columns:
            metrics.loc[t, 'Volatility'] = returns[t].std() * (252 ** 0.5)
            # A NaN or zero index variance falls through to a beta of 0.
            metrics.loc[t, 'Beta'] = returns[t].cov(hsi_returns) / hsi_var if hsi_var > 0 else 0
            metrics.loc[t, 'Max_Drawdown'] = ((prices[t] - prices[t].cummax()) / prices[t].cummax()).min() * 100 if t in prices.columns else 0
        else:
            metrics.loc[t, 'Volatility'] = np.nan
            metrics.loc[t, 'Beta'] = np.nan
            metrics.loc[t, 'Max_Drawdown'] = np.nan
        metrics.loc[t, 'Market_Price'] = prices[t].iloc[-1] if t in prices.columns and not prices[t].isna().all() else np.nan

    # With an empty ticker list no column was ever created, and
    # dropna(subset=...) would raise KeyError.
    if 'Market_Price' in metrics.columns:
        cleaned_metrics = metrics.dropna(subset=['Market_Price'])
    else:
        cleaned_metrics = metrics
    cleaned_stocks = cleaned_metrics.index.tolist()
    print(f"\nOriginal stocks: {len(hk_stocks)}, Cleaned stocks: {len(cleaned_stocks)}")
    return cleaned_stocks, cleaned_metrics, returns, hsi_returns, prices

def value_investing(hk_stocks, prices):
    """Score tickers on profitability, leverage and valuation fundamentals.

    Fundamentals are read from each ticker's ``info`` mapping. A perpetuity
    value ``Owner_Earnings * 1.03 / (0.10 - 0.03)`` is stored as
    ``Intrinsic_Value``. Seven component scores, each clipped to [0, 1], are
    averaged (ignoring NaN) into ``Value_Score``.

    Args:
        hk_stocks: List of ticker symbols. May be empty.
        prices: DataFrame of close prices with one column per ticker.

    Returns:
        DataFrame indexed by ticker with columns ``ROE``, ``PE_Ratio``,
        ``PB_Ratio``, ``Value_Score``, ``Intrinsic_Value`` and ``ROIC``.
    """
    def _as_float(value, default):
        # ``info`` can map a key to None; fall back per field instead of
        # discarding every fundamental of the ticker.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    raw_columns = ['ROE', 'ROIC', 'Total_Debt', 'Equity', 'Profit_Margin',
                   'PE_Ratio', 'PB_Ratio', 'Owner_Earnings']
    # Declaring the float columns up front keeps the arithmetic below valid
    # for an empty ticker list (previously a KeyError on 'Total_Debt').
    metrics = pd.DataFrame(index=hk_stocks, columns=raw_columns, dtype=float)

    tickers = [yf.Ticker(t) for t in hk_stocks]
    for t in tickers:
        try:
            info = t.info
            metrics.loc[t.ticker, 'ROE'] = _as_float(info.get('returnOnEquity'), 0) * 100
            # NOTE: the 'ROIC' column is populated from returnOnAssets.
            metrics.loc[t.ticker, 'ROIC'] = _as_float(info.get('returnOnAssets'), 0) * 100
            metrics.loc[t.ticker, 'Total_Debt'] = _as_float(info.get('totalDebt'), 0)
            # The 1e-6 default keeps the debt/equity division below finite.
            # NOTE(review): confirm that 'totalStockholderEquity', 'depreciation'
            # and 'capitalExpenditures' are actually present in ``info``.
            metrics.loc[t.ticker, 'Equity'] = _as_float(info.get('totalStockholderEquity'), 1e-6)
            metrics.loc[t.ticker, 'Profit_Margin'] = _as_float(info.get('profitMargins'), 0) * 100
            metrics.loc[t.ticker, 'PE_Ratio'] = _as_float(info.get('trailingPE'), np.nan)
            metrics.loc[t.ticker, 'PB_Ratio'] = _as_float(info.get('priceToBook'), np.nan)
            metrics.loc[t.ticker, 'Owner_Earnings'] = (
                _as_float(info.get('netIncomeToCommon'), 0)
                + _as_float(info.get('depreciation'), 0)
                - _as_float(info.get('capitalExpenditures'), 0)
            )
        except Exception as e:
            print(f"Error fetching value info for {t.ticker}: {e}")
            metrics.loc[t.ticker, 'ROE'] = 0
            metrics.loc[t.ticker, 'ROIC'] = 0
            metrics.loc[t.ticker, 'Total_Debt'] = 0
            metrics.loc[t.ticker, 'Equity'] = 1e-6
            metrics.loc[t.ticker, 'Profit_Margin'] = 0
            metrics.loc[t.ticker, 'PE_Ratio'] = np.nan
            metrics.loc[t.ticker, 'PB_Ratio'] = np.nan
            metrics.loc[t.ticker, 'Owner_Earnings'] = 0

    metrics['Debt_to_Equity'] = metrics['Total_Debt'] / metrics['Equity']
    metrics['Market_Price'] = [prices[t].iloc[-1] if t in prices.columns and not prices[t].isna().all() else np.nan for t in hk_stocks]
    growth_rate = 0.03
    metrics['Intrinsic_Value'] = metrics['Owner_Earnings'] * (1 + growth_rate) / (0.10 - growth_rate)
    # NOTE(review): Owner_Earnings looks like a company-level total while
    # Market_Price is a per-share quote; confirm the units are comparable.
    metrics['Discount'] = np.where(metrics['Intrinsic_Value'] != 0,
                                   (metrics['Intrinsic_Value'] - metrics['Market_Price']) / metrics['Intrinsic_Value'], 0)

    # Every column is already float, so no further numeric coercion is needed.
    # Threshold scores are 0 at the floor (15 / 10 / 5) and ~1 at the column
    # maximum; min-max scores are inverted so that lower PE/PB ranks higher.
    # The 1e-6 term avoids division by zero.
    metrics['ROE_Score'] = np.clip((metrics['ROE'] - 15) / (metrics['ROE'].max() - 15 + 1e-6), 0, 1)
    metrics['ROIC_Score'] = np.clip((metrics['ROIC'] - 10) / (metrics['ROIC'].max() - 10 + 1e-6), 0, 1)
    metrics['Debt_Score'] = np.clip(1 - (metrics['Debt_to_Equity'] / (metrics['Debt_to_Equity'].max() + 1e-6)), 0, 1)
    metrics['Profit_Score'] = np.clip((metrics['Profit_Margin'] - 5) / (metrics['Profit_Margin'].max() - 5 + 1e-6), 0, 1)
    metrics['PE_Score'] = np.clip(1 - (metrics['PE_Ratio'] - metrics['PE_Ratio'].min()) / (metrics['PE_Ratio'].max() - metrics['PE_Ratio'].min() + 1e-6), 0, 1)
    metrics['PB_Score'] = np.clip(1 - (metrics['PB_Ratio'] - metrics['PB_Ratio'].min()) / (metrics['PB_Ratio'].max() - metrics['PB_Ratio'].min() + 1e-6), 0, 1)
    metrics['Discount_Score'] = np.clip(metrics['Discount'], 0, 1)
    metrics['Value_Score'] = np.nanmean([metrics['ROE_Score'], metrics['ROIC_Score'], metrics['Debt_Score'],
                                         metrics['Profit_Score'], metrics['PE_Score'], metrics['PB_Score'],
                                         metrics['Discount_Score']], axis=0)

    return metrics[['ROE', 'PE_Ratio', 'PB_Ratio', 'Value_Score', 'Intrinsic_Value', 'ROIC']]

def magic_formula(hk_stocks, prices):
    """Score tickers on earnings yield and return on capital.

    ``Earnings_Yield`` is ``EBIT / EV * 100``. It and ``ROIC`` are each mapped
    to [0, 1] (0 at the 6 % / 10 % floor, ~1 at the column maximum) and
    averaged, ignoring NaN, into ``Magic_Score``.

    Args:
        hk_stocks: List of ticker symbols. May be empty.
        prices: Unused; kept so the signature matches the other strategy
            functions.

    Returns:
        DataFrame indexed by ticker with the single column ``Magic_Score``.
    """
    def _as_float(value, default):
        # ``info`` can map a key to None; fall back per field instead of
        # discarding every figure of the ticker.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    # Declaring the float columns up front keeps the arithmetic below valid
    # for an empty ticker list (previously a KeyError on 'EBIT').
    metrics = pd.DataFrame(index=hk_stocks, columns=['EBIT', 'EV', 'ROIC'], dtype=float)

    tickers = [yf.Ticker(t) for t in hk_stocks]
    for t in tickers:
        try:
            info = t.info
            # NOTE: the 'EBIT' column is populated from the 'ebitda' field and
            # 'ROIC' from 'returnOnAssets'.
            metrics.loc[t.ticker, 'EBIT'] = _as_float(info.get('ebitda'), 0)
            # The 1e-6 default keeps the division below finite.
            metrics.loc[t.ticker, 'EV'] = _as_float(info.get('enterpriseValue'), 1e-6)
            metrics.loc[t.ticker, 'ROIC'] = _as_float(info.get('returnOnAssets'), 0) * 100
        except Exception as e:
            print(f"Error fetching magic info for {t.ticker}: {e}")
            metrics.loc[t.ticker, 'EBIT'] = 0
            metrics.loc[t.ticker, 'EV'] = 1e-6
            metrics.loc[t.ticker, 'ROIC'] = 0

    metrics['Earnings_Yield'] = metrics['EBIT'] / metrics['EV'] * 100
    metrics['EY_Score'] = np.clip((metrics['Earnings_Yield'] - 6) / (metrics['Earnings_Yield'].max() - 6 + 1e-6), 0, 1)
    metrics['ROIC_Score'] = np.clip((metrics['ROIC'] - 10) / (metrics['ROIC'].max() - 10 + 1e-6), 0, 1)
    metrics['Magic_Score'] = np.nanmean([metrics['EY_Score'], metrics['ROIC_Score']], axis=0)

    return metrics[['Magic_Score']]

def low_volatility(hk_stocks, returns, hsi_returns, prices):
    """Score tickers on volatility, beta, drawdown and idiosyncratic risk.

    For every ticker present in ``returns`` the function computes annualised
    volatility (daily std * sqrt(252)), beta against ``hsi_returns``, maximum
    drawdown in percent, and the annualised standard deviation of the
    residuals of an OLS regression of the ticker's returns on the index
    returns. Tickers missing from ``returns`` keep NaN for these metrics.

    Args:
        hk_stocks: List of ticker symbols. May be empty.
        returns: DataFrame of daily returns, one column per ticker.
        hsi_returns: Series of daily index returns on the same index.
        prices: DataFrame of close prices, one column per ticker.

    Returns:
        DataFrame indexed by ticker with columns ``Volatility``, ``Beta``,
        ``Max_Drawdown``, ``Low_Vol_Score``, ``Brian_Score`` and ``Sector``.
    """
    numeric_cols = ['Volatility', 'Beta', 'Max_Drawdown', 'Idio_Vol', 'Market_Price']
    # Pre-declared columns: NaN defaults for tickers without data, and a valid
    # (empty) frame when ``hk_stocks`` is empty.
    metrics = pd.DataFrame(index=hk_stocks, columns=numeric_cols, dtype=float)
    metrics['Sector'] = 'Unknown'

    # Both the index variance and the regression design matrix are the same
    # for every ticker, so build them once.
    hsi_var = hsi_returns.var()
    try:
        market_exog = sm.add_constant(hsi_returns)
    except Exception:
        market_exog = None

    for t in hk_stocks:
        if t in returns.columns:
            metrics.loc[t, 'Volatility'] = returns[t].std() * (252 ** 0.5)
            metrics.loc[t, 'Beta'] = returns[t].cov(hsi_returns) / hsi_var if hsi_var > 0 else 0
            metrics.loc[t, 'Max_Drawdown'] = ((prices[t] - prices[t].cummax()) / prices[t].cummax()).min() * 100 if t in prices.columns else 0
            if market_exog is not None:
                try:
                    # missing='drop' skips days where either series is NaN;
                    # otherwise a single gap makes the whole fit NaN.
                    model = sm.OLS(returns[t], market_exog, missing='drop').fit()
                    metrics.loc[t, 'Idio_Vol'] = model.resid.std() * (252 ** 0.5)
                except Exception:
                    metrics.loc[t, 'Idio_Vol'] = np.nan
        metrics.loc[t, 'Market_Price'] = prices[t].iloc[-1] if t in prices.columns and not prices[t].isna().all() else np.nan
        # The sector lookup is remote I/O; a failure for one ticker must not
        # abort the whole scoring run.
        try:
            metrics.loc[t, 'Sector'] = yf.Ticker(t).info.get('sector', 'Unknown')
        except Exception as e:
            print(f"Error fetching sector for {t}: {e}")
            metrics.loc[t, 'Sector'] = 'Unknown'

    # Min-max scores; lower volatility/beta/idiosyncratic risk ranks higher.
    # Max_Drawdown is negative, so the largest (least negative) value ranks
    # highest. The 1e-6 term avoids division by zero.
    metrics['Vol_Score'] = np.clip(1 - (metrics['Volatility'] - metrics['Volatility'].min()) / (metrics['Volatility'].max() - metrics['Volatility'].min() + 1e-6), 0, 1)
    metrics['Beta_Score'] = np.clip(1 - (metrics['Beta'] - metrics['Beta'].min()) / (metrics['Beta'].max() - metrics['Beta'].min() + 1e-6), 0, 1)
    # Use the NaN-skipping Series reductions: the builtin max()/min() return
    # NaN as soon as a NaN comes first, which blanks the score for all tickers.
    drawdown_min = metrics['Max_Drawdown'].min()
    drawdown_max = metrics['Max_Drawdown'].max()
    metrics['Drawdown_Score'] = np.clip((metrics['Max_Drawdown'] - drawdown_min) / (drawdown_max - drawdown_min + 1e-6), 0, 1)
    metrics['Idio_Score'] = np.clip(1 - (metrics['Idio_Vol'] - metrics['Idio_Vol'].min()) / (metrics['Idio_Vol'].max() - metrics['Idio_Vol'].min() + 1e-6), 0, 1)
    metrics['Low_Vol_Score'] = np.nanmean([metrics['Vol_Score'], metrics['Beta_Score'], metrics['Drawdown_Score'], metrics['Idio_Score']], axis=0)
    metrics['Brian_Score'] = np.nanmean([metrics['Vol_Score'], metrics['Drawdown_Score']], axis=0)

    return metrics[['Volatility', 'Beta', 'Max_Drawdown', 'Low_Vol_Score', 'Brian_Score', 'Sector']]

def dividend_aristocrats(hk_stocks, prices):
    """Score tickers on dividend yield, payout, growth and cash coverage.

    Uses each ticker's dividend history between 2015-01-01 and 2025-03-09.
    Tickers with fewer than two dividend records, or whose lookup raises, get
    zero for every dividend metric.

    Args:
        hk_stocks: List of ticker symbols. May be empty.
        prices: DataFrame of close prices, one column per ticker.

    Returns:
        DataFrame indexed by ``Ticker`` with columns ``Dividend_Yield``
        (percent, capped at 20), ``Payout_Ratio``, ``Dividend_Growth``,
        ``Div_Score`` and ``Market_Price``.
    """
    end_date = datetime(2025, 3, 9)
    start_date = datetime(2015, 1, 1)

    def _as_float(value, default):
        # ``info`` can map a key to None; fall back per field instead of
        # zeroing every dividend metric of the ticker.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    tickers = [yf.Ticker(t) for t in hk_stocks]
    div_data = []
    for t in tickers:
        try:
            info = t.info
            last_close = prices[t.ticker].iloc[-1] if t.ticker in prices else 1
            share_price = _as_float(info.get('previousClose'), last_close)
            dividends = t.dividends
            fcf = _as_float(info.get('freeCashflow'), 0)
            if dividends.empty or len(dividends) < 2:
                yield_pct = 0
                payout_ratio = 0
                growth = 0
                fcf_coverage = 0
            else:
                # Drop the timezone so the index compares against naive datetimes.
                dividends.index = dividends.index.tz_localize(None)
                ttm_div = dividends[(dividends.index >= (end_date - timedelta(days=365))) &
                                    (dividends.index <= end_date)].sum()
                yield_pct = min((ttm_div / share_price * 100) if share_price > 0 else 0, 20)
                eps = _as_float(info.get('trailingEps'), 1e-6)
                payout_ratio = (ttm_div / eps) * 100 if eps != 0 else 0
                div_10yr = dividends[(dividends.index >= start_date) & (dividends.index <= end_date)]
                # NOTE(review): the 'YE' year-end alias appears to need a recent
                # pandas release; confirm the minimum supported version.
                yearly_div = div_10yr.resample('YE').sum()
                zero_div_years = (yearly_div == 0).sum()
                if zero_div_years > 1:
                    # More than one year without a payout: no growth credit.
                    growth = 0
                else:
                    start_div = yearly_div[yearly_div > 0].iloc[0] if (yearly_div > 0).any() else 1e-6
                    end_div = yearly_div[yearly_div > 0].iloc[-1] if (yearly_div > 0).any() else 1e-6
                    years = (end_date.year - start_date.year) - zero_div_years
                    # Compound annual growth between first and last paying year.
                    growth = ((end_div / start_div) ** (1 / max(years, 1)) - 1) * 100 if start_div > 0 else 0
                # NOTE(review): freeCashflow looks like a company-level total
                # while ttm_div is per share; confirm the ratio is intended.
                fcf_coverage = fcf / ttm_div if ttm_div > 0 else 0
            div_data.append({
                'Ticker': t.ticker,
                'Dividend_Yield': yield_pct,
                'Payout_Ratio': payout_ratio,
                'Dividend_Growth': growth,
                'FCF_Coverage': fcf_coverage,
                'Market_Price': prices[t.ticker].iloc[-1] if t.ticker in prices else np.nan
            })
        except Exception as e:
            print(f"Error fetching dividend info for {t.ticker}: {e}")
            div_data.append({'Ticker': t.ticker, 'Dividend_Yield': 0, 'Payout_Ratio': 0, 'Dividend_Growth': 0, 'FCF_Coverage': 0, 'Market_Price': np.nan})

    # Explicit columns keep set_index() valid when no ticker was processed
    # (previously a KeyError on 'Ticker'); the float cast keeps the arithmetic
    # numeric for an empty frame.
    record_columns = ['Ticker', 'Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'FCF_Coverage', 'Market_Price']
    metrics = pd.DataFrame(div_data, columns=record_columns).set_index('Ticker').astype(float)
    metrics['Yield_Score'] = np.clip((metrics['Dividend_Yield'] - metrics['Dividend_Yield'].min()) / (metrics['Dividend_Yield'].max() - metrics['Dividend_Yield'].min() + 1e-6), 0, 1)
    metrics['Payout_Score'] = np.clip(1 - (metrics['Payout_Ratio'] - metrics['Payout_Ratio'].min()) / (metrics['Payout_Ratio'].max() - metrics['Payout_Ratio'].min() + 1e-6), 0, 1)
    metrics['Growth_Score'] = np.clip((metrics['Dividend_Growth'] - 5) / (metrics['Dividend_Growth'].max() - 5 + 1e-6), 0, 1)
    metrics['FCF_Score'] = np.clip((metrics['FCF_Coverage'] - 2) / (metrics['FCF_Coverage'].max() - 2 + 1e-6), 0, 1)
    metrics['Div_Score'] = np.nanmean([metrics['Yield_Score'], metrics['Payout_Score'], metrics['Growth_Score'], metrics['FCF_Score']], axis=0)

    return metrics[['Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'Div_Score', 'Market_Price']]

def portfolio_selection(hk_stocks, returns, hsi_returns, prices, portfolio_size=5):
    """Score every ticker, export the evaluation table and pick a portfolio.

    Steps:
      1. Run the four strategy scorers and average their scores into
         ``Composite_Score``.
      2. Add the mean rolling 252-day correlation with the index since
         2020-03-09, plus per-ticker return, Sharpe ratio, volume, amplitude,
         name and a dividend-cut flag.
      3. Write the full table to ``all_stock_evaluations.csv`` (and trigger a
         download when running in Google Colab).
      4. Drop tickers flagged ``Sell`` or with index correlation above 0.7,
         rank the rest by ``Performance_Score`` and keep the top
         ``portfolio_size``.
      5. Print metrics for the equal-weighted portfolio.

    Args:
        hk_stocks: List of ticker symbols.
        returns: DataFrame of daily returns, one column per ticker.
        hsi_returns: Series of daily index returns.
        prices: DataFrame of close prices, one column per ticker.
        portfolio_size: Maximum number of stocks to select.

    Returns:
        Tuple ``(portfolio, portfolio_corr, hedging_needed, portfolio_returns,
        portfolio_prices, hsi_returns)``.
    """
    # Strategy scores: value, magic formula, low volatility, dividends.
    value_df = value_investing(hk_stocks, prices)
    magic_df = magic_formula(hk_stocks, prices)
    low_vol_df = low_volatility(hk_stocks, returns, hsi_returns, prices)
    div_df = dividend_aristocrats(hk_stocks, prices)

    # Combine all strategy dataframes into a single dataframe
    combined = pd.DataFrame(index=hk_stocks)
    combined = combined.join(value_df[['ROE', 'PE_Ratio', 'PB_Ratio', 'Value_Score', 'Intrinsic_Value', 'ROIC']], how='left')
    combined = combined.join(magic_df[['Magic_Score']], how='left')
    combined = combined.join(low_vol_df[['Volatility', 'Beta', 'Max_Drawdown', 'Low_Vol_Score', 'Brian_Score', 'Sector']], how='left')
    combined = combined.join(div_df[['Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'Div_Score', 'Market_Price']], how='left')
    combined['Composite_Score'] = np.nanmean([combined['Value_Score'], combined['Magic_Score'], combined['Low_Vol_Score'], combined['Div_Score']], axis=0)

    # Calculate HSI Correlation over the last 5 years
    five_year_start = datetime(2020, 3, 9)
    recent_returns = returns[five_year_start:] if not returns.empty else pd.DataFrame()
    hsi_recent_returns = hsi_returns[five_year_start:] if not hsi_returns.empty else pd.Series(dtype=float)
    combined['HSI_Correlation'] = [recent_returns[t].rolling(252).corr(hsi_recent_returns).mean() if t in recent_returns.columns and not recent_returns[t].isna().all() else 0 for t in hk_stocks]

    # Fetch highs, lows and volumes with ONE download; the three fields come
    # from the same response, so requesting it three times only tripled the
    # network traffic.
    end_date = datetime(2025, 3, 9)
    start_date = datetime(2015, 1, 1)
    tickers = [yf.Ticker(t) for t in hk_stocks]
    try:
        ohlcv = yf.download(hk_stocks + ['^HSI'], start=start_date, end=end_date, auto_adjust=False)
        highs = ohlcv['High']
        lows = ohlcv['Low']
        volumes = ohlcv['Volume']
    except Exception as e:
        print(f"Error downloading highs/lows/volumes: {e}")
        highs = lows = volumes = pd.DataFrame(index=prices.index)

    # Define risk-free rate and calculate years
    risk_free_rate = 0.02
    years = (end_date - start_date).days / 365.25

    # Defaults for tickers without usable prices or whose processing fails.
    # Creating the columns here also keeps the filter below valid when the
    # loop never runs.
    for column in ('Annualized_Return', 'Sharpe_Ratio', 'Volume', 'Amplitude'):
        combined[column] = np.nan
    combined['Name'] = combined.index
    combined['Dividend_Cut'] = False

    # Calculate additional metrics for each stock
    for t in tickers:
        ticker = t.ticker
        try:
            if ticker not in prices.columns or prices[ticker].isna().all():
                continue  # keep the defaults
            price_start = prices[ticker].iloc[0]
            price_end = prices[ticker].iloc[-1]
            dividends = t.dividends
            if not dividends.empty:
                dividends.index = dividends.index.tz_localize(None)
                div_1yr_ago = dividends[(dividends.index >= (end_date - timedelta(days=730))) &
                                        (dividends.index < (end_date - timedelta(days=365)))].sum()
                div_now = dividends[(dividends.index >= (end_date - timedelta(days=365))) &
                                    (dividends.index <= end_date)].sum()
                total_dividends = dividends[(dividends.index >= start_date) & (dividends.index <= end_date)].sum()
            else:
                div_1yr_ago = 0
                div_now = 0
                total_dividends = 0
            total_return = (price_end + total_dividends - price_start) / price_start if price_start != 0 else 0
            # ``x != np.nan`` is always True, so the fallback could never fire;
            # pd.notna() is the actual NaN test. total_return is NaN when the
            # first or last close of the window is missing.
            annualized_return = ((1 + total_return) ** (1 / years) - 1) * 100 if pd.notna(total_return) else 0
            daily_returns = returns[ticker] if ticker in returns.columns else pd.Series(dtype=float)
            sharpe_ratio = ((daily_returns.mean() * 252 - risk_free_rate) /
                           (daily_returns.std() * np.sqrt(252))) if daily_returns.std() > 0 else 0
            volume_avg = volumes[ticker].mean() if ticker in volumes.columns else 0
            amplitude = ((highs[ticker] - lows[ticker]) / prices[ticker]).mean() * 100 if ticker in highs.columns else 0
            # ``longName`` may be present but None; fall back to the ticker so
            # the name join below always receives strings.
            name = t.info.get('longName') or ticker

            combined.loc[ticker, 'Annualized_Return'] = annualized_return
            combined.loc[ticker, 'Sharpe_Ratio'] = sharpe_ratio
            combined.loc[ticker, 'Volume'] = volume_avg
            combined.loc[ticker, 'Amplitude'] = amplitude
            combined.loc[ticker, 'Name'] = name
            combined.loc[ticker, 'Dividend_Cut'] = div_now < div_1yr_ago
        except Exception as e:
            # Nothing has been written for this ticker yet, so the defaults stand.
            print(f"Error processing {ticker}: {e}")

    # Save all evaluations to CSV
    csv_filename = 'all_stock_evaluations.csv'
    combined.to_csv(csv_filename, index=True)
    print(f"All stock evaluations saved to '{csv_filename}'")

    if files is not None:
        files.download(csv_filename)
        print(f"Downloading '{csv_filename}' in Google Colab")
    else:
        print("Not in Google Colab; file saved locally. Please download manually.")

    # Filter eligible stocks
    # NOTE(review): Market_Price is a per-share quote while Intrinsic_Value is
    # derived from company-level earnings; confirm the comparison is intended.
    combined['Sell'] = (combined['Dividend_Cut']) | (combined['Market_Price'] > 1.2 * combined['Intrinsic_Value']) | (combined['ROIC'] < 8)
    eligible = combined[~combined['Sell']].copy()
    eligible = eligible[eligible['HSI_Correlation'] <= 0.7].copy()

    # Calculate performance scores
    max_return = eligible['Annualized_Return'].max()
    min_vol = eligible['Volatility'].min()
    max_vol = eligible['Volatility'].max()
    min_drawdown = eligible['Max_Drawdown'].abs().min()
    max_drawdown = eligible['Max_Drawdown'].abs().max()
    max_corr = eligible['HSI_Correlation'].max()

    eligible['Return_Score'] = eligible['Annualized_Return'] / (max_return + 1e-6)
    eligible['Vol_Score'] = 1 - (eligible['Volatility'] - min_vol) / (max_vol - min_vol + 1e-6)
    eligible['Drawdown_Score'] = 1 - (eligible['Max_Drawdown'].abs() - min_drawdown) / (max_drawdown - min_drawdown + 1e-6)
    eligible['Corr_Score'] = 1 - eligible['HSI_Correlation'] / (max_corr + 1e-6)
    eligible['Performance_Score'] = (
        0.4 * eligible['Return_Score'] +
        0.2 * eligible['Vol_Score'] +
        0.2 * eligible['Drawdown_Score'] +
        0.2 * eligible['Corr_Score']
    )

    # Select top portfolio
    portfolio = eligible.sort_values(by='Performance_Score', ascending=False).head(portfolio_size)
    portfolio_tickers = portfolio.index.tolist()
    print(f"Selected Stocks: {', '.join(portfolio['Name'])}")

    # Equal-weighted portfolio returns
    if portfolio_tickers:
        portfolio_returns = returns[portfolio_tickers].mean(axis=1).dropna()
    else:
        portfolio_returns = pd.Series(dtype=float)

    # Calculate portfolio metrics. The emptiness check also covers a selection
    # whose return rows are all NaN, where iloc[-1] would raise IndexError.
    if not portfolio_returns.empty:
        portfolio_cum_returns = (1 + portfolio_returns).cumprod()
        portfolio_prices = portfolio_cum_returns * 100  # Normalized to 100 for visualization
        total_return = portfolio_cum_returns.iloc[-1] - 1
        annualized_return = ((1 + total_return) ** (1 / years) - 1) * 100
        volatility = portfolio_returns.std() * np.sqrt(252)
        # Sharpe Ratio using annualized return directly
        sharpe = (annualized_return / 100 - risk_free_rate) / volatility if volatility > 0 else 0
        portfolio_drawdown = (portfolio_cum_returns - portfolio_cum_returns.cummax()) / portfolio_cum_returns.cummax() * 100
        max_drawdown = portfolio_drawdown.min()
        beta = portfolio_returns.cov(hsi_returns) / hsi_returns.var() if hsi_returns.var() > 0 else 0
        # 5th percentile of daily returns, scaled by sqrt(252) and expressed in percent.
        var = np.percentile(portfolio_returns.dropna(), 5) * np.sqrt(252) * 100
        # Correlation over 5-year period for consistency with HSI_Correlation
        portfolio_corr = portfolio_returns[five_year_start:].corr(hsi_returns[five_year_start:]) if not portfolio_returns[five_year_start:].empty else 0
        hsi_vol = hsi_returns.std() * (252 ** 0.5)
        hedging_needed = hsi_vol > 0.25
    else:
        portfolio_prices = pd.Series(dtype=float)
        total_return = 0
        annualized_return = 0
        volatility = 0
        sharpe = 0
        portfolio_drawdown = pd.Series([0])
        max_drawdown = 0
        beta = 0
        var = 0
        portfolio_corr = 0
        hsi_vol = 0
        hedging_needed = False

    # Print portfolio metrics
    print(f"Portfolio Metrics (Top {portfolio_size} Stocks):")
    print(f"Annualized Return: {annualized_return:.2f}%")
    print(f"Volatility: {volatility:.2f}")
    print(f"Sharpe Ratio: {sharpe:.2f}")
    print(f"Max Drawdown: {max_drawdown:.2f}%")
    print(f"Beta: {beta:.2f}")
    print(f"Value at Risk (95%, Annualized): {var:.2f}%")
    print(f"Portfolio-HSI Correlation: {portfolio_corr:.2f}")
    print(f"HSI Volatility: {hsi_vol:.2f}, Hedging Needed: {hedging_needed}")

    return portfolio, portfolio_corr, hedging_needed, portfolio_returns, portfolio_prices, hsi_returns

def visualize_portfolio(portfolio, portfolio_corr, portfolio_returns, portfolio_prices, hsi_returns):
    """Plot the selected portfolio and print its metric table.

    Shows four figures: a bar chart of each stock's ``Performance_Score``,
    cumulative returns of the portfolio against the index, the portfolio
    drawdown curve, and a pie chart of sector counts. Finally prints the
    per-stock metric table.

    Args:
        portfolio: DataFrame of selected stocks indexed by ticker.
        portfolio_corr: Portfolio/index correlation, shown in the first title.
        portfolio_returns: Series of daily portfolio returns.
        portfolio_prices: Unused; kept for interface compatibility.
        hsi_returns: Series of daily index returns.
    """
    # 1. Performance score per stock, with the value printed above each bar.
    plt.figure(figsize=(12, 6))
    plt.bar(portfolio.index, portfolio['Performance_Score'], color='skyblue')
    plt.title(f'Portfolio Stocks (HSI Correlation: {portfolio_corr:.2f})')
    plt.xlabel('Ticker')
    plt.ylabel('Performance Score (0-1)')
    plt.xticks(rotation=45)
    for i, v in enumerate(portfolio['Performance_Score']):
        plt.text(i, v + 0.01, f"{v:.2f}", ha='center')
    plt.tight_layout()
    plt.show()

    # 2. Cumulative return of the portfolio versus the index, in percent.
    plt.figure(figsize=(12, 6))
    portfolio_cum_returns = (1 + portfolio_returns).cumprod() - 1 if not portfolio_returns.empty else pd.Series([0])
    hsi_cum_returns = (1 + hsi_returns).cumprod() - 1 if not hsi_returns.empty else pd.Series([0])
    plt.plot(portfolio_cum_returns.index, portfolio_cum_returns * 100, label='Portfolio', color='blue')
    plt.plot(hsi_cum_returns.index, hsi_cum_returns * 100, label='HSI', color='orange')
    plt.title('Cumulative Returns: Portfolio vs HSI')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Return (%)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # 3. Drawdown: percentage distance of the growth curve from its running peak.
    plt.figure(figsize=(12, 6))
    portfolio_cum_returns = (1 + portfolio_returns).cumprod() if not portfolio_returns.empty else pd.Series([1])
    portfolio_drawdown = (portfolio_cum_returns - portfolio_cum_returns.cummax()) / portfolio_cum_returns.cummax() * 100
    plt.plot(portfolio_drawdown.index, portfolio_drawdown, color='red')
    plt.title('Portfolio Drawdown')
    plt.xlabel('Date')
    plt.ylabel('Drawdown (%)')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # 4. Sector allocation by number of stocks.
    plt.figure(figsize=(8, 8))
    sector_counts = portfolio['Sector'].value_counts()
    plt.pie(sector_counts, labels=sector_counts.index, autopct='%1.1f%%', startangle=90)
    plt.title('Portfolio Sector Allocation')
    plt.axis('equal')
    plt.tight_layout()
    plt.show()

    # Report the actual number of selected stocks: the portfolio can hold
    # fewer (or, with another portfolio_size, more) than five.
    print(f"Selected Portfolio (Top {len(portfolio)} Stocks):")
    print(portfolio[['Name', 'Sector', 'ROE', 'PE_Ratio', 'PB_Ratio', 'Value_Score',
                     'Volatility', 'Beta', 'Max_Drawdown', 'Low_Vol_Score', 'Brian_Score',
                     'Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'Div_Score',
                     'Composite_Score', 'Market_Price', 'HSI_Correlation', 'Sharpe_Ratio',
                     'Annualized_Return', 'Volume', 'Amplitude', 'Performance_Score']])

if __name__ == "__main__":
    # ``hk_stocks`` must already exist at module level (it is built by the
    # ticker-discovery code earlier in the notebook); the former
    # ``hk_stocks = hk_stocks`` self-assignment was a no-op and is dropped.
    cleaned_stocks, cleaned_metrics, returns, hsi_returns, prices = preprocess_stocks(hk_stocks)
    portfolio, portfolio_corr, hedging_needed, portfolio_returns, portfolio_prices, hsi_returns = portfolio_selection(cleaned_stocks, returns, hsi_returns, prices, portfolio_size=5)
    visualize_portfolio(portfolio, portfolio_corr, portfolio_returns, portfolio_prices, hsi_returns)

FastAPI

In [None]:
from fastapi import FastAPI
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from typing import List
from pydantic import BaseModel

# FastAPI application instance.
# NOTE(review): no route handlers are visible in this part of the file.
app = FastAPI()

class PortfolioStock(BaseModel):
    # Pydantic model describing one portfolio constituent.
    # NOTE(review): no endpoint using this model is visible in this chunk, so
    # the units noted below are assumptions; confirm against the handlers.
    ticker: str  # stock symbol
    weight: float  # presumably the position's share of the portfolio; TODO confirm scale (0-1 vs percent)
    annualizedReturn: float  # presumably a percentage like Annualized_Return elsewhere; TODO confirm
    volatility: float  # presumably annualized volatility; TODO confirm

def preprocess_stocks(hk_stocks):
    """Download OHLCV history and build per-ticker screening metrics.

    Daily data for ``hk_stocks`` plus the ``^HSI`` index is requested for
    2015-01-01 through 2025-03-09. If the download raises, empty frames indexed
    by that calendar range are used instead, so no ticker has a market price
    and every ticker is filtered out.

    Args:
        hk_stocks: List of ticker symbols. May be empty.

    Returns:
        Tuple ``(cleaned_stocks, cleaned_metrics, returns, hsi_returns, prices,
        highs, lows, volumes)``: the tickers whose last close is not NaN, their
        metric rows, the daily percentage returns, the ``^HSI`` return series,
        and the close / high / low / volume frames.
    """
    end_date = datetime(2025, 3, 9)
    start_date = datetime(2015, 1, 1)
    tickers = [yf.Ticker(t) for t in hk_stocks]

    def _as_float(value, default):
        # ``info`` can map a key to None (or a non-numeric value); treat that
        # like a missing key so one absent figure does not throw away the
        # ticker's other fundamentals.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    try:
        data = yf.download(hk_stocks + ['^HSI'], start=start_date, end=end_date, progress=False, auto_adjust=False)
        prices = data['Close'].dropna(how='all')
        returns = prices.pct_change(fill_method=None).dropna(how='all')
        hsi_returns = returns.get('^HSI', pd.Series(dtype=float))
        highs = data['High']
        lows = data['Low']
        volumes = data['Volume']
    except Exception as e:
        print(f"Error downloading data: {e}")
        calendar = pd.date_range(start_date, end_date)
        prices = pd.DataFrame(index=calendar)
        returns = pd.DataFrame(index=calendar)
        hsi_returns = pd.Series(index=calendar, dtype=float)
        highs = lows = volumes = pd.DataFrame(index=prices.index)

    metrics = pd.DataFrame(index=hk_stocks)
    for t in tickers:
        try:
            info = t.info
            metrics.loc[t.ticker, 'ROE'] = _as_float(info.get('returnOnEquity'), 0) * 100
            metrics.loc[t.ticker, 'PE_Ratio'] = _as_float(info.get('trailingPE'), np.nan)
            metrics.loc[t.ticker, 'PB_Ratio'] = _as_float(info.get('priceToBook'), np.nan)
            metrics.loc[t.ticker, 'Sector'] = info.get('sector', 'Unknown')
        except Exception as e:
            print(f"Error fetching info for {t.ticker}: {e}")
            metrics.loc[t.ticker, 'ROE'] = 0
            metrics.loc[t.ticker, 'PE_Ratio'] = np.nan
            metrics.loc[t.ticker, 'PB_Ratio'] = np.nan
            metrics.loc[t.ticker, 'Sector'] = 'Unknown'

    # The index variance is the same for every ticker; compute it once.
    hsi_var = hsi_returns.var()
    for t in hk_stocks:
        if t in returns.columns:
            metrics.loc[t, 'Volatility'] = returns[t].std() * (252 ** 0.5)
            # A NaN or zero index variance falls through to a beta of 0.
            metrics.loc[t, 'Beta'] = returns[t].cov(hsi_returns) / hsi_var if hsi_var > 0 else 0
            metrics.loc[t, 'Max_Drawdown'] = ((prices[t] - prices[t].cummax()) / prices[t].cummax()).min() * 100
        metrics.loc[t, 'Market_Price'] = prices[t].iloc[-1] if t in prices.columns and not prices[t].isna().all() else np.nan

    # With an empty ticker list no column was ever created, and
    # dropna(subset=...) would raise KeyError.
    if 'Market_Price' in metrics.columns:
        cleaned_metrics = metrics.dropna(subset=['Market_Price'])
    else:
        cleaned_metrics = metrics
    cleaned_stocks = cleaned_metrics.index.tolist()
    return cleaned_stocks, cleaned_metrics, returns, hsi_returns, prices, highs, lows, volumes

def value_investing(hk_stocks, prices):
    """Score tickers on quality/value fundamentals read from ``yf.Ticker(...).info``.

    Every component score lies in [0, 1]. ``Value_Score`` is the mean of the
    component scores that could be computed for a ticker; components whose
    inputs are missing are skipped instead of being counted as zero.

    Args:
        hk_stocks: Ticker symbols; they become the index of the result.
        prices: Price DataFrame. Not read by this function; kept so that all
            strategy functions share one call signature.

    Returns:
        DataFrame indexed by ``hk_stocks`` with the columns ROE, PE_Ratio,
        PB_Ratio, Value_Score, Intrinsic_Value, ROIC, Earnings_Growth and
        Gross_Margin. An empty ``hk_stocks`` gives an empty frame with the
        same columns. ``Intrinsic_Value`` is expressed per share.
    """
    eps = 1e-6
    growth_rate = 0.03
    discount_rate = 0.12
    raw_columns = [
        'ROE', 'ROIC', 'Total_Debt', 'Equity', 'Profit_Margin', 'PE_Ratio',
        'PB_Ratio', 'Owner_Earnings', 'Shares', 'Gross_Margin', 'Earnings_Growth',
    ]
    # Declaring the columns up front keeps the scoring below from raising
    # KeyError when the list is empty or every lookup fails.
    metrics = pd.DataFrame(index=hk_stocks, columns=raw_columns, dtype=float)

    def as_float(info, key, default):
        """Return info[key] as a float, or default if absent, None or non-numeric."""
        value = info.get(key)
        if value is None:
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    for symbol in hk_stocks:
        try:
            info = yf.Ticker(symbol).info
            row = {
                'ROE': as_float(info, 'returnOnEquity', 0) * 100,
                # NOTE(review): the ROIC column is fed from 'returnOnAssets'.
                'ROIC': as_float(info, 'returnOnAssets', 0) * 100,
                'Total_Debt': as_float(info, 'totalDebt', 0),
                # Unknown equity stays NaN: dividing by a 1e-6 placeholder would
                # create an enormous ratio that dominates Debt_Score's maximum.
                'Equity': as_float(info, 'totalStockholderEquity', np.nan),
                'Profit_Margin': as_float(info, 'profitMargins', 0) * 100,
                'PE_Ratio': as_float(info, 'trailingPE', np.nan),
                'PB_Ratio': as_float(info, 'priceToBook', np.nan),
                'Owner_Earnings': (
                    as_float(info, 'netIncomeToCommon', 0)
                    + as_float(info, 'depreciation', 0)
                    - as_float(info, 'capitalExpenditures', 0)
                ),
                'Shares': as_float(info, 'sharesOutstanding', np.nan),
                'Gross_Margin': as_float(info, 'grossMargins', 0) * 100,
                'Earnings_Growth': as_float(info, 'earningsGrowth', 0) * 100,
            }
        except Exception as e:
            print(f"Error for {symbol}: {e}")
            # Neutral fallback: zero for the "higher is better" inputs, NaN
            # (the pre-filled value) for P/E, P/B and equity. A zero P/E or
            # P/B would otherwise earn the *best* valuation score.
            row = {
                'ROE': 0, 'ROIC': 0, 'Total_Debt': 0, 'Profit_Margin': 0,
                'Owner_Earnings': 0, 'Gross_Margin': 0, 'Earnings_Growth': 0,
            }
        for column, value in row.items():
            metrics.loc[symbol, column] = value

    equity = metrics['Equity'].where(metrics['Equity'] != 0)
    metrics['Debt_to_Equity'] = metrics['Total_Debt'] / equity

    # Gordon-growth value of owner earnings, divided by the share count so the
    # figure can be compared with a per-share market price.
    shares = metrics['Shares'].where(metrics['Shares'] > 0)
    metrics['Intrinsic_Value'] = (
        metrics['Owner_Earnings'] * (1 + growth_rate) / (discount_rate - growth_rate) / shares
    )

    def above(series, floor):
        """Scale linearly from 0 at ``floor`` to 1 at the series maximum."""
        span = series.max() - floor + eps
        if not span > 0:
            # Nobody clears the floor: a negative span would flip the sign and
            # hand the weakest ticker a score of 1.
            span = eps
        return ((series - floor) / span).clip(lower=0, upper=1)

    def cheapest_first(series):
        """Min-max scale so the lowest value scores 1 and the highest 0."""
        low, high = series.min(), series.max()
        return (1 - (series - low) / (high - low + eps)).clip(lower=0, upper=1)

    leverage = metrics['Debt_to_Equity']
    metrics['ROE_Score'] = above(metrics['ROE'], 15)
    metrics['ROIC_Score'] = above(metrics['ROIC'], 10)
    metrics['Debt_Score'] = (1 - leverage / (leverage.max() + eps)).clip(lower=0, upper=1)
    metrics['Profit_Score'] = above(metrics['Profit_Margin'], 5)
    metrics['PE_Score'] = cheapest_first(metrics['PE_Ratio'])
    metrics['PB_Score'] = cheapest_first(metrics['PB_Ratio'])
    metrics['Growth_Score'] = above(metrics['Earnings_Growth'], 5)
    metrics['Margin_Score'] = above(metrics['Gross_Margin'], 30)

    score_columns = [
        'ROE_Score', 'ROIC_Score', 'Debt_Score', 'Profit_Score',
        'PE_Score', 'PB_Score', 'Growth_Score', 'Margin_Score',
    ]
    # Row-wise mean that skips NaN components (same result as np.nanmean).
    metrics['Value_Score'] = metrics[score_columns].mean(axis=1)

    return metrics[['ROE', 'PE_Ratio', 'PB_Ratio', 'Value_Score', 'Intrinsic_Value', 'ROIC', 'Earnings_Growth', 'Gross_Margin']]

def magic_formula(hk_stocks, prices):
    """Rank tickers by a weighted earnings-yield / return-on-capital / leverage score.

    ``Magic_Score`` is a weighted average (50% earnings yield, 40% ROIC,
    10% leverage) of scores in [0, 1], so it is itself in [0, 1]. When a
    component is NaN for a ticker, the remaining weights are renormalised.

    Args:
        hk_stocks: Ticker symbols to evaluate.
        prices: Price DataFrame. Not read by this function; kept so that all
            strategy functions share one call signature.

    Returns:
        Single-column DataFrame ``Magic_Score`` indexed by ticker. Only
        tickers with a strictly positive earnings yield are included; tickers
        whose lookup failed or whose enterprise value is unknown are left out.
    """
    eps = 1e-6
    weights = pd.Series({'EY_Score': 0.5, 'ROIC_Score': 0.4, 'Debt_Score': 0.1})
    raw_columns = ['EBIT', 'EV', 'ROIC', 'Debt_to_Equity']
    # Pre-declared columns keep empty input from raising KeyError below.
    metrics = pd.DataFrame(index=hk_stocks, columns=raw_columns, dtype=float)

    def as_float(info, key, default):
        """Return info[key] as a float, or default if absent, None or non-numeric."""
        value = info.get(key)
        if value is None:
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    for symbol in hk_stocks:
        try:
            info = yf.Ticker(symbol).info
            equity = as_float(info, 'totalStockholderEquity', np.nan)
            debt = as_float(info, 'totalDebt', 0)
            row = {
                # NOTE(review): the EBIT column is populated from the 'ebitda' field.
                'EBIT': as_float(info, 'ebitda', 0),
                # Unknown EV/equity stay NaN; a 1e-6 placeholder divisor would
                # turn missing data into an astronomically large ratio.
                'EV': as_float(info, 'enterpriseValue', np.nan),
                'ROIC': as_float(info, 'returnOnAssets', 0) * 100,
                'Debt_to_Equity': debt / equity if equity != 0 else np.nan,
            }
        except Exception as e:
            print(f"Error for {symbol}: {e}")
            # Leave the row NaN: it fails the positive-yield filter below.
            row = {}
        for column, value in row.items():
            metrics.loc[symbol, column] = value

    enterprise_value = metrics['EV'].where(metrics['EV'] != 0)
    metrics['Earnings_Yield'] = metrics['EBIT'] / enterprise_value * 100
    # .copy() so the score columns are written to an independent frame rather
    # than to a slice of the unfiltered one.
    metrics = metrics[metrics['Earnings_Yield'] > 0].copy()

    def above(series, floor):
        """Scale linearly from 0 at ``floor`` to 1 at the series maximum."""
        span = series.max() - floor + eps
        if not span > 0:
            # No ticker clears the floor; avoid a sign-flipping negative span.
            span = eps
        return ((series - floor) / span).clip(lower=0, upper=1)

    leverage = metrics['Debt_to_Equity']
    metrics['EY_Score'] = above(metrics['Earnings_Yield'], 6)
    metrics['ROIC_Score'] = above(metrics['ROIC'], 10)
    metrics['Debt_Score'] = (1 - leverage / (leverage.max() + eps)).clip(lower=0, upper=1)

    # Weighted average over the components that are available. Taking a plain
    # mean of pre-weighted components would divide by the component count and
    # cap the score near 1/3.
    scores = metrics[list(weights.index)]
    weight_available = scores.notna().astype(float).mul(weights, axis=1).sum(axis=1)
    weighted_total = scores.mul(weights, axis=1).sum(axis=1, min_count=1)
    metrics['Magic_Score'] = weighted_total / weight_available

    return metrics[['Magic_Score']]

def low_volatility(hk_stocks, returns, hsi_returns, prices):
    """Score tickers on realised risk: volatility, beta, drawdown and idiosyncratic vol.

    Args:
        hk_stocks: Ticker symbols; they become the index of the result.
        returns: DataFrame of periodic returns, one column per ticker.
        hsi_returns: Series of benchmark returns on the same index as ``returns``.
        prices: DataFrame of prices, one column per ticker (used for drawdown).

    Returns:
        DataFrame indexed by ``hk_stocks`` with the columns Volatility, Beta,
        Max_Drawdown (percent, <= 0), Low_Vol_Score (mean of the available
        component scores, each in [0, 1]), Risk_Parity_Score
        (1 / volatility) and Sector. Tickers absent from ``returns`` get NaN
        risk figures; a failed sector lookup yields 'Unknown'.
    """
    eps = 1e-6
    annualise = np.sqrt(252)
    min_obs = 3  # an intercept + slope fit needs more than two points to leave a residual
    numeric_columns = ['Volatility', 'Beta', 'Max_Drawdown', 'Idio_Vol']
    # Numeric columns start as NaN so tickers without data need no special case
    # and an empty ticker list does not raise KeyError further down.
    metrics = pd.DataFrame(index=hk_stocks, columns=numeric_columns, dtype=float)
    sectors = []
    benchmark_var = hsi_returns.var()  # loop-invariant

    for t in hk_stocks:
        if t in returns.columns:
            stock = returns[t]
            metrics.loc[t, 'Volatility'] = stock.std() * annualise
            metrics.loc[t, 'Beta'] = stock.cov(hsi_returns) / benchmark_var if benchmark_var > 0 else 0
            running_peak = prices[t].cummax()
            metrics.loc[t, 'Max_Drawdown'] = ((prices[t] - running_peak) / running_peak).min() * 100

            # Regress only on dates where both series are present; NaNs passed
            # straight into OLS either propagate into the residuals or raise.
            pair = pd.concat({'stock': stock, 'market': hsi_returns}, axis=1).dropna()
            if len(pair) >= min_obs:
                fit = sm.OLS(pair['stock'], sm.add_constant(pair['market'])).fit()
                metrics.loc[t, 'Idio_Vol'] = fit.resid.std() * annualise

        # One failed lookup must not abort the whole scoring run.
        try:
            sector = yf.Ticker(t).info.get('sector') or 'Unknown'
        except Exception as e:
            print(f"Error fetching sector for {t}: {e}")
            sector = 'Unknown'
        sectors.append(sector)

    metrics['Sector'] = sectors

    def lowest_first(series):
        """Min-max scale so the lowest value scores 1 and the highest 0."""
        low, high = series.min(), series.max()
        return (1 - (series - low) / (high - low + eps)).clip(lower=0, upper=1)

    drawdown = metrics['Max_Drawdown']
    metrics['Risk_Parity_Score'] = 1 / (metrics['Volatility'] + eps)
    metrics['Vol_Score'] = lowest_first(metrics['Volatility'])
    metrics['Beta_Score'] = (1 - metrics['Beta'] / 1.5).clip(lower=0, upper=1)
    # Series.min()/max() skip NaN; the builtin min()/max() give order-dependent
    # results (possibly NaN) as soon as one ticker has no drawdown figure.
    metrics['Drawdown_Score'] = (
        (drawdown - drawdown.min()) / (drawdown.max() - drawdown.min() + eps)
    ).clip(lower=0, upper=1)
    metrics['Idio_Score'] = lowest_first(metrics['Idio_Vol'])
    # Row-wise mean that skips NaN components (same result as np.nanmean).
    metrics['Low_Vol_Score'] = metrics[['Vol_Score', 'Beta_Score', 'Drawdown_Score', 'Idio_Score']].mean(axis=1)

    return metrics[['Volatility', 'Beta', 'Max_Drawdown', 'Low_Vol_Score', 'Risk_Parity_Score', 'Sector']]

def dividend_aristocrats(hk_stocks, prices, end_date=None):
    """Score tickers on dividend yield, payout, earnings growth and P/E.

    Args:
        hk_stocks: Ticker symbols; they become the index of the result.
        prices: DataFrame of prices, one column per ticker. Its last row
            supplies ``Market_Price`` and the fallback share price.
        end_date: Naive ``datetime`` closing the trailing-twelve-month
            dividend window. Defaults to 2025-03-09.

    Returns:
        DataFrame indexed by ``hk_stocks`` with the columns Dividend_Yield
        (percent, capped at 10), Payout_Ratio (percent), Dividend_Growth
        (percent change of the trailing-12-month dividends versus the
        12 months before; NaN when there were none to compare with),
        Div_Score (mean of the available component scores, each in [0, 1]),
        Market_Price and Earnings_Growth (percent).
    """
    if end_date is None:
        end_date = datetime(2025, 3, 9)
    ttm_start = end_date - timedelta(days=365)
    prior_start = ttm_start - timedelta(days=365)
    tol = 1e-6
    raw_columns = ['Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'Earnings_Growth', 'PE_Ratio']
    # Pre-declared columns: the returned selection can never hit a missing
    # column, whatever happens during the per-ticker lookups.
    metrics = pd.DataFrame(index=hk_stocks, columns=raw_columns, dtype=float)

    def as_float(info, key, default):
        """Return info[key] as a float, or default if absent, None or non-numeric."""
        value = info.get(key)
        if value is None:
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def last_price(symbol):
        """Last row of the ticker's price column, or NaN if there is none."""
        if symbol in prices.columns and len(prices[symbol]):
            return prices[symbol].iloc[-1]
        return np.nan

    for symbol in hk_stocks:
        try:
            ticker = yf.Ticker(symbol)
            info = ticker.info
            # Only fall back to the price frame when the quote is missing, so a
            # ticker absent from `prices` does not fail despite a valid quote.
            share_price = as_float(info, 'previousClose', np.nan)
            if np.isnan(share_price):
                share_price = float(last_price(symbol))

            dividends = ticker.dividends
            if dividends.empty:
                ttm_div = prior_div = 0
            else:
                # A timezone-aware index cannot be compared with the naive
                # window bounds (pandas raises TypeError), so drop the tz.
                if getattr(dividends.index, 'tz', None) is not None:
                    dividends = dividends.tz_localize(None)
                paid_on = dividends.index
                # Upper bound included so payments after end_date are not
                # counted as "trailing".
                ttm_div = dividends[(paid_on >= ttm_start) & (paid_on <= end_date)].sum()
                prior_div = dividends[(paid_on >= prior_start) & (paid_on < ttm_start)].sum()

            trailing_eps = as_float(info, 'trailingEps', np.nan)
            if np.isnan(trailing_eps):
                payout = np.nan  # unknown EPS: no meaningful payout ratio
            elif trailing_eps != 0:
                # NOTE(review): negative EPS gives a negative ratio, which
                # Payout_Score below clips to the top score - confirm intent.
                payout = ttm_div / trailing_eps * 100
            else:
                payout = 0

            row = {
                'Dividend_Yield': min((ttm_div / share_price * 100) if share_price > 0 else 0, 10),
                'Payout_Ratio': payout,
                'Dividend_Growth': (ttm_div / prior_div - 1) * 100 if prior_div > 0 else np.nan,
                'Earnings_Growth': as_float(info, 'earningsGrowth', 0) * 100,
                'PE_Ratio': as_float(info, 'trailingPE', np.nan),
            }
        except Exception as e:
            print(f"Error for {symbol}: {e}")
            # Neutral fallback: zero yield/growth, NaN elsewhere. A zero payout
            # ratio or P/E would be rewarded with the maximum score.
            row = {'Dividend_Yield': 0, 'Earnings_Growth': 0}
        for column, value in row.items():
            metrics.loc[symbol, column] = value

    metrics['Market_Price'] = [last_price(t) for t in hk_stocks]

    growth = metrics['Earnings_Growth']
    growth_span = growth.max() - 5 + tol
    if not growth_span > 0:
        # Nobody exceeds 5% growth: a negative span would flip the sign and
        # give the weakest ticker a score of 1.
        growth_span = tol

    metrics['Yield_Score'] = ((metrics['Dividend_Yield'] - 2) / (10 - 2 + tol)).clip(lower=0, upper=1)
    metrics['Payout_Score'] = (1 - (metrics['Payout_Ratio'] - 20) / (80 - 20 + tol)).clip(lower=0, upper=1)
    metrics['Growth_Score'] = ((growth - 5) / growth_span).clip(lower=0, upper=1)
    metrics['PE_Score'] = (1 - (metrics['PE_Ratio'] - 10) / (30 - 10 + tol)).clip(lower=0, upper=1)
    # Row-wise mean that skips NaN components (same result as np.nanmean).
    metrics['Div_Score'] = metrics[['Yield_Score', 'Payout_Score', 'Growth_Score', 'PE_Score']].mean(axis=1)

    return metrics[['Dividend_Yield', 'Payout_Ratio', 'Dividend_Growth', 'Div_Score', 'Market_Price', 'Earnings_Growth']]

def portfolio_selection(hk_stocks, returns, hsi_returns, prices, highs, lows, volumes, portfolio_size=5):
    """Pick a portfolio from ``hk_stocks`` and report its historical performance.

    Steps: run the four strategy scorers (``value_investing``,
    ``magic_formula``, ``low_volatility``, ``dividend_aristocrats``), merge
    their outputs, drop tickers flagged by the sell rule or with a mean
    rolling benchmark correlation above 0.7, cap the number of names per
    sector, rank the survivors by a blended performance score, weight the
    top ``portfolio_size`` by inverse volatility and evaluate the weighted
    return series.

    Args:
        hk_stocks: Candidate ticker symbols.
        returns: DataFrame of periodic returns, one column per ticker.
        hsi_returns: Series of benchmark returns.
        prices, highs, lows, volumes: DataFrames with one column per ticker.
        portfolio_size: Maximum number of holdings.

    Returns:
        Tuple ``(annualized_return, volatility, sharpe)``. The return is in
        percent, volatility is an annualised fraction. ``(0.0, 0.0, 0)`` is
        returned when no ticker survives the filters or there is no return
        history to evaluate.
    """
    combined = pd.DataFrame(index=hk_stocks)
    strategy_frames = (
        value_investing(hk_stocks, prices),
        magic_formula(hk_stocks, prices),
        low_volatility(hk_stocks, returns, hsi_returns, prices),
        dividend_aristocrats(hk_stocks, prices),
    )
    for frame in strategy_frames:
        # DataFrame.join raises ValueError on duplicate column names, and
        # Earnings_Growth comes back from two of the scorers. Keep the first
        # occurrence of every column.
        fresh = [column for column in frame.columns if column not in combined.columns]
        combined = combined.join(frame[fresh], how='left')
    # Row-wise mean that skips NaN components (same result as np.nanmean).
    combined['Composite_Score'] = combined[['Value_Score', 'Magic_Score', 'Low_Vol_Score', 'Div_Score']].mean(axis=1)

    five_year_start = datetime(2020, 3, 9)
    recent_returns = returns[five_year_start:] if not returns.empty else pd.DataFrame()
    hsi_recent_returns = hsi_returns[five_year_start:] if not hsi_returns.empty else pd.Series(dtype=float)
    combined['HSI_Correlation'] = [
        recent_returns[t].rolling(252).corr(hsi_recent_returns).mean() if t in recent_returns.columns else 0
        for t in hk_stocks
    ]

    risk_free_rate = 0.02
    end_date = datetime(2025, 3, 9)
    start_date = datetime(2015, 1, 1)
    years = (end_date - start_date).days / 365.25

    # Create the per-ticker columns up front so the ranking below still finds
    # them when every lookup in the loop fails.
    for column in ('Annualized_Return', 'Sharpe_Ratio', 'Volume', 'Amplitude'):
        combined[column] = np.nan

    for symbol in hk_stocks:
        if symbol not in prices.columns:
            continue
        try:
            ticker = yf.Ticker(symbol)
            price_start = prices[symbol].iloc[0]
            price_end = prices[symbol].iloc[-1]

            dividends = ticker.dividends
            if dividends.empty:
                total_dividends = 0
            else:
                # A timezone-aware index cannot be compared with the naive
                # window bounds (pandas raises TypeError), so drop the tz.
                if getattr(dividends.index, 'tz', None) is not None:
                    dividends = dividends.tz_localize(None)
                in_window = (dividends.index >= start_date) & (dividends.index <= end_date)
                total_dividends = dividends[in_window].sum()

            total_return = (price_end + total_dividends - price_start) / price_start if price_start != 0 else 0
            annualized_return = ((1 + total_return) ** (1 / years) - 1) * 100
            daily_returns = returns[symbol] if symbol in returns.columns else pd.Series(dtype=float)
            daily_std = daily_returns.std()
            sharpe_ratio = ((daily_returns.mean() * 252 - risk_free_rate) / (daily_std * np.sqrt(252))) if daily_std > 0 else 0
            volume_avg = volumes[symbol].mean() if symbol in volumes.columns else 0
            if symbol in highs.columns and symbol in lows.columns:
                amplitude = ((highs[symbol] - lows[symbol]) / prices[symbol]).mean() * 100
            else:
                amplitude = 0

            combined.loc[symbol, 'Annualized_Return'] = annualized_return
            combined.loc[symbol, 'Sharpe_Ratio'] = sharpe_ratio
            combined.loc[symbol, 'Volume'] = volume_avg
            combined.loc[symbol, 'Amplitude'] = amplitude
            combined.loc[symbol, 'Name'] = ticker.info.get('longName', symbol)
        except Exception as e:
            print(f"Error for {symbol}: {e}")

    combined['Sell'] = (
        (combined['ROIC'] < 8)
        | (combined['Market_Price'] > 1.2 * combined['Intrinsic_Value'])
        | (combined['Earnings_Growth'] < 0)
    )
    eligible = combined[~combined['Sell'] & (combined['HSI_Correlation'] <= 0.7)].copy()

    # At most 30% of the portfolio (but at least one name) per sector.
    # groupby().head() keeps the ticker index, which is needed further down to
    # select return columns; dividing the cap by the sector's member count
    # would leave every sector with two or more candidates empty.
    per_sector_cap = max(1, int(portfolio_size * 0.3))
    eligible = (
        eligible.sort_values('Composite_Score', ascending=False)
        .groupby('Sector', sort=False)
        .head(per_sector_cap)
    )

    eligible['Performance_Score'] = (
        0.4 * (eligible['Annualized_Return'] / (eligible['Annualized_Return'].max() + 1e-6)) +
        0.3 * (eligible['Sharpe_Ratio'] / (eligible['Sharpe_Ratio'].max() + 1e-6)) +
        0.3 * (eligible['Risk_Parity_Score'] / (eligible['Risk_Parity_Score'].max() + 1e-6))
    )

    portfolio = eligible.sort_values(by='Performance_Score', ascending=False).head(portfolio_size).copy()
    # Only tickers with a return series can be evaluated.
    portfolio = portfolio[portfolio.index.isin(returns.columns)]
    if portfolio.empty or returns.empty:
        return 0.0, 0.0, 0

    portfolio_tickers = portfolio.index.tolist()
    # Inverse-volatility weights; names without a usable volatility get zero
    # weight instead of turning every weight into NaN.
    inverse_vol = (1 / portfolio['Volatility']).replace([np.inf, -np.inf], np.nan)
    if inverse_vol.sum() > 0:
        portfolio['Weight'] = (inverse_vol / inverse_vol.sum()).fillna(0)
    else:
        portfolio['Weight'] = 1 / len(portfolio)

    # Missing daily returns count as flat days so one gap does not blank out
    # the whole portfolio return for that date.
    portfolio_returns = returns[portfolio_tickers].fillna(0).dot(portfolio['Weight'])
    portfolio_cum_returns = (1 + portfolio_returns).cumprod()
    total_return = portfolio_cum_returns.iloc[-1] - 1
    annualized_return = ((1 + total_return) ** (1 / years) - 1) * 100
    volatility = portfolio_returns.std() * np.sqrt(252)
    sharpe = (annualized_return / 100 - risk_free_rate) / volatility if volatility > 0 else 0

    return annualized_return, volatility, sharpe

@app.post("/portfolio/performance")
async def get_portfolio_performance(stocks: List[PortfolioStock]):
    """Evaluate the posted stocks and return headline performance figures.

    The tickers are passed through ``preprocess_stocks`` and
    ``portfolio_selection``; the portfolio size handed to the selection is
    the number of posted stocks.

    Args:
        stocks: Request body; each item's ``ticker`` attribute is used.

    Returns:
        Dict with ``annualizedReturn``, ``volatility``, ``sharpe``,
        ``perTradeReturn`` (annualised return divided by 252) and ``alert``
        (a warning string when volatility exceeds 0.3, else ``None``).
        Figures that are NaN or infinite are reported as ``None``.
    """
    import asyncio
    import math

    hk_stocks = [stock.ticker for stock in stocks]

    def evaluate():
        cleaned_stocks, _, returns, hsi_returns, prices, highs, lows, volumes = preprocess_stocks(hk_stocks)
        return portfolio_selection(cleaned_stocks, returns, hsi_returns, prices, highs, lows, volumes, len(hk_stocks))

    # The pipeline does blocking network and pandas work; running it on the
    # default executor keeps the event loop free to serve other requests.
    loop = asyncio.get_running_loop()
    annualized_return, volatility, sharpe = await loop.run_in_executor(None, evaluate)

    def finite_or_none(value):
        """Map NaN/inf to None so the response can be serialised as JSON."""
        if isinstance(value, float) and not math.isfinite(value):
            return None
        return value

    annualized_return = finite_or_none(annualized_return)
    volatility = finite_or_none(volatility)
    sharpe = finite_or_none(sharpe)

    return {
        "annualizedReturn": annualized_return,
        "volatility": volatility,
        "sharpe": sharpe,
        "perTradeReturn": annualized_return / 252 if annualized_return is not None else None,
        "alert": "High volatility detected!" if volatility is not None and volatility > 0.3 else None
    }

# Script entry point: start the API server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script.
    import uvicorn
    # host 0.0.0.0 accepts connections on every network interface; port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)