In [1]:
import pandas as pd
import numpy as np
import yfinance as yf

In [2]:
@np.vectorize
def convert2yf(code):
    """Convert stockcode to yFinance format"""
    return (4 - len(str(code))) * '0' + str(code) +'.HK'

In [3]:
url = 'https://www.hkex.com.hk/eng/services/trading/securities/securitieslists/ListOfSecurities.xlsx'
hkStockList = pd.read_excel(url, skiprows=2, index_col=0 )
#hkStockList.index = hkStockList.index.astype(str, inplace=True)
#hkStockList.info()
# Filter equity, trading currency, Sub-Category
filteredList = hkStockList[(hkStockList['Category']=='Equity') & 
                            (hkStockList['Sub-Category']=='Equity Securities (Main Board)') &
                            (hkStockList['Trading Currency']=='HKD')]
#filteredList.info()

In [4]:
hk_tickers = convert2yf(filteredList.index.tolist())
#ignore if already done it before
save = pd.DataFrame(hk_tickers,columns=['Ticker'])
save.to_csv('stock_list.csv',index=False)

In [5]:
hk_stocks = pd.read_csv('stock_list.csv').Ticker
hk_stocks

0       0001.HK
1       0002.HK
2       0003.HK
3       0004.HK
4       0005.HK
         ...   
2293    9995.HK
2294    9996.HK
2295    9997.HK
2296    9998.HK
2297    9999.HK
Name: Ticker, Length: 2298, dtype: object

In [6]:
def get_stock_data(stock_list):
    data = []
    for ticker in stock_list:
        try:
            print('Getting Data:',ticker)
            stock = yf.Ticker(ticker)
            info = stock.info
            
            # Get Market Cap (convert to HKD if in USD)
            market_cap = info.get('marketCap', None)
            if market_cap and info.get('currency', 'USD') == 'USD':
                market_cap *= 7.8  # Convert USD to HKD
            
            # Get financial statements
            financials = stock.financials
            balance_sheet = stock.balance_sheet  # Add balance sheet data
            if financials.empty or balance_sheet.empty:
                continue
                
            # Initialize dictionaries to store annual data
            revenue_data = {}
            eps_data = {}
            roa_data = {}
            
            for date in financials.columns:
                year = date.year
                if year<2021 or year>2024:
                    continue
                
                # Revenue
                if 'Total Revenue' in financials.index:
                    revenue = financials.loc['Total Revenue', date]
                elif 'Revenue' in financials.index:
                    revenue = financials.loc['Revenue', date]
                else:
                    revenue = None
                
                # Basic EPS
                if 'Basic EPS' in financials.index:
                    eps = financials.loc['Basic EPS', date]
                elif 'Earnings Per Share' in financials.index:
                    eps = financials.loc['Earnings Per Share', date]
                elif 'Net Income' in financials.index and 'Weighted Average Shares' in financials.index:
                    net_income = financials.loc['Net Income', date]
                    shares = financials.loc['Weighted Average Shares', date]
                    eps = net_income / shares if shares != 0 else None
                else:
                    eps = None

                # ROA
                if 'EBITDA' in financials.index and date in balance_sheet.columns:
                    ebitda = financials.loc['EBITDA', date]
                    total_assets = balance_sheet.loc['Total Assets', date]
                    roa = (ebitda / total_assets * 100) if total_assets != 0 else None
                
                # Convert to HKD if needed
                if info.get('financialCurrency', 'USD') == 'USD':
                    if revenue: revenue *= 7.8
                    if eps: eps *= 7.8
                if info.get('financialCurrency', 'CNY') == 'CNY':
                    if revenue: revenue *= 1.1
                    if eps: eps *= 1.1
                # Convert EBITDA if needed
                    if info.get('financialCurrency') == 'USD':
                        if roa is not None: roa *= 7.8
                    elif info.get('financialCurrency') == 'CNY':
                        if roa is not None: roa *= 1.1
                
                # Store the data
                if revenue: revenue_data[f'Revenue {year} (HKD)'] = revenue
                if eps: eps_data[f'EPS {year} (HKD)'] = eps
                if roa is not None:
                        # Round to 2 decimal places and add % to column name
                        roa_data[f'ROA {year} (%)'] = round(roa, 2) 
            
            # Prepare the data record
            record = {
                'Ticker': ticker,
                'Name': info.get('shortName', ticker),
                'Market Cap (HKD)': market_cap,
                'Sector': info.get('sector', 'N/A'),
                **revenue_data,
                **eps_data,
                **roa_data
            }
            data.append(record)
            
        except Exception as e:
            print(f"Error fetching {ticker}: {e}")
    return pd.DataFrame(data)

# Get data and sort by market cap
df_cap = get_stock_data(hk_stocks)
df_cap = df_cap.sort_values('Market Cap (HKD)', ascending=False)

Getting Data: 0001.HK
Getting Data: 0002.HK
Getting Data: 0003.HK
Getting Data: 0004.HK
Getting Data: 0005.HK
Getting Data: 0006.HK
Getting Data: 0007.HK
Getting Data: 0008.HK
Getting Data: 0009.HK
Getting Data: 0010.HK
Getting Data: 0011.HK
Getting Data: 0012.HK
Getting Data: 0013.HK
Getting Data: 0014.HK
Getting Data: 0016.HK
Getting Data: 0017.HK
Getting Data: 0018.HK
Getting Data: 0019.HK
Getting Data: 0020.HK
Getting Data: 0021.HK
Getting Data: 0022.HK
Getting Data: 0023.HK
Getting Data: 0025.HK
Getting Data: 0026.HK
Getting Data: 0027.HK
Getting Data: 0028.HK
Getting Data: 0029.HK
Getting Data: 0030.HK
Getting Data: 0031.HK
Getting Data: 0032.HK
Getting Data: 0033.HK
Getting Data: 0034.HK
Getting Data: 0035.HK
Getting Data: 0036.HK
Getting Data: 0037.HK
Getting Data: 0038.HK
Getting Data: 0039.HK
Getting Data: 0040.HK
Getting Data: 0041.HK
Getting Data: 0042.HK
Getting Data: 0045.HK
Getting Data: 0046.HK
Getting Data: 0048.HK
Getting Data: 0050.HK
Getting Data: 0051.HK
Getting Da

In [7]:
df_cap

Unnamed: 0,Ticker,Name,Market Cap (HKD),Sector,Revenue 2024 (HKD),Revenue 2023 (HKD),Revenue 2022 (HKD),Revenue 2021 (HKD),EPS 2024 (HKD),EPS 2023 (HKD),EPS 2022 (HKD),EPS 2021 (HKD),ROA 2024 (%),ROA 2023 (%),ROA 2022 (%),ROA 2021 (%)
542,0700.HK,TENCENT,4.676075e+12,Communication Services,7.262827e+11,6.699165e+11,6.100072e+11,6.161298e+11,23.031800,13.404600,21.732700,25.956700,19.16,16.20,19.62,21.40
1035,1398.HK,ICBC,2.576130e+12,Financial Services,8.990124e+11,9.218528e+11,9.584718e+11,1.033882e+12,1.078000,1.078000,1.067000,1.045000,12.65,13.92,15.31,16.84
2280,9988.HK,BABA-W,2.335392e+12,Consumer Cyclical,1.035285e+12,9.555557e+11,9.383682e+11,7.890179e+11,4.345000,3.806000,3.157000,7.645000,10.22,9.61,8.32,14.19
962,1288.HK,ABC,2.085518e+12,Financial Services,7.815313e+11,7.641029e+11,7.640446e+11,7.914566e+11,,0.792000,0.759000,0.715000,13.11,14.42,15.86,17.45
713,0941.HK,CHINA MOBILE,1.928770e+12,Communication Services,1.144835e+12,1.110240e+12,1.030985e+12,9.330838e+11,,6.776000,6.468000,6.237000,19.44,21.04,20.75,20.77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,0362.HK,C ZENITH CHEM,5.068300e+05,Utilities,3.156200e+07,1.008470e+08,1.986640e+08,2.290210e+08,-0.225700,-0.489700,-9.575400,-6.137598,-8.20,-1.39,-1.01,-7.69
694,0917.HK,QUNABOX GROUP,,Communication Services,1.473450e+09,1.107367e+09,6.089787e+08,5.526048e+08,-9.240000,0.548281,-0.485819,-0.583968,-81.99,22.31,-5.16,-12.27
1368,1863.HK,"1863.HK,0P0000NB5Q,0",,,9.997896e+09,8.013119e+09,7.950408e+09,1.236391e+10,0.559416,0.634062,0.465036,1.075932,6.56,7.47,8.86,13.75
1891,2906.HK,WAH YAN-OLD,,,5.463900e+07,5.134000e+07,3.376500e+07,1.699000e+06,0.010000,0.038000,-0.059000,-0.081000,603.41,50.82,3.88,-37.95


In [8]:
df_cap.to_csv('test/cap.csv', index=False)