In [10]:
import pandas as pd

# Load the Excel file
file_path = "List_of_Companies.xlsx"
df = pd.read_excel(file_path, sheet_name='Table 2', skiprows=1)

# Display column names to confirm the exact name
print("Columns in file:", df.columns.tolist())

# Select only company name + stock code columns
clean_df = df[['PUBLIC LISTED COMPANIES', 'STOCK CODE']].copy()

# Drop rows with missing values in either column
clean_df = clean_df.dropna(subset=['PUBLIC LISTED COMPANIES', 'STOCK CODE'])

# Remove duplicates
clean_df = clean_df.drop_duplicates()

# Convert to string, strip spaces
clean_df['STOCK CODE'] = clean_df['STOCK CODE'].astype(str).str.strip()

# 2. Pad to 4 digits (yfinance KL format)
clean_df['ticker'] = clean_df['STOCK CODE'].str.zfill(4) + ".KL"

clean_df = clean_df.drop(columns=['STOCK CODE'], axis=1)

# Save the cleaned data to a new CSV file
clean_df.to_csv("cleaned_companies.csv", index=False)

Columns in file: ['NO', 'PUBLIC LISTED COMPANIES', 'STOCK CODE', 'TEAM']


In [11]:
import pandas as pd
import yfinance as yf

# Load cleaned CSV that contains the TICKER column
df = pd.read_csv("cleaned_companies.csv")

valid_tickers = []
invalid_tickers = []

# Loop through all tickers
for ticker in df['ticker']:
    stock = yf.Ticker(ticker)
    hist = stock.history(period="5d")  # small, fast check

    if hist.empty:
        invalid_tickers.append(ticker)
    else:
        valid_tickers.append(ticker)

# Print invalid tickers
print("\n❌ Tickers NOT found on yfinance:")
for t in invalid_tickers:
    print(t)

# Remove invalid tickers from DataFrame
df_cleaned = df[~df['ticker'].isin(invalid_tickers)].reset_index(drop=True)

# Save cleaned CSV
df_cleaned.to_csv("ticker_list.csv", index=False)

print("\n✅ Valid tickers saved to ticker_list.csv")
print(f"Valid: {len(valid_tickers)} | Invalid: {len(invalid_tickers)}")

HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 3051.KL"}}}
$3051.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: 3011.KL"}}}
$3011.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$5082.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$3032.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$3037.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$3012.KL: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$3016.KL: possibly delisted; no 


❌ Tickers NOT found on yfinance:
3051.KL
3011.KL
5082.KL
3032.KL
3037.KL
3012.KL
3016.KL
3048.KL
3053.KL
3024.KL
3001.KL
0400GB.KL
3055.KL
3046.KL
3025.KL
3021.KL
5222.KL
0021.KL
3028.KL
3029.KL
3038.KL
3030.KL
0401GA.KL
7183.KL
5175.KL
3040.KL
3047.KL
5014.KL
3015.KL
3019.KL
3022.KL
3005.KL
3027.KL
0159.KL
5237.KL
3006.KL
3045.KL
5047.KL
4944.KL
3041.KL
3031.KL
0822EA.KL
3052.KL
5256.KL
3002.KL
5270.KL
3036.KL
3039.KL
3056.KL
3057.KL
7045.KL
3009.KL
3008.KL
3023.KL
3054.KL
3049.KL
3050.KL
3033.KL
3013.KL
3058.KL
0836EA.KL
0837EA.KL
3043.KL
3017.KL

✅ Valid tickers saved to ticker_list.csv
Valid: 994 | Invalid: 64


In [14]:
from pymongo import MongoClient
from urllib.parse import quote_plus 
# Connect to MongoDB
username = quote_plus("Wrynaft")
password = quote_plus("Ryan@120104")

client = MongoClient(f"mongodb+srv://{username}:{password}@cluster0.bjjt9fa.mongodb.net/?appName=Cluster0")
db = client['roundtable_ai']
print("Connected to MongoDB")

Connected to MongoDB


In [19]:
from tqdm import tqdm
import yfinance as yf
import time

prices_col = db['stock_prices']

df_tickers = pd.read_csv('ticker_list.csv')
tickers = df_tickers['ticker'].tolist()
for ticker in tqdm(tickers, desc="Inserting tickers into MongoDB"):
    try:
        data = yf.download(ticker, period="5y")

        if data is None or data.empty:
            print(f"Warning: No data found for ticker {ticker}. Skipping insertion.")
            continue

        # FIX 1: Flatten multi-index columns (important!)
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        docs = []
        for date, row in data.iterrows():
            # FIX 2: Safely extract scalar values
            open_price = row.get("Open")
            high_price = row.get("High")
            low_price = row.get("Low")
            close_price = row.get("Close")
            volume_val = row.get("Volume")

            doc = {
                "ticker": ticker,
                "date": date.strftime("%Y-%m-%d"),
                "open": float(open_price) if pd.notna(open_price) else None,
                "high": float(high_price) if pd.notna(high_price) else None,
                "low": float(low_price) if pd.notna(low_price) else None,
                "close": float(close_price) if pd.notna(close_price) else None,
                "volume": int(volume_val) if pd.notna(volume_val) else None
            }

            docs.append(doc)

        if docs:
            prices_col.insert_many(docs)

        time.sleep(0.1)

    except Exception as e:
        print(f"Error processing ticker {ticker}: {e}")
print("Data insertion complete.")

  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, period="5y")
[*********************100%***********************]  1 of 1 completed
 

Data insertion complete.





In [1]:
import yfinance as yf
ticker = "1155.KL"
ticker = yf.Ticker(ticker)
info = ticker.info
print(info)

{'address1': 'Menara Maybank', 'address2': '14th Floor 100 Jalan Tun Perak', 'city': 'Kuala Lumpur', 'zip': '50050', 'country': 'Malaysia', 'phone': '60 3 2070 8833', 'fax': '60 3 2031 0071', 'website': 'https://www.maybank.com', 'industry': 'Banks - Regional', 'industryKey': 'banks-regional', 'industryDisp': 'Banks - Regional', 'sector': 'Financial Services', 'sectorKey': 'financial-services', 'sectorDisp': 'Financial Services', 'longBusinessSummary': "Malayan Banking Berhad provides commercial banking and related financial products and services. The company operates in three segments: Group Community Financial Services, Group Global Banking, and Group Insurance and Takaful. It offers savings and fixed deposits, and current accounts; and housing and personal loans, project financing, overdrafts, and trade financing, as well as remittance services. The company also provides credit cards; bancassurance products; hire purchase, unit trust, cash management, custodian, and trustee services

In [3]:
ticker.quarterly_financials

Unnamed: 0,2025-09-30,2025-06-30,2025-03-31,2024-12-31,2024-09-30,2024-03-31
Tax Effect Of Unusual Items,576605.1,-238304.8,1604386.0,3652635.0,26933000.0,
Tax Rate For Calcs,0.224447,0.230469,0.264532,0.216517,0.240192,
Total Unusual Items,2569000.0,-1034000.0,6065000.0,16870000.0,112131000.0,
Total Unusual Items Excluding Goodwill,2569000.0,-1034000.0,6065000.0,16870000.0,112131000.0,
Net Income From Continuing Operation Net Minority Interest,2621404000.0,2628029000.0,2588857000.0,2532232000.0,2538327000.0,
Reconciled Depreciation,273665000.0,273049000.0,272287000.0,276201000.0,273043000.0,
Net Interest Income,3244848000.0,3148637000.0,3201639000.0,3337185000.0,3091271000.0,
Interest Expense,3903750000.0,4357283000.0,4538375000.0,4718651000.0,5033757000.0,
Interest Income,7148598000.0,7505920000.0,7740014000.0,8055836000.0,8125028000.0,
Normalized Income,2619412000.0,2628825000.0,2584396000.0,2519015000.0,2453129000.0,


In [None]:
fundamentals_col = db['fundamentals']
df_tickers = pd.read_csv('ticker_list.csv')
tickers = df_tickers['ticker'].tolist()

def assess_data_quality(quarterly_df: pd.DataFrame, annual_df: pd.DataFrame) -> dict:
    """Assess data quality and completeness of financial data."""
    quality_assessment = {
        "quarterly_data_available": not (quarterly_df is None or quarterly_df.empty),
        "annual_data_available": not (annual_df is None or annual_df.empty),
        "quarterly_periods": quarterly_df.shape[1] if quarterly_df is not None and not quarterly_df.empty else 0,
        "annual_periods": annual_df.shape[1] if annual_df is not None and not annual_df.empty else 0,
        "missing_data_percentage": 0,
        "data_freshness": "Unknown"
    }
    # Calculate missing data percentage
    if quarterly_df is not None and not quarterly_df.empty:
        total_cells = quarterly_df.size
        missing_cells = quarterly_df.isnull().sum().sum()
        quality_assessment["missing_data_percentage"] = (missing_cells / total_cells) * 100 if total_cells > 0 else 100

        # Check data freshness
        if quarterly_df.shape[1] > 0:
            latest_quarter = quarterly_df.columns[0]
            quality_assessment["data_freshness"] = str(latest_quarter)
    return quality_assessment

for ticker_symbol in tqdm(tickers, desc="Inserting fundamentals into MongoDB"):
    try:
        ticker = yf.Ticker(ticker_symbol)
        info = ticker.info

        doc = {
            "ticker": ticker_symbol,
            "company_name": info.get("longName"),
            "sector": info.get("sector"),
            "industry": info.get("industry"),
            "metrics": {
                "valuation": {
                    "market_cap": info.get("marketCap"),
                    "enterprise_value": info.get("enterpriseValue"),
                    "pe_ratio": info.get("forwardPE") or info.get("trailingPE"),
                    "peg_ratio": info.get("pegRatio"),
                    "price_to_book": info.get("priceToBook"),
                    "price_to_sales": info.get("priceToSalesTrailing12Months"),
                    "ev_to_revenue": info.get("enterpriseToRevenue"),
                    "ev_to_ebitda": info.get("enterpriseToEbitda")
                },
                "financial_health": {
                    "total_cash": info.get("totalCash"),
                    "total_debt": info.get("totalDebt"),
                    "current_ratio": info.get("currentRatio"),
                    "debt_to_equity": info.get("debtToEquity"),
                    "return_on_assets": info.get("returnOnAssets"),
                    "return_on_equity": info.get("returnOnEquity"),
                    "gross_margins": info.get("grossMargins"),
                    "operating_margins": info.get("operatingMargins"),
                    "profit_margins": info.get("profitMargins")
                },
                "growth": {
                    "revenue_growth": info.get("revenueGrowth"),
                    "earnings_growth": info.get("earningsGrowth"),
                    "revenue_per_share": info.get("revenuePerShare"),
                    "book_value": info.get("bookValue"),
                    "earnings_per_share": info.get("trailingEps"),
                    "forward_eps": info.get("forwardEps")
                }
            },
            "quarterly_income": ticker.quarterly_financials,
            "quarterly_balance_sheet": ticker.quarterly_balance_sheet,
            "quarterly_cashflow": ticker.quarterly_cashflow,
            "annual_income": ticker.financials,
            "annual_balance_sheet": ticker.balance_sheet,
            "annual_cashflow": ticker.cashflow,
            
        }

        fundamentals_col.insert_one(doc)

        time.sleep(0.1)

    except Exception as e:
        print(f"Error processing ticker {ticker_symbol}: {e}")

Banks - Regional
