In [1]:
import pandas as pd
import yfinance as yf
import time

# Context

We use the `yfinance` API to download market data from Yahoo Finance. This includes nearly all stocks from BlackRock's *iShares MSCI ACWI ETF*, a broad ETF comprising large and mid-capitalization developed and emerging market equities. Additionally, data from bonds, FX, crypto, and commodities markets are included.

The result is two files:

- `stocks_candles.csv`: Contains the daily performance data of assets.
- `stocks_info.csv`: Includes information about the assets, such as quote currency and equity description.

## Defining functions

In [None]:
def format_tickers_for_yfinance(df):
    
    # Creating Yahoo compatible tickers from BlackRock's data
    
    exchange_suffixes = {
        "NASDAQ": "",
        "New York Stock Exchange Inc.": "",
        "Hong Kong Exchanges And Clearing Ltd": ".HK",
        "Euronext Amsterdam": ".AS",
        "Xetra": ".DE",
        "SIX Swiss Exchange": ".SW",
        "Omx Nordic Exchange Copenhagen A/S": ".CO",
        "London Stock Exchange": ".L",
        "Korea Exchange (Stock Market)": ".KS",
        "Nyse Euronext - Euronext Paris": ".PA",
        "Tokyo Stock Exchange": ".T",
        "Toronto Stock Exchange": ".TO",
        "Asx - All Markets": ".AX",
        "National Stock Exchange Of India": ".NS",
        "Bolsa De Madrid": ".MC",
        "Borsa Italiana": ".MI",
        "Singapore Exchange": ".SI",
        "Nyse Euronext - Euronext Brussels": ".BR",
        "Saudi Stock Exchange": ".SR",
        "Nasdaq Omx Nordic": ".ST",
        "Johannesburg Stock Exchange": ".JO",
        "Nasdaq Omx Helsinki Ltd.": ".HE",
        "XBSP": ".SA",
        "Indonesia Stock Exchange": ".JK",
        "Kuwait Stock Exchange": ".KW",
        "Dubai Financial Market": ".AE",
        "Oslo Bors Asa": ".OL",
        "Wiener Boerse Ag": ".VI",
        "Cboe BZX": "",
        "Bolsa Mexicana De Valores": ".MX",
        "Qatar Exchange": ".QA",
        "Warsaw Stock Exchange/Equities/Main Market": ".WA",
        "Irish Stock Exchange - All Market": ".IR",
        "Tel Aviv Stock Exchange": ".TA",
        "Shanghai Stock Exchange": ".SS",
        "Abu Dhabi Securities Exchange": ".AD",
        "Budapest Stock Exchange": ".BD",
        "Bursa Malaysia": ".KL",
        "Stock Exchange Of Thailand": ".BK",
        "Korea Exchange (Kosdaq)": ".KQ",
        "New Zealand Exchange Ltd": ".NZ",
        "Shenzhen Stock Exchange": ".SZ",
        "Nyse Euronext - Euronext Lisbon": ".LS",
        "Athens Exchange S.A. Cash Market": ".AT",
        "Deutsche Boerse Xetra": ".DE",
        "Prague Stock Exchange": ".PR",
        "Istanbul Stock Exchange": ".IS",
        "Bolsa De Valores De Colombia": ".CL",
        "Gretai Securities Market": ".TWO",
        "Philippine Stock Exchange Inc.": ".PS",
        "Santiago Stock Exchange": ".SN",
        "Egyptian Exchange": ".CA",
        "Standard-Classica-Forts": ".ME",
        "Taiwan Stock Exchange": ".TW",
    }

    def format_ticker(row):
        ticker = str(row["Ticker"])
        exchange = row["Exchange"]
        name = row["Name"]

        ticker = ticker.strip()
        if ticker.endswith("."):
            ticker = ticker[:-1]
        ticker = ticker.replace("_", "-")
        ticker = ticker.replace(".", "-")
        ticker = ticker.replace(" ", "-")
        ticker = ticker.replace("*", "")
        
        if exchange in ["Hong Kong Exchanges And Clearing Ltd"] and ticker.isnumeric():
            ticker = ticker.zfill(4)  # Make 4 digits
        if exchange in [
            "Korea Exchange (Stock Market)", "Korea Exchange (Kosdaq)",
            "Shanghai Stock Exchange", "Shenzhen Stock Exchange"
        ] and ticker.isnumeric():
            ticker = ticker.zfill(6)  # Make 6 digits

        suffix = exchange_suffixes.get(exchange, "")

        ticker_yahoo = ticker + suffix

        if name == 'BERKSHIRE HATHAWAY INC CLASS B': # Fixing 
            ticker_yahoo = 'BRK-B' 
        if name == 'HEICO CORP CLASS A':
            ticker_yahoo = 'HEI-A'

        return ticker_yahoo
    
    return df.apply(format_ticker, axis=1)


def get_yahoo_data(tickers, start, end):

    stocks_info = []
    stocks_candles = {}
    omitted_symbols = []
    info_params = ['symbol', 'shortName', 'typeDisp', 'currency', 'region', 'exchange',
                'industry', 'sector', 'country', 'longBusinessSummary', 'description']

    for ticker in tickers:

        time.sleep(0.2)

        try:
            stock = yf.Ticker(ticker)
            
            stock_info = stock.info if stock.info else {}
            stock_info_data = {
                param: stock_info.get(param, "-") for param in info_params
            }

            stock_candles = stock.history(start=start, end=end)
            
            if stock_info_data and not stock_candles.empty:
                stocks_info.append(stock_info_data)
                stocks_candles[stock_info_data['symbol']] = stock_candles
            else:
                omitted_symbols.append(ticker)

        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")
            omitted_symbols.append(ticker)

    df_stocks_info = pd.DataFrame(stocks_info) if len(stocks_info) > 0 else pd.DataFrame()
    df_stocks_candles = \
        pd.concat(stocks_candles, names=["Symbol", "Date"]).reset_index() if len(stocks_candles) > 0 else pd.DataFrame()

    return df_stocks_info, df_stocks_candles, omitted_symbols

In [None]:
new_download = False # ! Toggle to download data or look at the already downloaded one

## Defining equity symbols

In [4]:
# List of MSCI ACWI stocks downloaded brom BlackRock's website
if new_download:
    df_BR_symbols = pd.read_csv('iShares MSCI ACWI ETF.csv', skiprows=9, keep_default_na=False).iloc[:-2,:]
    print(f"Black Rock's asset classes are {df_BR_symbols['Asset Class'].unique()}")
    df_equity_symbols = df_BR_symbols[df_BR_symbols['Asset Class']=='Equity'].copy() # Only equities, other assets later
    df_equity_symbols = df_equity_symbols[
        df_equity_symbols['Exchange'].isin(['New York Stock Exchange Inc.', 'NASDAQ',
                                            'Euronext Amsterdam','Nyse Euronext - Euronext Brussels',
                                            'Nyse Euronext - Euronext Paris','Nyse Euronext - Euronext Lisbon',
                                            'Warsaw Stock Exchange/Equities/Main Market'])] # Limiting to main exchanges, otherwise a lot of data
    df_equity_symbols['SymbolYahoo'] = format_tickers_for_yfinance(df_equity_symbols)
    df_equity_symbols.to_csv('equity_tickers.csv', index=False)
else:
    df_equity_symbols = pd.read_csv('equity_tickers.csv')
    
df_equity_symbols

Black Rock's asset classes are ['Equity' 'Money Market' 'Cash' 'Cash Collateral and Margins' 'FX'
 'Futures']


Unnamed: 0,Ticker,Name,Sector,Asset Class,Market Value,Weight (%),Notional Value,Shares,Price,Location,Exchange,Currency,FX Rate,Market Currency,Accrual Date,SymbolYahoo
0,AAPL,APPLE INC,Information Technology,Equity,787946502.00,4.15,787946502.00,3681993.00,214.00,United States,NASDAQ,USD,1.00,USD,-,AAPL
1,NVDA,NVIDIA CORP,Information Technology,Equity,714866735.91,3.77,714866735.91,5980647.00,119.53,United States,NASDAQ,USD,1.00,USD,-,NVDA
2,MSFT,MICROSOFT CORP,Information Technology,Equity,670198094.80,3.53,670198094.80,1724204.00,388.70,United States,NASDAQ,USD,1.00,USD,-,MSFT
3,AMZN,AMAZON COM INC,Consumer Discretionary,Equity,452317949.40,2.38,452317949.40,2310810.00,195.74,United States,NASDAQ,USD,1.00,USD,-,AMZN
4,META,META PLATFORMS INC CLASS A,Communication,Equity,321956210.30,1.70,321956210.30,532247.00,604.90,United States,NASDAQ,USD,1.00,USD,-,META
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2119,ZAB,ZABKA GROUP SOCIETE ANONYME SA,Consumer Staples,Equity,424080.42,0.00,424080.42,72271.00,5.87,Poland,Warsaw Stock Exchange/Equities/Main Market,USD,3.83,PLN,-,ZAB.WA
2127,JDEP,JDE PEETS NV,Consumer Staples,Equity,413546.39,0.00,413546.39,20501.00,20.17,Netherlands,Euronext Amsterdam,USD,0.92,EUR,-,JDEP.AS
2222,CVC,CVC CAPITAL PARTNERS PLC,Financials,Equity,255064.54,0.00,255064.54,12347.00,20.66,Netherlands,Euronext Amsterdam,USD,0.92,EUR,-,CVC.AS
2227,PGE,PGE POLSKA GRUPA ENERGETYCZNA SA,Utilities,Equity,244953.12,0.00,244953.12,121462.00,2.02,Poland,Warsaw Stock Exchange/Equities/Main Market,USD,3.83,PLN,-,PGE.WA


## Defining all symbols to download

In [None]:
symbols_equity = list(df_equity_symbols['SymbolYahoo'])
symbols_currency = ['EURUSD=X', 'JPY=X', 'GBPUSD=X', 'AUDUSD=X', 'NZDUSD=X', 'EURJPY=X', 'GBPJPY=X', 'EURGBP=X', 'EURCAD=X', 'EURSEK=X', 'EURCHF=X', 'EURHUF=X', 'CNY=X', 'HKD=X', 'SGD=X', 'INR=X', 'MXN=X', 'PHP=X', 'IDR=X', 'THB=X', 'MYR=X', 'ZAR=X', 'RUB=X']
symbols_crypto = ['USDT-USD', 'BTC-USD', 'ETH-USD', 'USDC-USD', 'XRP-USD', 'SOL-USD', 'BNB-USD', 'ADA-USD', 'DOGE-USD', 'TRX-USD']
symbols_bonds = ['^IRX', '^FVX', '^TNX', '^TYX', '2YY=F', 'ZN=F', 'ZF=F', 'ZT=F', 'ZB=F']
symbols_interest_rates = []
symbols_indices = [
        '^GSPC',      # S&P 500 – The leading benchmark for the U.S. stock market.
        '^DJI',       # Dow Jones Industrial Average – Tracks 30 major U.S. companies.
        '^IXIC',      # NASDAQ Composite – Represents the U.S. tech sector.
        '^STOXX50E',  # Euro Stoxx 50 – A key European index covering the Eurozone.
        '^GDAXI',     # DAX – Germany's top index, representing Europe's largest economy.
        '^N225',      # Nikkei 225 – Japan's primary stock index and a key Asian benchmark.
        '^HSI',       # Hang Seng Index – Hong Kong's stock market, a proxy for China.
        '000001.SS',  # Shanghai Composite – The main index for mainland China's markets.
        '^AXJO',      # S&P/ASX 200 – Australia's most important stock market index.
        '^BSESN'      # BSE Sensex – India's leading index, representing a major emerging market.
    ]
symbols_commodities = ['GC=F', 'CL=F', 'ZC=F', 'NG=F', 'ZS=F', 'SI=F', 'HG=F', 'SB=F', 'BZ=F']

all_symbols = symbols_equity + symbols_currency + symbols_crypto + symbols_bonds + symbols_interest_rates + symbols_indices + symbols_commodities

## Downloading data from Yahoo

In [6]:
start = "2020-01-01"
end = "2025-03-22"

# Uncomment to download data
if new_download:
    df_stocks_info, df_stocks_candles, omitted_symbols = get_yahoo_data(all_symbols, start, end)
    
    # Check downloaded tickers
    print(f'Downloaded {round(100 - 100*len(omitted_symbols)/len(all_symbols))}% of equity symbols.')
    print("Not downloaded important equity symbols (at least 0.01% weight in MSCI):") # At least 0.01% of the portfolio and omitted
    df_equity_symbols[(df_equity_symbols['Weight (%)'].astype(float)>0.01) & \
                    (df_equity_symbols['SymbolYahoo'].isin(omitted_symbols))] # No important tickers were omitted

$BFB: possibly delisted; no timezone found
$UHALB: possibly delisted; no timezone found
$BES.LS: possibly delisted; no price data found  (1d 2020-01-01 -> 2025-03-22)


Downloaded 100% of equity symbols.
Not downloaded important equity symbols (at least 0.01% weight in MSCI):


In [7]:
if new_download:
    df_stocks_info.to_csv('stocks_info.csv', index=False)
    df_stocks_candles.to_csv('stocks_candles.csv', index=False)
else:
    df_stocks_info = pd.read_csv('stocks_info.csv')
    df_stocks_candles = pd.read_csv('stocks_candles.csv')

## A brief look at the data

In [8]:
df_stocks_info.head()

Unnamed: 0,symbol,shortName,typeDisp,currency,region,exchange,industry,sector,country,longBusinessSummary,description
0,AAPL,Apple Inc.,Equity,USD,US,NMS,Consumer Electronics,Technology,United States,"Apple Inc. designs, manufactures, and markets ...",-
1,NVDA,NVIDIA Corporation,Equity,USD,US,NMS,Semiconductors,Technology,United States,"NVIDIA Corporation, a computing infrastructure...",-
2,MSFT,Microsoft Corporation,Equity,USD,US,NMS,Software - Infrastructure,Technology,United States,Microsoft Corporation develops and supports so...,-
3,AMZN,"Amazon.com, Inc.",Equity,USD,US,NMS,Internet Retail,Consumer Cyclical,United States,"Amazon.com, Inc. engages in the retail sale of...",-
4,META,"Meta Platforms, Inc.",Equity,USD,US,NMS,Internet Content & Information,Communication Services,United States,"Meta Platforms, Inc. engages in the developmen...",-


In [9]:
df_stocks_candles.head()

Unnamed: 0,Symbol,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,AAPL,2020-01-02 00:00:00-05:00,71.721019,72.776598,71.466812,72.716072,135480400,0.0,0.0
1,AAPL,2020-01-03 00:00:00-05:00,71.941336,72.771752,71.783969,72.009125,146322800,0.0,0.0
2,AAPL,2020-01-06 00:00:00-05:00,71.127873,72.621654,70.876083,72.582916,118387200,0.0,0.0
3,AAPL,2020-01-07 00:00:00-05:00,72.592601,72.849231,72.021238,72.241554,108872000,0.0,0.0
4,AAPL,2020-01-08 00:00:00-05:00,71.943744,73.706264,71.943744,73.403633,132079200,0.0,0.0


In [10]:
print(omitted_symbols)

['BFB', 'UHALB', 'BES.LS']
