# Create dataset with over 250 different asset types

Create a dataset to store all assets available on Yahoo Finance, including ETFs, stocks, commodities, cryptocurrencies, and others as needed.
This dataset will be used for Markowitz optimization and will be maintained in a dedicated Jupyter Notebook.


In [1]:
# List of ~250 most important traded assets across categories for Yahoo Finance

Asset_List = [
    # 100 Most Known Stocks (US Large Cap, Tech, Financials, Healthcare, etc.)
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", "BRK-B", "UNH", "JPM",
    "V", "JNJ", "WMT", "PG", "MA", "XOM", "LLY", "HD", "MRK", "ABBV",
    "AVGO", "COST", "PEP", "KO", "CVX", "MCD", "BAC", "ADBE", "PFE", "CSCO",
    "TMO", "DIS", "ABT", "VZ", "CRM", "ACN", "DHR", "NKE", "LIN", "TXN",
    "WFC", "INTC", "ORCL", "NEE", "PM", "MS", "UNP", "AMGN", "QCOM", "HON",
    "LOW", "UPS", "SBUX", "IBM", "MDT", "AMT", "CAT", "GS", "RTX", "BLK",
    "SPGI", "PLD", "CVS", "LMT", "ISRG", "DE", "T", "SYK", "MO", "MDLZ",
    "AXP", "SCHW", "CB", "GILD", "ZTS", "MMC", "ADP", "C", "ELV", "DUK",
    "SO", "CL", "USB", "TGT", "PNC", "BDX", "SHW", "CI", "BKNG", "APD",
    "REGN", "FISV", "EOG", "GM", "FDX", "AON", "PSX", "AIG", "HUM", "MET",

    # 20 Bond ETFs
    "BND", "AGG", "TLT", "LQD", "IEF", "SHY", "HYG", "JNK", "MUB", "BNDX",
    "TIP", "VCIT", "VCSH", "EMB", "BSV", "SCHO", "SPLB", "SPAB", "GOVT", "VGLT",

    # 20 Real Estate ETFs / REITs
    "VNQ", "SCHH", "IYR", "XLRE", "REM", "RWR", "FREL", "ICF", "USRT", "REET",
    "MORT", "DRN", "PSR", "ROOF", "KBWY", "FPRO", "SRVR", "HOMZ", "PFFR", "NURE",

    # 20 Commodity ETFs
    "GLD", "SLV", "DBC", "USO", "DBA", "PALL", "PPLT", "CORN", "WEAT", "SOYB",
    "UGA", "UNG", "IAU", "SGOL", "CPER", "JJG", "JO", "NIB", "WOOD", "COMT",

    # 20 Commodities (Spot/Futures, if available)
    "GC=F", "SI=F", "CL=F", "NG=F", "HG=F", "ZC=F", "ZS=F", "ZW=F", "LE=F", "HE=F",
    "KC=F", "SB=F", "CT=F", "OJ=F", "PL=F", "PA=F", "LBS=F", "CC=F", "RB=F", "HO=F",

    # 20 Cryptocurrencies (Yahoo Finance tickers)
    "BTC-USD", "ETH-USD", "USDT-USD", "BNB-USD", "SOL-USD", "XRP-USD", "ADA-USD", "DOGE-USD", "AVAX-USD", "TRX-USD",
    "LINK-USD", "MATIC-USD", "DOT-USD", "LTC-USD", "BCH-USD", "UNI1-USD", "ATOM1-USD", "XLM-USD", "FIL-USD", "ETC-USD",

    # 20 Alternative ETFs (Hedge Fund, Private Equity, Infrastructure, Multi-Asset, etc.)
    "ALTS", "QAI", "MNA", "HDG", "DBMF", "PSP", "GURU", "PFF", "JETS", "ARKK",
    "ARKW", "ARKG", "ARKF", "ARKQ", "ARKX", "SRLN", "HYLB", "HYD", "VYMI", "VIGI",

    # 15 International/Global ETFs (Developed, Emerging, Europe, Asia, etc.)
    "VEA", "VWO", "IEFA", "EFA", "EEM", "SPDW", "SCHF", "IEMG", "EWJ", "EWZ",
    "EWG", "EWT", "EWS", "EWL", "EWH"
]

# This dictionary can be used to get the asset list for downloading data


In [None]:
import sys
sys.path.append('/Users/eloibernier/Documents/Portfolio_Optimization_Sturturing/InvestmentPortfolioAI/notebooks/DescriptiveAnalysisPortfoli.ipynb_checkpoints')
import importlib
import globals
from globals import start, end, frequency

In [13]:
import yfinance as yf
import pandas as pd

def get_asset_universe(asset_list, start, end, frequency):
    """
    Fetches historical data for a list of assets and combines them into a single DataFrame.
    
    Parameters:
    asset_list (list): List of ticker symbols.
    start (str): Start date for the data.
    end (str): End date for the data.
    frequency (str): Data frequency (e.g., "1d" for daily).
    
    Returns:
    DataFrame: Combined historical data with tickers as columns and dates as rows.
    """
    combined_data = pd.DataFrame()
    
    for asset in asset_list:
        try:
            # Download data for the current asset
            data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
            # Add the data to the combined DataFrame
            combined_data[asset] = data
        except Exception as e:
            print(f"Error fetching data for {asset}: {e}")
    
    return combined_data

# Fetch and display the combined data
asset_universe = get_asset_universe(Asset_List, start, end, frequency)
print(asset_universe.head())


  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%***********************]  1 of 1 completed
  data = yf.download(asset, start=start, end=end, interval=frequency)['Close']
[*********************100%*******

                 AAPL       MSFT      GOOGL    AMZN      NVDA  META      TSLA  \
Date                                                                            
2012-01-03  12.359183  21.039209  16.552629  8.9515  0.321845   NaN  1.872000   
2012-01-04  12.425600  21.534336  16.624023  8.8755  0.325513   NaN  1.847333   
2012-01-05  12.563551  21.754402  16.393423  8.8805  0.337204   NaN  1.808000   
2012-01-06  12.694888  22.092352  16.169790  9.1305  0.333307   NaN  1.794000   
2012-01-09  12.674752  21.801552  15.484212  8.9280  0.333307   NaN  1.816667   

                BRK-B        UNH        JPM  ...       SPDW      SCHF  IEMG  \
Date                                         ...                              
2012-01-03  77.680000  41.728512  24.135361  ...  15.345536  9.525185   NaN   
2012-01-04  76.800003  42.336327  24.288261  ...  15.249802  9.493539   NaN   
2012-01-05  76.930000  42.619965  24.795567  ...  15.092517  9.339271   NaN   
2012-01-06  76.389999  42.773964  24.


  combined_data[asset] = data


In [16]:
asset_universe.to_csv('asset_universe.csv')