In [1]:
from datetime import datetime
from pathlib import Path

import pandas as pd
import yfinance as yf

In [2]:
# Universe of fund symbols passed to the price download. Despite the name, the
# list holds both exchange-traded funds and (in the second half) actively
# managed mutual funds. Several symbols are listed under more than one section
# heading, so the literal is passed through dict.fromkeys() to drop repeats
# while keeping first-seen order; the result is still a plain list of strings.
etf_tickers = list(dict.fromkeys([
    # US Major ETFs
    'SPY', 'IVV', 'VOO', 'VTI', 'QQQ', 'DIA', 'IWM', 'IJH', 'MDY', 'RSP',
    'ITOT', 'SCHB', 'IWF', 'IWD', 'VUG', 'VTV', 'IJS', 'IJT', 'VONV', 'VONG',
    'VTWO', 'IWO', 'IWN', 'IWP', 'IWS', 'IWV', 'SCHX', 'SCHA', 'SCHV', 'SCHG',

    # Sector ETFs
    'XLK', 'VGT', 'XLF', 'VFH', 'XLV', 'VHT', 'XLE', 'VDE', 'XLI', 'VIS',
    'XLY', 'VCR', 'XLP', 'VDC', 'XLU', 'VPU', 'XLB', 'VAW', 'XLC', 'XLRE',
    'IGV', 'IBB', 'IYW', 'IYF', 'IYH', 'IYE', 'IYM', 'IYK', 'IYC', 'IYZ',
    'SMH', 'SOXX', 'KBE', 'KRE', 'XHB', 'XRT', 'XSD', 'XSW', 'XTN', 'XES',

    # International Developed Markets
    'EFA', 'VEA', 'IEFA', 'SCHF', 'EWJ', 'EWG', 'EWU', 'EWL', 'EWQ', 'EWP',
    'EWI', 'EWD', 'EWN', 'EWO', 'EWK', 'EWA', 'EWC', 'EIRL', 'EFNL', 'EDEN',
    'ENOR', 'EWS', 'EWH', 'EWJE', 'EWJV', 'JPXN', 'DXJS', 'HEWJ', 'GREK', 'ERUS',

    # Emerging Markets
    'VWO', 'EEM', 'IEMG', 'SCHE', 'MCHI', 'INDA', 'EWZ', 'EWW', 'EWT', 'EWY',
    'THD', 'EWM', 'EPHE', 'EPOL', 'ECH', 'EZA', 'TUR', 'UAE', 'QAT', 'EGPT',
    'VNM', 'IDX', 'EIDO', 'ENZL', 'KSA', 'KWEB', 'CQQQ', 'ASHR', 'FXI', 'GXC',

    # Fixed Income
    'AGG', 'BND', 'LQD', 'TLT', 'IEF', 'SHY', 'HYG', 'JNK', 'EMB', 'MUB',
    'VCIT', 'VCSH', 'BNDX', 'VGIT', 'VGSH', 'VMBS', 'VTEB', 'GOVT', 'SHV', 'BIL',
    'IGOV', 'BWX', 'WIP', 'TIP', 'LTPZ', 'STIP', 'SCHP', 'ZROZ', 'EDV', 'VGLT',

    # Commodities
    'GLD', 'IAU', 'SLV', 'PPLT', 'PALL', 'DBC', 'GSG', 'DJP', 'USO', 'UNG',
    'DBA', 'CORN', 'WEAT', 'SOYB', 'JO', 'NIB', 'SGG', 'BAL', 'URA', 'REMX',
    'COPX', 'PICK', 'SLX', 'GDX', 'GDXJ', 'SILJ', 'GOAU', 'RING', 'SLVP', 'URNM',

    # Real Estate
    'VNQ', 'IYR', 'SCHH', 'REM', 'MORT', 'REZ', 'SRET', 'KBWY', 'ROOF', 'NETL',
    'REET', 'IFGL', 'RWO', 'VNQI', 'DRN', 'SRVR', 'INDS', 'HOMZ', 'BBRE', 'PPTY',

    # Thematic
    'ARKK', 'ARKG', 'ARKF', 'ARKW', 'ARKX', 'BOTZ', 'ROBO', 'AIQ', 'IRBO', 'ICLN',
    'TAN', 'FAN', 'PBW', 'QCLN', 'GAMR', 'ESPO', 'HERO', 'SOCL', 'ONLN', 'IBUY',
    'EMQQ', 'FINX', 'BUZZ', 'MOON', 'UFO', 'NERD', 'AWAY', 'CLOU', 'WCLD', 'IPO',

    # Smart Beta
    'MTUM', 'QUAL', 'SIZE', 'VLUE', 'USMV', 'EFAV', 'EEMV', 'ACWV', 'FVD', 'SPHD',
    'NOBL', 'SDY', 'VIG', 'DGRO', 'SCHD', 'HDV', 'SPLV', 'XMLV', 'XSLV', 'SPHQ',

    # Currency ETFs
    'UUP', 'UDN', 'FXE', 'FXB', 'FXF', 'FXY', 'FXA', 'FXC', 'CYB', 'CNY',
    'FXCH', 'KROO', 'BZF', 'INR', 'AUM', 'ICN', 'RUB', 'DBP', 'CEW', 'CCX',

    # Inverse & Leveraged
    'TQQQ', 'SQQQ', 'UPRO', 'SPXU', 'SSO', 'SDS', 'QLD', 'QID', 'SPXL', 'SPXS',
    'TNA', 'TZA', 'UDOW', 'SDOW', 'TECL', 'TECS', 'FAS', 'FAZ', 'ERX', 'ERY',

    # Multi-Asset
    'AOA', 'AOR', 'AOM', 'AOK', 'GAL', 'MDIV', 'IYLD', 'GYLD', 'PCEF', 'YYY',
    'INKM', 'DVHL', 'CVY', 'RDIV', 'RNDM', 'RNEM', 'RNSC', 'RNDV', 'RDOG', 'RALS',

    # Alternative Strategy
    'QAI', 'MNA', 'BTAL', 'CSM', 'CPI', 'CSLS', 'HSPX', 'VAMO', 'DYLS', 'RORO',
    'TAIL', 'SWAN', 'DRSK', 'DFND', 'PTLC', 'PTMC', 'PTNQ', 'PTEU', 'PTJP', 'PTEM',

    # Regional ETFs
    'AAXJ', 'AIA', 'EPP', 'GMF', 'ADRA', 'ADRD', 'ADRE', 'ADRU', 'PAF', 'PGAL',
    'NORW', 'EDEN', 'EFNL', 'EIRL', 'EIS', 'EPOL', 'ESR', 'EUFN', 'EUFX', 'EWD',

    # More International
    'ACWI', 'ACWX', 'DWX', 'DEM', 'DGS', 'EDIV', 'DVYE', 'EMHY', 'HYEM', 'LEMB',
    'PCY', 'TEI', 'ELD', 'EMLC', 'VWOB', 'ANGL', 'EMAG', 'EMCB', 'IGEB', 'CBON',

    # Additional Sector/Industry
    'PBJ', 'PEJ', 'PBS', 'PEZ', 'PSI', 'PYZ', 'RYE', 'RYF', 'RYH', 'RYI',
    'RYK', 'RYM', 'RYN', 'RYP', 'RYT', 'RYU', 'RCD', 'RTM', 'RHS', 'RPG',

    # Additional Thematic
    'HACK', 'CIBR', 'SKYY', 'SNSR', 'BLOK', 'LEGR', 'KOIN', 'DAPP', 'BKCH', 'IDRV',
    'DRIV', 'LIT', 'BATT', 'REMX', 'CNRG', 'ACES', 'SMOG', 'CTEC', 'PBD', 'GEX',

    # ESG ETFs
    'DSI', 'SUSA', 'ESGV', 'ESGU', 'SUSL', 'USSG', 'ESGE', 'ESGD', 'KRBN', 'LCTU',
    'EAGG', 'EAOA', 'EAOE', 'EAOK', 'EAOM', 'EASG', 'EFIV', 'EMXC', 'ERTH', 'ESGA',

    # Additional Smart Beta
    'PWB', 'PWV', 'PFM', 'PKW', 'PXQ', 'PRF', 'PRFZ', 'PDP', 'PTF', 'PUI',
    'PXI', 'PXE', 'PBE', 'PJP', 'PSP', 'PSI', 'PXJ', 'PBD', 'PIO', 'PHO',

    # Additional Fixed Income
    'FLOT', 'FLRN', 'NEAR', 'ICSH', 'GSY', 'VRIG', 'FTSM', 'MINT', 'JPST', 'SGOV',
    'TFLO', 'SHYL', 'SHYG', 'GHYG', 'FALN', 'ANGL', 'HYDB', 'HYDW', 'SHYD', 'HYGV',

    # Additional Commodities
    'PDBC', 'COMT', 'COMB', 'BCI', 'BCD', 'BCM', 'GCC', 'USCI', 'UCI', 'DDP',
    'GSP', 'DPU', 'UCD', 'UGE', 'UAG', 'GRN', 'TAGS', 'DJCI', 'FTGC', 'CPER',

    # More International Developed
    'IEUR', 'HEZU', 'BBEU', 'EURL', 'FEP', 'BBAX', 'IQLT', 'HEFA', 'EFAS', 'IEUS',
    'HFXE', 'DEZU', 'FEUZ', 'GSEU', 'EUCG', 'EUDG', 'EUDV', 'EUMV', 'EUSC', 'FDD',

    # More Emerging Markets
    'EEMO', 'FNDE', 'HEEM', 'DGRE', 'EMCG', 'EEMS', 'EELV', 'TLTE', 'PBEE', 'FEM',
    'EDOG', 'PXH', 'PIE', 'DEM', 'EDIV', 'EMGF', 'EMDV', 'XSOE', 'EMSH', 'ESG',

    # Additional Regional
    'ECON', 'EMDD', 'EMFM', 'FRN', 'ARGT', 'NGE', 'EZA', 'KSA', 'GULF', 'MES',
    'EGPT', 'FM', 'FRN', 'GVAL', 'ITEQ', 'ISRA', 'PAK', 'PGAL', 'PLND', 'VNQI',

    # Vanguard Active Mutual Funds
    'VDIGX', # Vanguard Dividend Growth
    'VHCAX', # Vanguard Capital Opportunity
    'VPMAX', # Vanguard PRIMECAP
    'VWUSX', # Vanguard US Growth
    'VGPMX', # Vanguard Global Capital Cycles
    'VEIPX', # Vanguard Equity Income
    'VEXPX', # Vanguard Explorer
    'VWILX', # Vanguard International Growth
    'VWENX', # Vanguard Wellington
    'VWINX', # Vanguard Wellesley Income
    'VGELX', # Vanguard Global Equity
    'VTRIX', # Vanguard International Value
    'VSEQX', # Vanguard Strategic Equity
    'VMRAX', # Vanguard Morgan Growth
    'VMRGX', # Vanguard PRIMECAP Core

    # Fidelity Active Funds
    'FCNTX', # Fidelity Contrafund
    'FMAGX', # Fidelity Magellan
    'FBGRX', # Fidelity Blue Chip Growth
    'FDGRX', # Fidelity Growth Company
    'FGRIX', # Fidelity Growth & Income
    'FLPSX', # Fidelity Low-Priced Stock
    'FOCPX', # Fidelity OTC Portfolio
    'FSRPX', # Fidelity Select Retailing
    'FSPHX', # Fidelity Select Health Care
    'FSELX', # Fidelity Select Electronics
    'FSCSX', # Fidelity Select Software & IT
    'FDVLX', # Fidelity Value
    'FEXPX', # Fidelity Export & Multinational
    'FBALX', # Fidelity Balanced
    'FEQIX', # Fidelity Equity-Income

    # T. Rowe Price Active Funds
    'TRBCX', # T. Rowe Price Blue Chip Growth
    'PRGFX', # T. Rowe Price Growth Stock
    'TRMCX', # T. Rowe Price Mid-Cap Growth
    'RPMGX', # T. Rowe Price Mid-Cap Growth
    'TRGOX', # T. Rowe Price Global Stock
    'PRHSX', # T. Rowe Price Health Sciences
    'PRITX', # T. Rowe Price International Stock
    'PRFDX', # T. Rowe Price Equity Income
    'PRWCX', # T. Rowe Price Capital Appreciation
    'PRSCX', # T. Rowe Price Science & Technology
    'PRNHX', # T. Rowe Price New Horizons
    'PREFX', # T. Rowe Price European Stock
    'PRMSX', # T. Rowe Price Emerging Markets Stock
    'PRSVX', # T. Rowe Price Small-Cap Value
    'OTCFX', # T. Rowe Price Small-Cap Stock

    # American Funds
    'AGTHX', # American Funds Growth Fund of America
    'AMCPX', # American Funds AMCAP
    'AIVSX', # American Funds Investment Company of America
    'AWSHX', # American Funds Washington Mutual
    'ANCFX', # American Funds Fundamental Investors
    'ANWPX', # American Funds New Perspective
    'AEPGX', # American Funds EuroPacific Growth
    'CWGIX', # American Funds Capital World Growth & Income
    'SMCWX', # American Funds SMALLCAP World
    'CAIBX', # American Funds Capital Income Builder
    'AMRMX', # American Funds American Mutual
    'ABNRX', # American Funds Bond Fund of America
    'ABALX', # American Funds American Balanced
    'AMECX', # American Funds Income Fund of America
    # NOTE(review): 'AMERICX' is seven characters long, unlike every other
    # mutual-fund symbol in this list - confirm the intended ticker.
    'AMERICX', # American Funds American High-Income Municipal Bond

    # PIMCO Active Funds
    'PTTRX', # PIMCO Total Return
    'PTTAX', # PIMCO Tax-Managed Real Return
    'PFORX', # PIMCO Foreign Bond
    'PEBIX', # PIMCO Emerging Markets Bond
    'PSTAX', # PIMCO Short-Term
    'PFRDX', # PIMCO Real Return
    'PDVAX', # PIMCO Diversified Income
    'PSPTX', # PIMCO Short-Term Municipal Bond
    'PHMIX', # PIMCO High Yield Municipal Bond
    'PTRIX', # PIMCO Real Estate Real Return Strategy

    # Franklin Templeton Active Funds
    'FKINX', # Franklin Income
    'TEMIX', # Templeton Global Bond
    'TEPLX', # Templeton Growth
    'TEDIX', # Templeton Developing Markets
    'FKTFX', # Franklin Federal Tax-Free Income
    'FKGRX', # Franklin Growth
    'FRSGX', # Franklin Rising Dividends
    'TMGVX', # Templeton Global Value
    'FISCX', # Franklin Small Cap Growth
    'FRDPX', # Franklin Dynatech

    # JPMorgan Active Funds
    'OSHAX', # JPMorgan Large Cap Growth
    'VHIAX', # JPMorgan High Yield
    'OIEIX', # JPMorgan Equity Income
    'HLGEX', # JPMorgan Global Healthcare
    'JUESX', # JPMorgan US Equity
    'JSOSX', # JPMorgan Small Cap Growth
    'OGMIX', # JPMorgan Mid Cap Growth
    'JVASX', # JPMorgan Value Advantage
    'UNDIX', # JPMorgan Undiscovered Managers Behavioral Value
    'VSFIX', # JPMorgan Small Cap Value

    # Dodge & Cox Funds
    'DODGX', # Dodge & Cox Stock
    'DODFX', # Dodge & Cox International Stock
    'DODWX', # Dodge & Cox Global Stock
    'DODIX', # Dodge & Cox Income
    'DODLX', # Dodge & Cox Global Bond
    'DODBX', # Dodge & Cox Balanced

    # BlackRock Active Funds
    'MDDVX', # BlackRock Capital Appreciation
    'MDCDX', # BlackRock Equity Dividend
    'MAFHX', # BlackRock Global Allocation
    'MAHQX', # BlackRock Health Sciences Opportunities
    'MAFOX', # BlackRock Science & Technology Opportunities
    'MACSX', # BlackRock Strategic Global Bond
    'MAISX', # BlackRock International Opportunities
    'MRDEX', # BlackRock Emerging Markets
    'MCLOX', # BlackRock Long-Horizon Equity
    'MALOX', # BlackRock Global Long/Short Equity

    # Dimensional Fund Advisors (DFA)
    'DFSVX', # DFA U.S. Small Cap Value
    'DFVEX', # DFA Vector Equity
    'DFCEX', # DFA Core Equity
    'DFIVX', # DFA International Value
    'DISVX', # DFA International Small Cap Value
    'DFEVX', # DFA Emerging Markets Value
    'DFEMX', # DFA Emerging Markets Core Equity
    'DFGEX', # DFA Global Equity
    'DFIGX', # DFA Intermediate Government Fixed Income
    'DFVQX'  # DFA VA U.S. Vector Equity
]))


In [3]:
# Date range of the price history to request.
start_date = "2009-12-21"
end_date = "2019-12-29"

# Daily adjusted close prices, one column per ticker.
# auto_adjust=False is passed explicitly: the 'Adj Close' column is only
# returned when yfinance is not auto-adjusting prices, and the default for
# that flag is not the same in every yfinance release.
etf_data = yf.download(
    etf_tickers,
    start=start_date,
    end=end_date,
    interval='1d',
    auto_adjust=False,
)['Adj Close']

# Resample to weekly data by taking the last available price of each week.
etf_prices_weekly = etf_data.resample('W').last()

# Week-over-week simple returns. fill_method=None turns off the deprecated
# implicit forward-fill, so a week with no price stays NaN instead of being
# reported as a 0.0 return (which would later count as valid data in any
# coverage filter). The first row is NaN for every ticker by construction.
weekly_returns = etf_prices_weekly.pct_change(fill_method=None)

# Preview the first few rows (rich display as the cell's last expression).
weekly_returns.head()

[*********************100%***********************]  659 of 659 completed

36 Failed downloads:
['RYP', 'MDCDX', 'RYK', 'INR']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2009-12-21 -> 2019-12-29)')
['EMDD', 'CCX', 'PFRDX', 'MAFHX', 'OSHAX', 'UNDIX', 'AMERICX', 'PTJP', 'MES', 'ADRA', 'PTEM', 'MRDEX', 'EAOE']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
['ESGA', 'LCTU', 'DAPP', 'AWAY', 'EFIV', 'ARKX', 'MOON', 'BUZZ', 'EMCG', 'EUCG', 'EAOM', 'TRGOX', 'SGOV', 'KRBN', 'EAOA', 'CTEC', 'RORO', 'EAOK', 'BKCH']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2009-12-21 -> 2019-12-29) (Yahoo error = "Data doesn\'t exist for startDate = 1261371600, endDate = 1577595600")')


Ticker                         AAXJ     ABALX  ABNRX  ACES      ACWI  ACWV  \
Date                                                                         
2009-12-27 00:00:00+00:00       NaN       NaN    NaN   NaN       NaN   NaN   
2010-01-03 00:00:00+00:00  0.006141 -0.006131    NaN   NaN -0.004707   NaN   
2010-01-10 00:00:00+00:00  0.035900  0.020358    NaN   NaN  0.030267   NaN   
2010-01-17 00:00:00+00:00 -0.014729 -0.000604    NaN   NaN -0.005738   NaN   
2010-01-24 00:00:00+00:00 -0.054344 -0.022384    NaN   NaN -0.051477   NaN   

Ticker                         ACWX  ADRA      ADRD      ADRE  ...       XLY  \
Date                                                           ...             
2009-12-27 00:00:00+00:00       NaN   NaN       NaN       NaN  ...       NaN   
2010-01-03 00:00:00+00:00 -0.003653   NaN -0.001909  0.007764  ... -0.013585   
2010-01-10 00:00:00+00:00  0.035932   NaN  0.026119  0.033764  ...  0.020826   
2010-01-17 00:00:00+00:00 -0.007787   NaN -0.002273 -

  weekly_returns = etf_prices_weekly.pct_change()


In [4]:
# Remove the return row for the very first price week: it has no previous week
# to compare against, so every ticker is NaN there.
# The row is identified by label (the first index value of the weekly price
# frame) rather than by position, so re-running this cell cannot remove any
# further weeks from weekly_returns.
first_price_week = etf_prices_weekly.index[0]
weekly_returns = weekly_returns.loc[weekly_returns.index != first_price_week]
weekly_returns

Ticker,AAXJ,ABALX,ABNRX,ACES,ACWI,ACWV,ACWX,ADRA,ADRD,ADRE,...,XLY,XMLV,XRT,XSD,XSLV,XSOE,XSW,XTN,YYY,ZROZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-03 00:00:00+00:00,0.006141,-0.006131,,,-0.004707,,-0.003653,,-0.001909,0.007764,...,-0.013585,,-0.019013,-0.000630,,,,,,0.011547
2010-01-10 00:00:00+00:00,0.035900,0.020358,,,0.030267,,0.035932,,0.026119,0.033764,...,0.020826,,0.025843,0.013866,,,,,,-0.022271
2010-01-17 00:00:00+00:00,-0.014729,-0.000604,,,-0.005738,,-0.007787,,-0.002273,-0.035511,...,-0.013491,,-0.018894,-0.062577,,,,,,0.030966
2010-01-24 00:00:00+00:00,-0.054344,-0.022384,,,-0.051477,,-0.057789,,-0.052392,-0.064773,...,-0.032021,,-0.030979,-0.022105,,,,,,0.020506
2010-01-31 00:00:00+00:00,-0.031616,-0.008045,,,-0.025310,,-0.027763,,-0.032212,-0.034751,...,-0.003791,,-0.002880,-0.049050,,,,,,-0.009339
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-01 00:00:00+00:00,-0.002149,0.005610,0.000000,0.020556,0.005968,0.002100,0.000835,,0.005502,0.020385,...,0.017787,0.010600,0.019161,0.014625,0.014441,-0.001164,0.026873,0.009070,0.005689,0.005131
2019-12-08 00:00:00+00:00,0.008471,0.000348,-0.000913,0.002472,0.004256,0.004714,0.007301,,0.003648,0.010078,...,-0.005798,0.005806,0.001790,0.005725,0.003954,0.014875,-0.022473,-0.012397,0.000565,-0.020490
2019-12-15 00:00:00+00:00,0.031036,0.006623,0.004570,0.003390,0.012970,0.000521,0.019673,,0.021354,0.029283,...,0.011746,-0.009870,-0.002011,0.038118,-0.004332,0.029720,-0.007962,0.002197,0.004522,0.007890
2019-12-22 00:00:00+00:00,0.021165,0.010697,0.000910,0.037536,0.011971,0.010610,0.008339,,0.007117,0.020649,...,0.014538,0.018808,0.028061,0.036326,0.018592,0.022958,0.025883,0.017849,0.014068,-0.023269


In [None]:
# Coverage filter: a ticker is kept only if enough of its weekly returns are
# present. `threshold` is the required share of non-NaN rows.
threshold = 0.8

# Translate the share into an absolute row count; int() truncates toward zero.
min_non_na_count = int(threshold * len(weekly_returns))

# Count the observed (non-NaN) weeks per ticker and keep the columns that reach
# the minimum.
non_na_per_ticker = weekly_returns.notna().sum()
filtered_weekly_returns = weekly_returns.loc[:, non_na_per_ticker >= min_non_na_count]

# Fill the gaps that remain with each ticker's own mean weekly return.
# NOTE(review): the mean is taken over the whole sample, so filled weeks are
# synthetic values - confirm this is acceptable for downstream use.
ticker_means = filtered_weekly_returns.mean()
filled_weekly_returns = filtered_weekly_returns.fillna(ticker_means)

# Report the remaining NaN count per retained ticker, then preview the frame.
print("Columns retained after filtering:")
print(filtered_weekly_returns.isna().sum())
print(filtered_weekly_returns.head())

Columns retained after filtering:
Ticker
AAXJ      0
ABALX     0
ACWI      0
ACWV     94
ACWX      0
         ..
XRT       0
XSD       0
XSW      91
XTN      56
ZROZ      0
Length: 438, dtype: int64
Ticker                         AAXJ     ABALX      ACWI  ACWV      ACWX  \
Date                                                                      
2010-01-10 00:00:00+00:00  0.035900  0.020358  0.030267   NaN  0.035932   
2010-01-17 00:00:00+00:00 -0.014729 -0.000604 -0.005738   NaN -0.007787   
2010-01-24 00:00:00+00:00 -0.054344 -0.022384 -0.051477   NaN -0.057789   
2010-01-31 00:00:00+00:00 -0.031616 -0.008045 -0.025310   NaN -0.027763   
2010-02-07 00:00:00+00:00 -0.034185 -0.004366 -0.013983   NaN -0.027778   

Ticker                         ADRD      ADRE      ADRU     AEPGX       AGG  \
Date                                                                          
2010-01-10 00:00:00+00:00  0.026119  0.033764  0.020652  0.030517  0.004555   
2010-01-17 00:00:00+00:00 -0.002273 -0

In [6]:
# Write the filtered, gap-filled weekly returns to data/Funds_weekly_returns.csv.
# The path is built with pathlib instead of a hard-coded backslash: '\F' is not
# a valid string escape, and a backslash is only a directory separator on
# Windows, so the previous literal did not point into the data directory on
# other platforms.
output_path = Path('data') / 'Funds_weekly_returns.csv'
# Create the target directory on first use so the export does not fail on a
# fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)
filled_weekly_returns.to_csv(output_path)