# SIADS 699 Capstone: June-Aug 2023:  ETF Data

In [2]:
# Import modules
from pathlib import Path

import numpy as np
import pandas as pd
import talib as ta
import yfinance as yf


# Description of ETF Tickers

In [4]:
# Ticker universe used by the rest of the notebook.
#
# NOTE(review): the legend below is carried over from the original cell. It
# describes tickers that appear in none of the lists (IJH, IJR, QQQ, VHT, VNQ,
# VEA, VWO, IEF, SHY, HYG), and several listed tickers (e.g. XLU, XLK, XLB,
# HYXF, SJNK, STOT, SPTI) have no description here -- confirm which set is
# intended and update the legend accordingly.

# --- US equity ---
# SPY:  S&P 500
# IJH:  Mid-cap US equity
# IJR:  Small-cap US equity

# --- US sector ETFs ---
# QQQ:  Technology
# VHT:  Health Care
# XLE:  Energy
# VNQ:  Real Estate
# XLF:  Financials
# XLP:  Consumer Staples
# XLY:  Consumer Discretionary

# --- International equity ---
# VEA:  Developed Markets
# VWO:  Emerging Markets

# --- Fixed income ---
# TLT:  Long-dated Treasuries (20 yrs)
# IEF:  Intermediate-dated Treasuries (7-10 yrs)
# SHY:  Short-dated Treasuries (maturity range not given in the original)
# LQD:  Investment grade corporate debt
# HYG:  High yield corporate debt

# Benchmarks: one equity and one fixed-income ticker
benchmarks = ['SPY', 'TLT']

# Equity sleeve
equity_tickers = [
    'SPY',
    'XLE', 'XLU', 'XLK', 'XLB', 'XLP', 'XLY',
    'XLI', 'XLC', 'XLV', 'XLF', 'XLRE',
]

# Debt sleeve
debt_tickers = [
    'TLT',
    'HYXF', 'SJNK', 'STOT', 'SPTI', 'LQD',
]

# Full universe: equities first, then debt
all_tickers = [*equity_tickers, *debt_tickers]
print(len(all_tickers))

18


# Function to get ETF Data

In [5]:
def get_hist_ETF_data(tickers,start_date,end_date):
    """Download daily history per ticker and return one wide indicator table.

    For every ticker the function downloads daily prices, attaches the
    dividend history, computes a 5-period EMA, a 14-period RSI and the
    MACD(12, 26, 9) line and signal on 'Adj Close', and keeps seven columns
    prefixed with the ticker symbol.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to download, processed in the given order.
    start_date, end_date : str
        Date bounds passed straight through to yfinance.

    Returns
    -------
    pandas.DataFrame
        Date-indexed frame with, for each ticker ``T``, the columns
        ``T_Adj Close``, ``T_Dividends``, ``T_EMA``, ``T_RSI``, ``T_MACD``,
        ``T_Signal`` and ``T_MACD_minus_signal``. An empty frame is returned
        when ``tickers`` is empty.
    """
    per_ticker_frames = []

    for ticker in tickers:

        # Get ETF data. auto_adjust is passed explicitly because the
        # 'Adj Close' column used below only exists for unadjusted downloads
        # -- TODO confirm against the pinned yfinance version.
        data = yf.download(ticker, start=start_date, end=end_date,
                           interval="1d", auto_adjust=False)

        # NOTE(review): some yfinance releases appear to return two-level
        # (field, ticker) columns even for a single symbol; flatten to the
        # field names so the column lookups below work either way.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        # Get historical dividend payments for this ticker
        dividends = yf.Ticker(ticker).history(
            start=start_date, end=end_date, interval="1d")['Dividends']

        # Align dividends to the price rows BY DATE. Overwriting the dividend
        # index with the price index positionally raises when the two
        # downloads differ in length and can silently shift payments onto the
        # wrong day, so only the timezone is stripped before reindexing.
        dividends_tz = getattr(dividends.index, 'tz', None)
        prices_tz = getattr(data.index, 'tz', None)
        if dividends_tz is not None and prices_tz is None:
            dividends = dividends.tz_localize(None)

        # Dates with no dividend record are treated as a zero payment
        data['Dividends'] = dividends.reindex(data.index).fillna(0.0)

        # Exponential moving average
        data['EMA'] = ta.EMA(data['Adj Close'],timeperiod=5)

        # Relative Strength Index (RSI)
        data['RSI'] = ta.RSI(data['Adj Close'],timeperiod=14)

        # Moving Average Convergence-Divergence (MACD); the histogram output
        # is not used because the difference is recomputed below
        macd, macdsignal, _ = ta.MACD(data['Adj Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        data['MACD'] = macd
        data['Signal'] = macdsignal

        # Calculate difference between MACD and Signal lines
        data['MACD_minus_signal'] = data['MACD'] - data['Signal']

        # Take subset of needed columns and prefix them with the ticker
        subset_df = data[['Adj Close','Dividends','EMA','RSI','MACD','Signal','MACD_minus_signal']]
        per_ticker_frames.append(subset_df.add_prefix(str(ticker) + '_'))

    # pd.concat rejects an empty list, so keep the empty-input result explicit
    if not per_ticker_frames:
        return pd.DataFrame()

    # Join all tickers side by side in one pass instead of growing a frame
    # inside the loop
    return pd.concat(per_ticker_frames, axis=1)

# Test function on 60/40 Equity/Debt portfolio

In [6]:
# Inputs for the test run: the symbols to download and the date window
ticker_test = [
    # equity tickers
    'SPY', 'XLE', 'XLU', 'XLK', 'XLB', 'XLP',
    'XLY', 'XLI', 'XLC', 'XLV', 'XLF', 'XLRE',
    # debt tickers
    'TLT', 'HYXF', 'SJNK', 'SPTI', 'LQD',
]
start_date, end_date = "2007-11-01", "2023-7-17"

In [7]:
# Build the combined price / indicator table for the test portfolio
test_60_40 = get_hist_ETF_data(
    tickers=ticker_test,
    start_date=start_date,
    end_date=end_date,
)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

# Add Back Dividends to Adjusted Close

In [8]:
# Tickers that are not benchmarks. Set difference (-) is used instead of
# symmetric difference (^): ^ would also pull in any benchmark that is absent
# from ticker_test, which contradicts "excludes benchmarks".
ticker_quotes = set(ticker_test) - set(benchmarks)


In [9]:
# Tickers that are not benchmarks. Set difference (-) is used instead of
# symmetric difference (^): ^ would also pull in any benchmark that is absent
# from ticker_test, which contradicts "excludes benchmarks".
ticker_quotes = set(ticker_test) - set(benchmarks)

# Indicator columns are kept for the benchmarks and removed for everything else
indicator_suffixes = ['_EMA', '_RSI', '_MACD', '_Signal', '_MACD_minus_signal']
columns_to_drop = []

for symbol in list(benchmarks) + sorted(ticker_quotes):
    price_col = symbol + '_Adj Close'
    dividend_col = symbol + '_Dividends'

    # The dividend column is gone once it has been folded into the price, so
    # skipping here lets the cell be re-run without a KeyError and without
    # adding the dividends a second time.
    if dividend_col not in test_60_40.columns:
        continue

    # NOTE(review): 'Adj Close' is normally already dividend-adjusted by the
    # data provider -- confirm that adding the cash dividend on top is intended.
    test_60_40[price_col] = test_60_40[price_col] + test_60_40[dividend_col]
    columns_to_drop.append(dividend_col)

    if symbol in ticker_quotes:
        columns_to_drop.extend(symbol + suffix for suffix in indicator_suffixes)

# Remove every processed column in a single pass
test_60_40 = test_60_40.drop(columns=columns_to_drop)

In [12]:
# Save results to CSV. The output folder is created first because to_csv
# raises when the target directory does not exist (e.g. on a fresh checkout).
output_path = Path('Excel_data/test_60_40_advanced.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
test_60_40.to_csv(output_path)