In [47]:
import pandas as pd 
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import mplfinance as mpf
import yfinance as yf
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import os
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.wait import WebDriverWait

In [48]:
# Fund symbol; it is interpolated into the page URL below and, lower-cased,
# into the name of the holdings CSV that later cells look for.
ticker = 'XLU'
# Fund page that is opened in the browser to reach the holdings download.
url = f'https://www.sectorspdrs.com/mainfund/{ticker}'

In [49]:
def highlight_xpath(xpath, driver):
    """Give an element a red background (visual debugging aid).

    Parameters
    ----------
    xpath
        Forwarded to the script as ``arguments[0]``. Despite the name, the
        script sets ``.style`` on it, so it has to be an already-located
        element rather than an XPath string.
    driver
        Object exposing ``execute_script``; the script runs through it.
    """
    paint_red_js = "arguments[0].style.backgroundColor = 'red'"
    driver.execute_script(paint_red_js, xpath)
    
def scroll_to_xpath(xpath, driver):
    """Move the pointer onto an element using an ``ActionChains`` sequence.

    Parameters
    ----------
    xpath
        Passed to ``move_to_element``; despite the name this is the located
        element itself, not an XPath string.
    driver
        The browser driver the action chain is built on.
    """
    # Build the chain, queue the single move, and execute it in one go.
    ActionChains(driver).move_to_element(xpath).perform()

In [50]:
# Download the fund's holdings CSV into ./Data by clicking the page's
# download button in a headless Chrome session.

options = webdriver.ChromeOptions()
file_path = os.path.join(os.getcwd(), 'Data')
prefs = {"download.default_directory" : file_path}  # Changes default download path
options.add_experimental_option("prefs", prefs)

options.add_argument("--headless=new")  # Currently a bug - blank white pop up
options.add_argument("--window-position=-2400,-2400")  # Move pop up off screen

# Where the downloaded file is expected to appear.
holdings_csv = os.path.join(file_path, f'index-holdings-{ticker.lower()}.csv')

# NOTE(review): the id "__BVID__115" looks auto-generated and may change when
# the site is rebuilt - confirm / replace with a more stable locator.
download_button_xpath = '//*[@id="__BVID__115"]/div/div[1]/div[2]/button[1]'

cService = webdriver.ChromeService(executable_path=os.environ['ChromePath'])  # Chrome Driver path stored in env var
driver = webdriver.Chrome(service=cService, options=options)

try:
    # Remove a copy left over from a previous run. Otherwise the wait loop
    # below is satisfied immediately by the stale file, the browser is closed
    # before the new download lands, and later cells read outdated holdings.
    if os.path.isfile(holdings_csv):
        os.remove(holdings_csv)

    driver.get(url)

    # until() returns the value produced by the condition, i.e. the clickable
    # element itself, so no second find_element lookup is needed.
    button = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.XPATH, download_button_xpath))
    )
    scroll_to_xpath(button, driver)
    button.click()

    # Wait for file to be downloaded: poll every 10 ms for up to 3 seconds of
    # wall-clock time.
    deadline = time.monotonic() + 3
    while not os.path.isfile(holdings_csv):
        if time.monotonic() >= deadline:
            print("File not found within time")
            time.sleep(1)  # final grace period before the browser is closed
            break
        time.sleep(0.01)
finally:
    # Always shut the browser down, even if the page load, the wait or the
    # click raised; otherwise the Chrome process is left running.
    driver.quit()

In [51]:
# Load the downloaded holdings file. header=1 takes the column names from the
# file's second line, so the first line is not treated as data.
holdings_path = f'Data/index-holdings-{ticker.lower()}.csv'
holdings_df = pd.read_csv(holdings_path, header=1)
holdings_df

Unnamed: 0,Symbol,Company Name,Index Weight,Last,Change,%Change,Volume,52 Week Range
0,NEE,NextEra Energy Inc,13.89%,80.94,0.36,+0.45%,7.93 M,50.95 - 86.10
1,SO,Southern Co,8.06%,88.26,-0.67,-0.75%,3.29 M,64.53 - 91.87
2,DUK,Duke Energy Corp,7.30%,113.12,1.8,+1.62%,6.3 M,85.79 - 118.31
3,CEG,Constellation Energy Corp,6.85%,262.31,0.03,+0.01%,2.73 M,109.44 - 288.75
4,AEP,American Electric Power,4.32%,97.22,-0.5,-0.51%,1.67 M,72.01 - 105.18
5,SRE,Sempra,4.30%,81.33,-0.19,-0.23%,2.05 M,66.40 - 84.47
6,D,Dominion Energy Inc,3.89%,55.49,-0.89,-1.58%,5.51 M,39.18 - 58.94
7,PEG,Public Service Enterprise Grp,3.69%,88.7,-1.34,-1.49%,2.49 M,56.85 - 92.20
8,VST,Vistra Corp.,3.56%,124.11,-0.07,-0.06%,5.84 M,31.40 - 143.87
9,PCG,PG&E Corporation,3.49%,19.55,-0.01,-0.05%,7.05 M,15.59 - 20.65


In [52]:
def get_yahoo_data(ticker='XLF', start_date='2024-8-1', most_recent_friday=True, end_date='2023-12-31', interval='1h'):
    """Fetch OHLCV bars for ``ticker`` and add simple and cumulative returns.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.
    start_date : str
        Start of the requested window, forwarded to ``history(start=...)``.
    most_recent_friday : bool
        When True, ``end_date`` is ignored and replaced by the day after the
        most recent Friday (today counts if today is a Friday), formatted as
        ``YYYY-MM-DD``.
    end_date : str
        End of the requested window; only used when ``most_recent_friday`` is
        False. Note that the default lies before the default ``start_date``,
        so pass both explicitly in that case.
    interval : str
        Bar size forwarded to ``history(interval=...)``. Defaults to ``'1h'``,
        the value that used to be hard-coded.
        NOTE(review): the data source presumably limits how far back intraday
        intervals reach - confirm against the yfinance documentation.

    Returns
    -------
    pandas.DataFrame
        Columns ``Open, High, Low, Close, Volume, Return, Cumulative_Ret``.
        ``Return`` is the bar-over-bar percentage change of ``Close``;
        ``Cumulative_Ret`` is the running product of ``1 + Return`` with the
        first row set to 1. The ticker is stored on the frame as ``.name``.
        If no rows come back, an empty frame with the same columns is
        returned instead of raising.
    """
    if most_recent_friday:
        today = datetime.now()
        # weekday(): Monday == 0 ... Friday == 4; the modulo wraps Sat-Thu
        # back to the previous Friday and yields 0 on a Friday.
        days_since_friday = (today.weekday() - 4) % 7
        last_friday = today - timedelta(days=days_since_friday)
        # One day past Friday is requested as the end of the window.
        # NOTE(review): presumably because the end bound is exclusive - confirm.
        end_date = (last_friday.date() + timedelta(days=1)).strftime('%Y-%m-%d')

    history = yf.Ticker(ticker).history(start=start_date, end=end_date, interval=interval)
    # Work on an independent copy so that adding columns below never writes
    # into (or warns about) the frame returned by the data source.
    price_df = history[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

    if price_df.empty:
        # No bars returned: there is no first row to anchor at 1, and
        # pct_change() on an empty Series raises in some pandas releases.
        price_df['Return'] = float('nan')
        price_df['Cumulative_Ret'] = float('nan')
    else:
        price_df['Return'] = price_df['Close'].pct_change()
        price_df['Cumulative_Ret'] = (price_df['Return'] + 1).cumprod()
        # The first Return is NaN (nothing to compare against), so the
        # cumulative series is anchored at 1 explicitly.
        price_df.at[price_df.index[0], 'Cumulative_Ret'] = 1

    # Plain attribute kept for existing callers; pandas does not carry it
    # over to frames derived from this one.
    price_df.name = ticker

    return price_df

In [53]:
# Price history for the symbol in the first row / first column of the
# holdings table.
top_symbol = holdings_df.iloc[0, 0]
get_yahoo_data(top_symbol)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Return,Cumulative_Ret
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-08-01 09:30:00-04:00,76.269997,76.745003,75.900002,76.629997,1687229,,1.000000
2024-08-01 10:30:00-04:00,76.629997,77.940002,76.629829,77.930000,2057480,0.016965,1.016965
2024-08-01 11:30:00-04:00,77.919998,78.019997,77.370003,77.849998,1227763,-0.001027,1.015921
2024-08-01 12:30:00-04:00,77.840103,78.050003,77.760002,77.760002,752640,-0.001156,1.014746
2024-08-01 13:30:00-04:00,77.760002,77.949997,77.639999,77.830002,727561,0.000900,1.015660
...,...,...,...,...,...,...,...
2024-10-10 11:30:00-04:00,81.440002,81.764999,81.290001,81.720001,639724,0.003438,1.066423
2024-10-10 12:30:00-04:00,81.735001,81.745003,80.820000,80.949997,601334,-0.009422,1.056375
2024-10-10 13:30:00-04:00,80.945000,81.309998,80.919998,81.235001,607796,0.003521,1.060094
2024-10-10 14:30:00-04:00,81.230003,81.245697,80.915001,81.120003,677382,-0.001416,1.058593
