In [89]:
import pandas as pd
from datetime import datetime

In [90]:
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
from datetime import datetime

def _accept_cookie_banner(driver, locator, label, timeout=10):
    """Click a cookie-consent button if it becomes clickable within `timeout` seconds.

    A banner that never appears is not treated as an error (it may already have
    been accepted), so a timeout is reported and otherwise ignored.
    """
    try:
        consent_button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable(locator)
        )
    except TimeoutException:
        print(f"{label} cookie banner not found or already accepted")
        return
    consent_button.click()
    time.sleep(2)  # let the overlay close before the next interaction


def get_corporate_bond_holdings(url=None, download_dir=None, download_timeout=60):
    """
    Downloads the holdings file for Vanguard USD Corporate Bond UCITS ETF from the Vanguard UK website.

    The page is opened in headless Chrome, up to two cookie-consent dialogs are
    dismissed, and the first button inside the holdings component is clicked,
    which makes Chrome save a file into `download_dir`.

    Args:
        url: Product page to open. Defaults to the USD Corporate Bond UCITS ETF page.
        download_dir: Existing directory Chrome should download into. Defaults to
            the original hard-coded VDPA folder.
        download_timeout: Seconds to wait for a new, fully written file to appear
            in `download_dir` (was a fixed 2 seconds).

    Returns:
        None. The result is the file written to `download_dir`.

    Raises:
        FileNotFoundError: If `download_dir` does not exist.
        selenium.common.exceptions.TimeoutException: If the holdings button is not
            clickable within 10 seconds or the download does not finish in time.
    """
    if url is None:
        url = "https://www.vanguard.co.uk/professional/product/etf/bond/9594/usd-corporate-bond-ucits-etf-usd-accumulating"
    if download_dir is None:
        download_dir = r"C:\Users\jamie\OneDrive\Python\Py_24\Git_master\econ\VDPA"

    # Snapshot the folder so the finished download can be recognised as a new name.
    files_before = set(os.listdir(download_dir))

    # Setup Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1080')  # Set a larger window size
    options.add_experimental_option("prefs", {
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
        })

    # Initialize webdriver
    print("Initializing Chrome webdriver...")
    driver = webdriver.Chrome(options=options)

    # Everything after the browser starts runs under try/finally so the Chrome
    # process is always shut down, even when a wait times out.
    try:
        # Navigate to the page
        print(f"Navigating to {url}...")
        driver.get(url)

        # Handle cookie consent #1 and #2
        _accept_cookie_banner(
            driver, (By.ID, "onetrust-accept-btn-handler"), "First"
        )
        _accept_cookie_banner(
            driver,
            # XPath shape: //<tag name>//button[<child predicate>]
            (By.XPATH, "//europe-core-cookie-consent-dialog//button[span[contains(text(), 'agree')]]"),
            "Second",
        )

        # Click the first button inside the holdings component to start the download.
        print("Extracting holdings data...")
        holdings_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//europe-core-fund-holdings//button[1]"))
        )
        holdings_button.click()

        def download_finished(_driver):
            # Done once a new name exists and none of the new names is one of
            # Chrome's in-progress '.crdownload' files.
            new_files = set(os.listdir(download_dir)) - files_before
            return bool(new_files) and not any('.crdownload' in name for name in new_files)

        WebDriverWait(driver, timeout=download_timeout).until(download_finished)
        print('File downloaded')
    finally:
        driver.quit()

# Run the scraper. It prints progress messages and saves the holdings file into
# the download directory configured inside the function; nothing is returned.
get_corporate_bond_holdings()

Initializing Chrome webdriver...
Navigating to https://www.vanguard.co.uk/professional/product/etf/bond/9594/usd-corporate-bond-ucits-etf-usd-accumulating...
Extracting holdings data...
File downloaded


In [91]:
# Column names used in this notebook for the holdings table.
target_cols = ['Ticker', 'Holding', 'Market_Val_Percent', 'Market_Val_USD',
               'Face_Val_USD', 'Coupon_Percent', 'Maturity_Date']

# Headers expected in the spreadsheet, listed in the same order as `target_cols`.
expected_cols = ['Ticker', 'Holding name', '% of market value',
                 'Market value', 'Face amount', 'Coupon/Yield', 'Maturity date']

# Spreadsheet header -> notebook column name.
cols = dict(zip(expected_cols, target_cols))

# Text-date parser for the '%d %b %Y' format (e.g. '30 Sep 2026'). The conversion
# below uses pd.to_datetime with the same format; the name is kept defined for
# any other cell that refers to it.
dateparse = lambda x: datetime.strptime(x, '%d %b %Y')

HOLDINGS_FILE = 'Holdings details - Vanguard USD Corporate Bond UCITS ETF (USD) Accumulating - 11_11_2024.xlsx'
DATE_FORMAT = '%d %b %Y'


def _to_float(series, *tokens):
    """Return `series` as floats after deleting each literal token (e.g. '%', ',').

    Values are stringified first, so cells that are already numeric (or NaN)
    convert cleanly instead of raising on a missing `.replace` method.
    """
    text = series.astype(str)
    for token in tokens:
        text = text.str.replace(token, '', regex=False)
    return text.astype(float)


# Skip the first 6 rows of the sheet and drop the last 2 rows of the table
# (presumably title/footer text rather than holdings -- verify against the file).
df = (
    pd.read_excel(HOLDINGS_FILE, skiprows=6)
    .iloc[:-2]
    .rename(columns=cols)
)

# `rename` silently ignores headers it cannot find, so fail loudly here if the
# spreadsheet layout has changed.
_missing_cols = [name for name in target_cols if name not in df.columns]
if _missing_cols:
    raise KeyError(f"Holdings file is missing expected columns: {_missing_cols}")

df['Market_Val_Percent'] = _to_float(df['Market_Val_Percent'], '%')
df['Market_Val_USD'] = _to_float(df['Market_Val_USD'], 'US$', ',')
df['Face_Val_USD'] = _to_float(df['Face_Val_USD'], ',')
df['Coupon_Percent'] = _to_float(df['Coupon_Percent'], '%')
df['Maturity_Date'] = pd.to_datetime(df['Maturity_Date'], format=DATE_FORMAT)

# Not materialised as a column here:
#   df['ann_coupon'] = df['Face_Val_USD'] * df['Coupon_Percent']




In [92]:
# Display the holdings table built in the previous cell.
df

Unnamed: 0,Ticker,Holding,Market_Val_Percent,Market_Val_USD,Face_Val_USD,Coupon_Percent,Maturity_Date
0,T,United States Treasury Note/Bond,0.4426,14584864.38,14626000.0,3.50,2026-09-30
1,ABIBB,Anheuser-Busch Cos LLC / Anheuser-Busch InBev ...,0.1007,3317856.23,3370000.0,4.90,2046-02-01
2,CVS,CVS Health Corp,0.0798,2628037.32,2880000.0,5.05,2048-03-25
3,TMUS,T-Mobile USA Inc,0.0745,2455508.44,2525000.0,3.88,2030-04-15
4,TD,Toronto-Dominion Bank/The,0.0727,2394672.22,2395000.0,4.11,2027-06-08
...,...,...,...,...,...,...,...
9774,MAALRA,MAR Sukuk Ltd,0.0000,0.00,0.0,2.21,2025-09-02
9775,ABQKQD,ABQ Finance Ltd,0.0000,0.00,0.0,1.88,2025-09-08
9776,TINGYI,Tingyi Cayman Islands Holding Corp,0.0000,0.00,0.0,1.63,2025-09-24
9777,CONAMP,Contemporary Ruiding Development Ltd,0.0000,0.00,0.0,1.88,2025-09-17


In [5]:
# Face-value-weighted average coupon, in percent. The product is computed inline
# because no active code creates an `ann_coupon` column (its assignment is
# commented out), so `df.ann_coupon` raises AttributeError on a fresh kernel.
(df['Face_Val_USD'] * df['Coupon_Percent']).sum() / df['Face_Val_USD'].sum()

4.2188618113397665

In [58]:

# Headers as they appear in the source spreadsheet.
expected_cols = [
    'Ticker',
    'Holding name',
    '% of market value',
    'Market value',
    'Face amount',
    'Coupon/Yield',
    'Maturity date',
]

# Replacement names, position-for-position with `expected_cols`.
cols = [
    'Ticker',
    'Holding',
    'Market_Val_Percent',
    'Market_Val_USD',
    'Face_Val_USD',
    'Coupon_Percent',
    'Maturity_Date',
]

# Spreadsheet header -> replacement name.
col_mapping = {source: renamed for source, renamed in zip(expected_cols, cols)}