In [90]:
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
from datetime import datetime

def _dismiss_cookie_banner(driver, locator, label, timeout=10):
    """Click a cookie-consent button if it becomes clickable in time.

    Args:
        driver: Active Selenium webdriver.
        locator: ``(By.<strategy>, value)`` tuple identifying the accept button.
        label: Banner name used in the log message (e.g. ``"First"``).
        timeout: Seconds to wait for the button before giving up.
    """
    try:
        button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable(locator)
        )
    except TimeoutException:
        # Best effort: the banner is optional, so a timeout is not an error.
        print(f"{label} cookie banner not found or already accepted")
        return
    button.click()
    # Presumably gives the consent overlay time to close before the next click.
    time.sleep(2)


def get_corporate_bond_holdings(
    url="https://www.vanguard.co.uk/professional/product/etf/bond/9594/usd-corporate-bond-ucits-etf-usd-accumulating",
    path=r"C:\Users\jamie\OneDrive\Python\Py_24\Git_master\econ\VDPA",
    download_timeout=30,
):
    """
    Download the holdings file for the Vanguard USD Corporate Bond UCITS ETF
    from the Vanguard UK website using headless Chrome.

    The function opens the product page, dismisses up to two cookie-consent
    dialogs, clicks the first button inside the fund-holdings component and
    waits until a new, fully written file appears in the download directory.

    Args:
        url: Product page to open.
        path: Existing directory Chrome downloads into; it is also polled to
            detect when the download has finished.
        download_timeout: Seconds to wait for the download to complete.

    Returns:
        None. The downloaded file is left in ``path``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        selenium.common.exceptions.TimeoutException: If the holdings button
            does not appear or the download does not finish in time.
    """
    # Snapshot the directory size so a new file can be detected later.
    path_len = len(os.listdir(path))

    # Setup Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1080')  # Set a larger window size
    options.add_experimental_option("prefs", {
        "download.default_directory": path,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
        })

    # Initialize webdriver
    print("Initializing Chrome webdriver...")
    driver = webdriver.Chrome(options=options)

    # Everything after driver creation runs under try/finally so the browser
    # process is always shut down, even when a wait times out.
    try:
        print(f"Navigating to {url}...")
        driver.get(url)

        _dismiss_cookie_banner(
            driver, (By.ID, "onetrust-accept-btn-handler"), "First"
        )
        _dismiss_cookie_banner(
            driver,
            (By.XPATH, "//europe-core-cookie-consent-dialog//button[span[contains(text(), 'agree')]]"),
            "Second",
        )

        # Locate and click the download button of the holdings component.
        print("Extracting holdings data...")
        holdings_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//europe-core-fund-holdings//button[1]"))
        )
        holdings_button.click()

        def _download_finished(_driver):
            # Done when a new entry exists and Chrome has no partial
            # (.crdownload) file left in the directory.
            entries = os.listdir(path)
            return len(entries) > path_len and not any(
                '.crdownload' in name for name in entries
            )

        WebDriverWait(driver, timeout=download_timeout).until(_download_finished)
        print('File downloaded')
    finally:
        driver.quit()

# Run the scraper; the holdings file is saved into the download directory
# configured inside the function.
get_corporate_bond_holdings()

Initializing Chrome webdriver...
Navigating to https://www.vanguard.co.uk/professional/product/etf/bond/9594/usd-corporate-bond-ucits-etf-usd-accumulating...
Extracting holdings data...
File downloaded


In [129]:
import numpy as np

def weighted_quantile(values, quantiles, sample_weight=None, 
                      values_sorted=False, old_style=False):
    """ Very close to numpy.percentile, but supports weights.
    NOTE: quantiles should be in [0, 1]!
    :param values: array-like with data
    :param quantiles: array-like with many quantiles needed
    :param sample_weight: array-like of the same length as `values`;
        defaults to equal weights
    :param values_sorted: bool, if True, then will avoid sorting of
        initial array
    :param old_style: if True, will correct output to be consistent
        with numpy.percentile.
    :return: numpy.array with computed quantiles.
    :raises ValueError: if `values` is empty or `sample_weight` has a
        different length than `values`.
    """
    values = np.asarray(values)
    quantiles = np.asarray(quantiles)
    if sample_weight is None:
        sample_weight = np.ones(len(values))
    sample_weight = np.asarray(sample_weight, dtype=float)
    assert np.all(quantiles >= 0) and np.all(quantiles <= 1), \
        'quantiles should be in [0, 1]'

    if len(values) == 0:
        raise ValueError('values must not be empty')
    # Without this check a longer weight vector would be silently re-indexed
    # by the sorter and produce wrong quantiles.
    if len(sample_weight) != len(values):
        raise ValueError('sample_weight must have the same length as values')

    if not values_sorted:
        sorter = np.argsort(values)
        values = values[sorter]
        sample_weight = sample_weight[sorter]

    # Position of each observation: midpoint of its weight interval on the
    # cumulative-weight axis.
    weighted_quantiles = np.cumsum(sample_weight) - 0.5 * sample_weight
    if old_style:
        # To be convenient with numpy.percentile
        weighted_quantiles -= weighted_quantiles[0]
        span = weighted_quantiles[-1]
        # A zero span (e.g. a single observation) would divide by zero; all
        # positions are already 0, so interpolation returns that value.
        if span > 0:
            weighted_quantiles /= span
    else:
        weighted_quantiles /= np.sum(sample_weight)
    return np.interp(quantiles, weighted_quantiles, values)

In [1]:
import pandas as pd
from datetime import datetime

In [168]:
# Column names used throughout the notebook, in the same order as the
# corresponding headers of the Vanguard Excel export.
target_cols = ['Ticker', 'Holding', 'Market_Val_Percent', 'Market_Val_USD', 
        'Face_Val_USD', 'Coupon_Percent', 'Maturity_Date']

expected_cols = ['Ticker','Holding name','% of market value',
                 'Market value','Face amount','Coupon/Yield','Maturity date']

cols = dict(zip(expected_cols,target_cols))

# Not used by this cell; kept because other cells may still reference it.
dateparse = lambda x: datetime.strptime(x, '%d %b %Y')


def _strip_to_float(series, *tokens):
    """Remove each literal token from the values' text and convert to float."""
    text = series.astype(str)
    for token in tokens:
        text = text.str.replace(token, '', regex=False)
    return text.astype(float)


# Skip the 6 header rows above the table and drop the 2 trailing rows.
df = pd.read_excel(
    'Holdings details - Vanguard USD Corporate Bond UCITS ETF (USD) Accumulating - 11_11_2024.xlsx',
    skiprows=6)[:-2].rename(columns=cols)

# Fail early with a clear message if the export layout has changed.
missing_cols = [c for c in target_cols if c not in df.columns]
if missing_cols:
    raise KeyError(f"Columns missing from holdings file: {missing_cols}")

df['Market_Val_Percent'] = _strip_to_float(df['Market_Val_Percent'], '%')
df['Market_Val_USD'] = _strip_to_float(df['Market_Val_USD'], 'US$', ',')
df['Face_Val_USD'] = _strip_to_float(df['Face_Val_USD'], ',')
df['Coupon_Percent'] = _strip_to_float(df['Coupon_Percent'], '%')
df['Maturity_Date'] = pd.to_datetime(df['Maturity_Date'], format='%d %b %Y')

# Use a single reference timestamp so every row is measured from the same instant.
today = datetime.today()
df['Maturity_Years_FromNow'] = (df['Maturity_Date'] - today).dt.days / 365.25
df['AnnExpectCoupon_NomUSD'] = df['Face_Val_USD']*df['Coupon_Percent']/100

# Bin edges: 0, the market-value-weighted quartiles of years to maturity, and
# the longest maturity rounded up. The upper edge is taken from `df` itself;
# the previous reference to `df3` only worked with leftover kernel state.
maturity_quartiles = weighted_quantile(
    df['Maturity_Years_FromNow'],
    quantiles=[0.25,0.5,0.75],
    sample_weight=df['Market_Val_Percent'],
)
year_bins = np.concatenate((
    [0.0],
    maturity_quartiles,
    [float(int(np.ceil(df['Maturity_Years_FromNow'].max())))],
))

# Intervals are right-closed, so holdings with a maturity of 0 years or less
# fall outside the first bin and get NaN.
df['Year_Category']=pd.cut(df['Maturity_Years_FromNow'],bins=year_bins)

In [169]:
# Display the cleaned holdings DataFrame built from the Excel export.
df

Unnamed: 0,Ticker,Holding,Market_Val_Percent,Market_Val_USD,Face_Val_USD,Coupon_Percent,Maturity_Date,Maturity_Years_FromNow,AnnExpectCoupon_NomUSD,Year_Category
0,T,United States Treasury Note/Bond,0.4426,14584864.38,14626000.0,3.50,2026-09-30,1.878166,511910.0,"(0.0, 3.368]"
1,ABIBB,Anheuser-Busch Cos LLC / Anheuser-Busch InBev ...,0.1007,3317856.23,3370000.0,4.90,2046-02-01,21.218344,165130.0,"(15.949, 149.0]"
2,CVS,CVS Health Corp,0.0798,2628037.32,2880000.0,5.05,2048-03-25,23.362081,145440.0,"(15.949, 149.0]"
3,TMUS,T-Mobile USA Inc,0.0745,2455508.44,2525000.0,3.88,2030-04-15,5.418207,97970.0,"(3.368, 6.5]"
4,TD,Toronto-Dominion Bank/The,0.0727,2394672.22,2395000.0,4.11,2027-06-08,2.565366,98434.5,"(0.0, 3.368]"
...,...,...,...,...,...,...,...,...,...,...
9774,MAALRA,MAR Sukuk Ltd,0.0000,0.00,0.0,2.21,2025-09-02,0.802190,0.0,"(0.0, 3.368]"
9775,ABQKQD,ABQ Finance Ltd,0.0000,0.00,0.0,1.88,2025-09-08,0.818617,0.0,"(0.0, 3.368]"
9776,TINGYI,Tingyi Cayman Islands Holding Corp,0.0000,0.00,0.0,1.63,2025-09-24,0.862423,0.0,"(0.0, 3.368]"
9777,CONAMP,Contemporary Ruiding Development Ltd,0.0000,0.00,0.0,1.88,2025-09-17,0.843258,0.0,"(0.0, 3.368]"


In [8]:
# Total annual coupon (face value x coupon rate) as a fraction of total market
# value. Uses the column name created by the load cell
# ('AnnExpectCoupon_NomUSD'); the old 'Ann_Coupon_USD' name no longer exists.
df['AnnExpectCoupon_NomUSD'].sum() / df['Market_Val_USD'].sum()

0.04401703026023111

In [167]:
import plotly.express as px

# Histogram of coupon rate, with bar heights summing market-value weight and
# colours split by maturity bucket.
fig = px.histogram(
    df,
    x='Coupon_Percent',
    y='Market_Val_Percent',
    color='Year_Category',
    nbins=100,
)
fig.show()

In [None]:
# TODO: adjust the coupon to the current market value (rather than face value) - does this reflect the real (current) yield?

0       (2025.0, 2055.0]
1       (2025.0, 2055.0]
2       (2025.0, 2055.0]
3       (2025.0, 2055.0]
4       (2025.0, 2055.0]
              ...       
9774    (2025.0, 2055.0]
9775    (2025.0, 2055.0]
9776    (2025.0, 2055.0]
9777    (2025.0, 2055.0]
9778    (2025.0, 2055.0]
Name: Maturity_Year, Length: 9779, dtype: category
Categories (5, interval[float64, right]): [(2025.0, 2055.0] < (2055.0, 2084.0] < (2084.0, 2114.0] < (2114.0, 2143.0] < (2143.0, 2173.0]]

In [55]:
import plotly.express as px

# Empirical CDF of years to maturity, weighted by market-value share, with a
# marginal histogram.
fig = px.ecdf(
    df,
    x='Maturity_Years_FromNow',
    y='Market_Val_Percent',
    marginal='histogram',
)
fig.show()

In [136]:
# Maturity profile: holdings ordered by years to maturity, with market-value
# weights normalised to sum to 1 and their running total. The cumulative
# column is required by the line plot that reads 'Market_Val_CumPerc'.
df3 = (
    df[['Maturity_Years_FromNow','Market_Val_Percent']]
    .sort_values('Maturity_Years_FromNow')
    .assign(
        Market_Val_Percent=lambda x: x['Market_Val_Percent']/x['Market_Val_Percent'].sum(),
        # Evaluated after the line above, so it accumulates normalised weights.
        Market_Val_CumPerc=lambda x: x['Market_Val_Percent'].cumsum(),
    )
)

In [114]:
import plotly.express as px
# Line chart of df3's 'Market_Val_CumPerc' column against years to maturity.
fig = px.line(
    df3,
    x='Maturity_Years_FromNow',
    y='Market_Val_CumPerc',
)
fig.show()

In [80]:
# Market-value-weighted average of years to maturity.
maturity_weights = df['Market_Val_Percent'] / sum(df['Market_Val_Percent'])
sum(maturity_weights * df['Maturity_Years_FromNow'])

10.627377125937056

In [102]:
import numpy as np
# One-year-wide maturity bands (0, 1], (1, 2], ... The edges must include
# ceil(max) itself, otherwise the longest-dated holdings fall outside the last
# bin and are assigned NaN.
max_years = int(np.ceil(df['Maturity_Years_FromNow'].max()))
df['Maturity_Years_FromNowBand']=pd.cut(df['Maturity_Years_FromNow'],np.arange(0, max_years + 1))

In [103]:
# Preview the banding next to the inputs it was derived from. The coupon
# column is referenced by the name the load cell creates
# ('AnnExpectCoupon_NomUSD'), not the stale 'Ann_Coupon_USD'.
df[['Maturity_Years_FromNowBand','Face_Val_USD','Coupon_Percent','Maturity_Years_FromNow','AnnExpectCoupon_NomUSD']]

Unnamed: 0,Maturity_Years_FromNowBand,Face_Val_USD,Coupon_Percent,Maturity_Years_FromNow,Ann_Coupon_USD
0,"(1, 2]",14626000.0,3.50,1.878166,511910.0
1,"(21, 22]",3370000.0,4.90,21.218344,165130.0
2,"(23, 24]",2880000.0,5.05,23.362081,145440.0
3,"(5, 6]",2525000.0,3.88,5.418207,97970.0
4,"(2, 3]",2395000.0,4.11,2.565366,98434.5
...,...,...,...,...,...
9774,"(0, 1]",0.0,2.21,0.802190,0.0
9775,"(0, 1]",0.0,1.88,0.818617,0.0
9776,"(0, 1]",0.0,1.63,0.862423,0.0
9777,"(0, 1]",0.0,1.88,0.843258,0.0


In [99]:
# Display the holdings DataFrame again.
df

Unnamed: 0,Ticker,Holding,Market_Val_Percent,Market_Val_USD,Face_Val_USD,Coupon_Percent,Maturity_Date,Maturity_Years_FromNow,Ann_Coupon_USD,Maturity_Years_FromNowBand
0,T,United States Treasury Note/Bond,0.4426,14584864.38,14626000.0,3.50,2026-09-30,1.878166,511910.0,"(1, 2]"
1,ABIBB,Anheuser-Busch Cos LLC / Anheuser-Busch InBev ...,0.1007,3317856.23,3370000.0,4.90,2046-02-01,21.218344,165130.0,"(21, 22]"
2,CVS,CVS Health Corp,0.0798,2628037.32,2880000.0,5.05,2048-03-25,23.362081,145440.0,"(23, 24]"
3,TMUS,T-Mobile USA Inc,0.0745,2455508.44,2525000.0,3.88,2030-04-15,5.418207,97970.0,"(5, 6]"
4,TD,Toronto-Dominion Bank/The,0.0727,2394672.22,2395000.0,4.11,2027-06-08,2.565366,98434.5,"(2, 3]"
...,...,...,...,...,...,...,...,...,...,...
9774,MAALRA,MAR Sukuk Ltd,0.0000,0.00,0.0,2.21,2025-09-02,0.802190,0.0,"(0, 1]"
9775,ABQKQD,ABQ Finance Ltd,0.0000,0.00,0.0,1.88,2025-09-08,0.818617,0.0,"(0, 1]"
9776,TINGYI,Tingyi Cayman Islands Holding Corp,0.0000,0.00,0.0,1.63,2025-09-24,0.862423,0.0,"(0, 1]"
9777,CONAMP,Contemporary Ruiding Development Ltd,0.0000,0.00,0.0,1.88,2025-09-17,0.843258,0.0,"(0, 1]"
