In [266]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Let long cell values (up to 200 characters) display without truncation
pd.set_option("display.max_colwidth", 200)

# Page that the bond table is read from
url = 'https://www.hl.co.uk/shares/corporate-bonds-gilts/bond-prices/gbp-bonds'

# pd.read_html returns one DataFrame per table it finds at the URL
tables = pd.read_html(url)

# Keep the first table on the page and discard its 'Actions' column
df = tables[0].drop(columns=['Actions'])
df.head(10)



Unnamed: 0,Issuer,Coupon (%),Maturity,Price
0,3i Group GBP | XS0104440986 | 0924597,5.75,3 December 2032,102.735
1,A2D Funding plc GBP | XS1103286305 | BQ8NZW9,4.5,30 September 2026,99.85
2,Abrdn Asia Focus Plc GBX | GB00BFZ0WT29 | BFZ0WT2,2.25,31 May 2025,98.7
3,Allied Irish Banks plc GBP | XS0435957682 | BFMTY18,0.0,25 June 2035,61.0
4,Alpha Plus Holdings plc GBP | XS1379593566 | BZ5ZT30,5.0,31 March 2024,95.0
5,Anglian Water GBP | XS0089553282 | 0377986,6.875,21 August 2023,100.775
6,Anglian Water Services Financing plc GBP | XS0093312550 | 0482976,6.625,15 January 2029,109.1
7,Aviva plc GBP | XS0138717441 | 3106518,6.125,14 November 2036,100.7
8,Aviva plc GBP | XS1488459485 | BDF5PP0,4.375,12 September 2049,85.35
9,B.A.T. International Finance GBP | XS0468426266 | B5KP6X4,6.0,24 November 2034,91.175


In [267]:
# Show the raw "Issuer" strings before they are split up
print(df['Issuer'])

# Split "Issuer" on the "|" character into its three parts and strip the
# leading/trailing whitespace from each part.
# The parts are built as a separate frame rather than written back into `df`:
# mutating `df` in place (and dropping "Issuer" from it) would make this cell
# fail with a KeyError the second time it is run.
issuer_parts = df['Issuer'].str.split('|', expand=True)
issuer_parts.columns = ['Name', 'ISIN', 'SEDOL']
issuer_parts = issuer_parts.apply(lambda part: part.str.strip())

# Every column of the source table except "Issuer" (and except any column that
# would clash with the three new ones)
other_columns = [col for col in df.columns if col not in ['Issuer', 'Name', 'ISIN', 'SEDOL']]

# Assemble a new frame with "Name" first, then the remaining source columns,
# then "ISIN" and "SEDOL"; `df` itself is left untouched
df_issuer = df[other_columns].join(issuer_parts)[['Name'] + other_columns + ['ISIN', 'SEDOL']]

# Display the updated DataFrame with additional columns
print(df_issuer)


0                            3i Group GBP | XS0104440986 | 0924597
1                     A2D Funding plc GBP | XS1103286305 | BQ8NZW9
2                Abrdn Asia Focus Plc GBX | GB00BFZ0WT29 | BFZ0WT2
3              Allied Irish Banks plc GBP | XS0435957682 | BFMTY18
4             Alpha Plus Holdings plc GBP | XS1379593566 | BZ5ZT30
                                  ...                             
133         Vanquis Banking Group plc GBP | XS1209091856 | BWNH4K9
134                    Vodafone Group GBP | XS0181816652 | 3388361
135                    Vodafone Group GBP | XS0158715713 | 3230097
136    West Bromwich Building Society GBP | GB00B0CX2M20 | B0CX2M2
137        Yorkshire Building Society GBP | XS0498549194 | B5674L8
Name: Issuer, Length: 138, dtype: object
                                   Name  Coupon (%)           Maturity  \
0                          3i Group GBP       5.750    3 December 2032   
1                   A2D Funding plc GBP       4.500  30 September 2026   


In [272]:
# Send a request to the website and retrieve the HTML content.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
# (this must happen before anything queries `soup`, otherwise the cell raises
# NameError on a fresh kernel)
soup = BeautifulSoup(html_content, 'html.parser')

# Find all <td> elements with class "align-left"
td_list = soup.find_all('td', {'class': 'align-left'})

# Create empty lists to store data for each column
name_list = []
isin_list = []
sedol_list = []
href_list = []
ccy_list = []  # collected alongside the others but not part of df_href

# Extract the name, href, and identifiers from the <td> elements
for td in td_list:
    link = td.find('a')
    # Only cells that contain an <a> tag carrying an href are processed
    if link is None or 'href' not in link.attrs:
        continue

    ccy = ""
    isin = ""
    sedol = ""

    # Each <span> is read as "<ccy> | <ISIN> | <SEDOL>" (a span with a different
    # number of "|"-separated parts raises ValueError). The span is then removed
    # from the cell so that only the remaining text is left for the name.
    for span_element in td.find_all('span'):
        ccy, isin, sedol = (part.strip() for part in span_element.get_text().split('|'))
        span_element.extract()

    name = td.get_text(strip=True)
    href = link['href']

    # Append data to the corresponding lists; "Name" is stored as "<name> <ccy>"
    href_list.append(href)
    name_list.append(f'{name} {ccy}')
    isin_list.append(isin)
    sedol_list.append(sedol)
    ccy_list.append(ccy)

# Create a dictionary to store data in a format that can be converted to a DataFrame
data_dict = {
    'Name': name_list,
    'ISIN': isin_list,
    'SEDOL': sedol_list,
    'href': href_list
}

# Convert the dictionary to a DataFrame
df_href = pd.DataFrame(data_dict)

# Display the resulting DataFrame with Name, ISIN, SEDOL, and href columns
print(df_href)


                                   Name          ISIN    SEDOL  \
0                          3i Group GBP  XS0104440986  0924597   
1                   A2D Funding plc GBP  XS1103286305  BQ8NZW9   
2              Abrdn Asia Focus Plc GBX  GB00BFZ0WT29  BFZ0WT2   
3            Allied Irish Banks plc GBP  XS0435957682  BFMTY18   
4           Alpha Plus Holdings plc GBP  XS1379593566  BZ5ZT30   
..                                  ...           ...      ...   
133       Vanquis Banking Group plc GBP  XS1209091856  BWNH4K9   
134                  Vodafone Group GBP  XS0181816652  3388361   
135                  Vodafone Group GBP  XS0158715713  3230097   
136  West Bromwich Building Society GBP  GB00B0CX2M20  B0CX2M2   
137      Yorkshire Building Society GBP  XS0498549194  B5674L8   

                                                          href  
0    https://www.hl.co.uk/shares/shares-search-results/0924597  
1    https://www.hl.co.uk/shares/shares-search-results/BQ8NZW9  
2    https:/

In [273]:
# Outer-join the split issuer table with the scraped links on the three shared
# identifier columns, keeping rows that appear in only one of the two frames
df = df_issuer.merge(df_href, how='outer', on=['Name', 'ISIN', 'SEDOL'])

In [274]:
# Define a function to generate link HTML
def generate_link(url):
    """Return an HTML anchor for ``url`` that opens in a new tab.

    Parameters
    ----------
    url : str
        The link target; it is also used as the visible link text.
        A missing value (None / NaN) is accepted.

    Returns
    -------
    str
        ``<a href="..." target="_blank">...</a>`` with ``url`` HTML-escaped,
        or an empty string when ``url`` is missing.
    """
    import html

    # A missing href would otherwise be rendered as a link to the literal text "nan"
    if pd.isna(url):
        return ''

    # Escape the value so that characters such as '"', '<' or '&' in it cannot
    # break out of the attribute or inject markup
    safe_url = html.escape(str(url), quote=True)
    return f'<a href="{safe_url}" target="_blank">{safe_url}</a>'

# Apply link format to 'href' column.
# The Styler gets its own name so that `df` stays a DataFrame: rebinding `df`
# to the Styler would make this cell fail on a re-run (a Styler has no `.style`)
# and would break any later DataFrame operation on `df`.
df_styled = df.style.format({'href': generate_link})

df_styled

Unnamed: 0,Name,Coupon (%),Maturity,Price,ISIN,SEDOL,href
0,3i Group GBP,5.75,3 December 2032,102.735,XS0104440986,0924597,https://www.hl.co.uk/shares/shares-search-results/0924597
1,A2D Funding plc GBP,4.5,30 September 2026,99.85,XS1103286305,BQ8NZW9,https://www.hl.co.uk/shares/shares-search-results/BQ8NZW9
2,Abrdn Asia Focus Plc GBX,2.25,31 May 2025,98.7,GB00BFZ0WT29,BFZ0WT2,https://www.hl.co.uk/shares/shares-search-results/BFZ0WT2
3,Allied Irish Banks plc GBP,0.0,25 June 2035,61.0,XS0435957682,BFMTY18,https://www.hl.co.uk/shares/shares-search-results/BFMTY18
4,Alpha Plus Holdings plc GBP,5.0,31 March 2024,95.0,XS1379593566,BZ5ZT30,https://www.hl.co.uk/shares/shares-search-results/BZ5ZT30
5,Anglian Water GBP,6.875,21 August 2023,100.775,XS0089553282,0377986,https://www.hl.co.uk/shares/shares-search-results/0377986
6,Anglian Water Services Financing plc GBP,6.625,15 January 2029,109.1,XS0093312550,0482976,https://www.hl.co.uk/shares/shares-search-results/0482976
7,Aviva plc GBP,6.125,14 November 2036,100.7,XS0138717441,3106518,https://www.hl.co.uk/shares/shares-search-results/3106518
8,Aviva plc GBP,4.375,12 September 2049,85.35,XS1488459485,BDF5PP0,https://www.hl.co.uk/shares/shares-search-results/BDF5PP0
9,B.A.T. International Finance GBP,6.0,24 November 2034,91.175,XS0468426266,B5KP6X4,https://www.hl.co.uk/shares/shares-search-results/B5KP6X4


In [275]:
# Next, use the href column to extract the number of ... (TODO: finish this note)

In [None]:
# The table above shows that it's actually the SEDOL number that is used to retrieve the bond page,
# and that we don't need to use a web scraper: we can use pandas read_html directly, as we have
# the SEDOL in the original table.

In [None]:
#continue here ... 