In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

## Grab all URLs from the Finder

In [2]:
finder_url = "https://markets.businessinsider.com/bonds/finder?borrower=71&maturity=shortterm&yield=&bondtype=2%2c3%2c4%2c16&coupon=&currency=184&rating=&country=19"

# A timeout keeps a stalled connection from hanging the kernel; raise_for_status
# stops us from parsing an HTTP error page as if it were the finder results.
r = requests.get(finder_url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

# Take the first <table> on the page (adjust the selector if the layout changes).
table = soup.find("table")
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise RuntimeError("No <table> found on the finder page; the page layout may have changed")

rows = table.find_all("tr")[1:]  # skip header
snapshot_urls = []

# Each row contributes at most one URL: the first anchor that carries an href.
for row in rows:
    link = row.find("a", href=True)
    if link:
        snapshot_urls.append("https://markets.businessinsider.com" + link['href'])

In [5]:
# Report how many bond snapshot pages the finder listing produced.
print(f"Found {len(snapshot_urls)} snapshot URLs")

Found 20 snapshot URLs


## Test: extract metadata from a single bond page

In [27]:
# Probe the first snapshot page to confirm the site answers plain GET requests.
test_url = snapshot_urls[0]
# Bound the wait so a stalled connection cannot hang the kernel indefinitely.
r = requests.get(test_url, timeout=30)

# Check status code (deliberately not raising here: the code itself is what we want to see)
print("HTTP status code:", r.status_code)

HTTP status code: 200


In [28]:
import requests
# Download the first snapshot page and keep a local copy for offline inspection.
test_url = snapshot_urls[0]
r = requests.get(test_url, timeout=30)  # bounded wait instead of hanging forever
r.raise_for_status()  # don't save an HTTP error page as if it were the bond page
html_text = r.text

# Save to file
with open("bond_page.html", "w", encoding="utf-8") as f:
    f.write(html_text)

## Extract metadata from all URLs

In [None]:
from bs4 import BeautifulSoup
import requests

# Fetch and parse the first snapshot page; later cells read fields from `soup`.
test_url = snapshot_urls[0]
r = requests.get(test_url, timeout=30)  # bounded wait instead of hanging forever
r.raise_for_status()  # fail here rather than parse an HTTP error page
soup = BeautifulSoup(r.text, "html.parser")


ISIN: None


In [34]:
def extract_field(soup, field_name):
    """Look up a labelled value in the page's ``table__tr`` rows.

    Scans every ``<tr class="table__tr">`` under ``soup``. A row matches when
    it has at least two ``<td>`` cells and the stripped text of the first cell
    equals ``field_name`` exactly.

    Args:
        soup: Parsed document (or any object exposing a compatible ``find_all``).
        field_name: Label text to match against the first cell.

    Returns:
        The stripped text of the second cell of the first matching row whose
        value is non-empty, or ``None`` when no such row exists.
    """
    for table_row in soup.find_all("tr", class_="table__tr"):
        cells = table_row.find_all("td")
        # Ignore rows that are too short or carry a different label.
        if len(cells) < 2 or cells[0].text.strip() != field_name:
            continue
        content = cells[1].text.strip()
        # A matching label with a blank value does not end the search.
        if content:
            return content
    return None

# Read the five descriptive fields from the page currently held in `soup`.
# Example values seen on the first page: coupon "4.500%", issue date
# "11/1/2023", maturity date "2/1/2026".
name, coupon, issue_date, maturity_date, isin = (
    extract_field(soup, label)
    for label in ("Name", "Coupon", "Issue Date", "Maturity Date", "ISIN")
)
print(name, coupon, issue_date, maturity_date, isin)

CANADA 23/26 4.500% 11/1/2023 2/1/2026 CA135087R226


In [35]:
# Build one metadata record per snapshot page, then collect them in a DataFrame.
metadata_list = []

for url in snapshot_urls:
    # A failed download should cost us one bond, not the whole run; report it
    # so the gap is visible rather than silent.
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping {url}: request failed ({exc})")
        continue
    soup = BeautifulSoup(r.text, "html.parser")

    # Extract metadata
    name = extract_field(soup, "Name")
    coupon = extract_field(soup, "Coupon")      # "4.500%"
    issue_date = extract_field(soup, "Issue Date")  # "11/1/2023"
    maturity_date = extract_field(soup, "Maturity Date")  # "2/1/2026"
    isin = extract_field(soup, "ISIN")

    # extract_field signals a missing field by returning None (it does not
    # raise), so the check has to be explicit.
    extracted = {
        "Name": name,
        "Coupon": coupon,
        "Issue Date": issue_date,
        "Maturity Date": maturity_date,
        "ISIN": isin,
    }
    missing = [label for label, value in extracted.items() if value is None]
    if missing:
        print(f"Skipping {url}: missing {', '.join(missing)}")
        continue  # skip if any metadata missing

    # The coupon is displayed as a percentage string; store the bare number.
    try:
        coupon_value = float(coupon.replace('%', ''))
    except ValueError:
        print(f"Skipping {url}: unparseable coupon {coupon!r}")
        continue

    metadata_list.append({
        "bond_id": isin,
        "name": name,
        "coupon": coupon_value,
        "isin": isin,
        "issue_date": issue_date,
        "maturity_date": maturity_date
    })

df_metadata = pd.DataFrame(metadata_list)
print(df_metadata)

         bond_id                       name  coupon          isin  issue_date  \
0   CA135087R226               CANADA 23/26   4.500  CA135087R226   11/1/2023   
1   CA135087L518               CANADA 20/26   0.250  CA135087L518   10/9/2020   
2   CA135087P816               CANADA 23/26   3.000  CA135087P816   1/20/2023   
3   CA135087R556               CANADA 24/26   4.000  CA135087R556   2/12/2024   
4   CA135087E679                   CDA 2026   1.500  CA135087E679   7/21/2015   
5   CA135087R978               CANADA 24/26   4.000  CA135087R978    5/6/2024   
6   CA135087L930               CANADA 21/26   1.000  CA135087L930   4/16/2021   
7   CA135087S398               CANADA 24/26   3.250  CA135087S398    8/2/2024   
8   CA135087S547               CANADA 24/27   3.000  CA135087S547   11/1/2024   
9   CA135087M847               CANADA 21/27   1.250  CA135087M847  10/15/2021   
10  CA135087S885               CANADA 25/27   2.750  CA135087S885   2/20/2025   
11  CA135087F825            

In [36]:
# Write the metadata frame to bonds_metadata.csv; index=False leaves the
# DataFrame's row index out of the file.
df_metadata.to_csv("bonds_metadata.csv", index=False)

In [11]:
# Debug check: dump the raw list of scraped records.
# NOTE(review): the output saved with this cell looks stale (an empty list, and
# a lower execution count than the scraping cell) - re-run after the scraping
# loop to confirm.
print(metadata_list)

[]


In [None]:
# Rebuild the frame from the scraped records and export it.
# Guard first: exporting an empty list would replace bonds_metadata.csv with an
# empty file, discarding any previous good export.
if not metadata_list:
    raise ValueError(
        "metadata_list is empty; run the scraping cell first "
        "(refusing to overwrite bonds_metadata.csv)"
    )
df_metadata = pd.DataFrame(metadata_list)
df_metadata.to_csv("bonds_metadata.csv", index=False)

## Extract historical prices (trying Deutsche Börse instead)

In [37]:
import requests
# Download the historical-prices page for one bond and keep a local copy for
# inspection.
deutsche_url = "https://live.deutsche-boerse.com/bond/ca135087r226-canada-government-of-4-5-23-26/price-history/historical-prices-and-volumes"
r = requests.get(deutsche_url, timeout=30)  # bounded wait instead of hanging forever
r.raise_for_status()  # don't save an HTTP error page as if it were the price history
html_text = r.text

# Save to file
with open("historical_prices.html", "w", encoding="utf-8") as f:
    f.write(html_text)