In [62]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

def clean_text(text):
    """Normalise the text of a scraped table cell.

    Every newline character is removed and surrounding whitespace is trimmed.

    Args:
        text: Raw cell text (a ``str``).

    Returns:
        The cleaned string, or ``None`` when the cleaned string is empty or
        is exactly ``"N/A"``.
    """
    # Splitting on '\n' and re-joining with nothing drops every newline.
    stripped = "".join(text.split('\n')).strip()
    return None if stripped in ("", "N/A") else stripped

def get_treasury_data_df(year=2022):
    """Download the daily Treasury yield-curve table for ``year`` as a DataFrame.

    The page is fetched from home.treasury.gov and the first ``<table>`` in it
    is parsed: the ``<th>`` cells of the first header row become the column
    names and the ``<td>`` cells of each body row become one record. Every
    cell goes through ``clean_text``, so values are still strings (or ``None``
    for blank / "N/A" cells); type conversion is left to the caller.

    Args:
        year: Value substituted into the ``field_tdr_date_value`` query
            parameter of the request URL.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<tr>`` of the table body.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The connection failed or timed out.
        ValueError: The response does not contain the expected table markup.
    """
    TREASURY_DATA_URL = "https://home.treasury.gov/resource-center/data-chart-center/interest-rates/TextView?type=daily_treasury_yield_curve&field_tdr_date_value={}".format(year)
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(TREASURY_DATA_URL, timeout=30)
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when a tag is absent; check each level explicitly so
    # a missing table gives a clear error rather than an AttributeError on None.
    table = soup.find('table')
    thead = table.find('thead') if table is not None else None
    tbody = table.find('tbody') if table is not None else None
    header_row = thead.find('tr') if thead is not None else None
    if header_row is None or tbody is None:
        raise ValueError(
            "No yield curve table found at {}".format(TREASURY_DATA_URL)
        )

    headers = [clean_text(col_name.text) for col_name in header_row.find_all('th')]
    data = [
        [clean_text(value.text) for value in row.find_all('td')]
        for row in tbody.find_all('tr')
    ]
    return pd.DataFrame(data, columns=headers)

def text_to_date(text):
    """Parse a ``MM/DD/YYYY`` date string with ``pandas.to_datetime``.

    Args:
        text: Date text such as ``"01/03/2022"``.

    Returns:
        Whatever ``pandas.to_datetime`` yields for ``text`` under the fixed
        month/day/four-digit-year format (a ``Timestamp`` for a single string).
    """
    us_date_format = '%m/%d/%Y'
    parsed = pd.to_datetime(text, format=us_date_format)
    return parsed

def text_to_float(text):
    """Convert a scraped cell value to ``float``, treating ``None`` as missing.

    ``clean_text`` returns ``None`` for blank and "N/A" cells, and
    ``float(None)`` raises ``TypeError``. Such missing values are mapped to
    NaN so that a whole column can be converted without failing.

    Args:
        text: A numeric string, or ``None`` for a missing value.

    Returns:
        The parsed ``float``; NaN when ``text`` is ``None``.

    Raises:
        ValueError: ``text`` is a string that does not represent a number.
    """
    if text is None:
        return float('nan')
    return float(text)

In [53]:
# Fetch the 2022 table once; the cells below derive `df` from this raw frame.
original_df = get_treasury_data_df(year=2022)

In [66]:
select_columns = ['Date', '3 Mo', '6 Mo', '1 Yr', '2 Yr', '3 Yr', '5 Yr', '7 Yr', '10 Yr', '20 Yr', '30 Yr']
rate_columns = select_columns[1:]

# Take an explicit copy: assigning into a bare column selection of
# original_df raises pandas' SettingWithCopyWarning and leaves it ambiguous
# whether the raw frame is modified.
df = original_df[select_columns].copy()

# Assign whole columns instead of `df.loc[:, col] = ...`. In recent pandas the
# .loc form writes values into the existing object-dtype column, so the
# column dtype would stay `object` rather than becoming datetime64 / float64.
# text_to_date forwards to pd.to_datetime, which also accepts a whole Series.
df['Date'] = text_to_date(df['Date'])
for col in rate_columns:
    # pd.to_numeric turns missing cells (None) into NaN, where a per-value
    # float(None) would raise TypeError; non-numeric text still raises.
    df[col] = pd.to_numeric(df[col])


In [70]:
# Preview the first five rows of the converted frame.
df.head(n=5)

Unnamed: 0,Date,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,2022-01-03 00:00:00,0.08,0.22,0.4,0.78,1.04,1.37,1.55,1.63,2.05,2.01
1,2022-01-04 00:00:00,0.08,0.22,0.38,0.77,1.02,1.37,1.57,1.66,2.1,2.07
2,2022-01-05 00:00:00,0.09,0.22,0.41,0.83,1.1,1.43,1.62,1.71,2.12,2.09
3,2022-01-06 00:00:00,0.1,0.23,0.45,0.88,1.15,1.47,1.66,1.73,2.12,2.09
4,2022-01-07 00:00:00,0.1,0.24,0.43,0.87,1.17,1.5,1.69,1.76,2.15,2.11
