In [1]:
import pandas as pd
import requests
import json
import numpy as np
from scipy import signal
from datetime import datetime
from tqdm import tqdm

In [2]:
period_map = {'January' : 1, 'February': 2, 'March' : 3, 'April' : 4, 'May' : 5, 'June' : 6, 'July' : 7, 'August' : 8, 'September' : 9, 'October' : 10, 'November' : 11, 'December' : 12}

In [23]:
from typing import Dict
from typing import List
from typing import Any

def bls_gov_to_dataframe(raw_json: List[str], value_col_name: str) -> Any:
    df = pd.DataFrame(columns = ["DATE",value_col_name])
    for json_data in raw_json:
        for series in json_data['Results']['series']:
            seriesId = series['seriesID']
            for item in series['data']:
                year = item['year']
                value = item['value']

                periodName = item['periodName']
                period = period_map[periodName]

                df.loc[len(df.index)] = [str(year) + '-' + str(period) + '-01', float(value)]
                
    df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')
    df.sort_values(by=['DATE'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

def bls_gov_request(url: str, headers : Dict[str, str], 
                    series : List[str], 
                    startYear: int, 
                    endYear: int) -> Any:
    resp_json = []
    # Split requests in 10 year chunks (limit enforced by endpoint)
    years = [(s, s + 10) if s + 10 < endYear else (s, endYear) for s in range(startYear, endYear, 11)]
    for y in tqdm(years):
        start_year = y[0]
        end_year = y[1]

        data = json.dumps({"seriesid": series, "startyear":str(start_year), "endyear":str(end_year)})

        print(f'Start Year: {start_year}, End Year: {end_year}, Url: {url}')
        resp = requests.post(url, data=data, headers=headers)
        print(f'Status: {resp.status_code}')

        txt = resp.json()
        resp_json.append(txt)
        
    return resp_json



In [20]:
# Data from US Bureau of Labor and Statistics
headers = {'Content-type': 'application/json'}
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

In [25]:
# Chained CPI for All Urban Consumers, U.S. city average (C-CPI-U)
# Series Id: SUUR0000SA0L1E
# Not Seasonally Adjusted
# Series Title: All items less food and energy in U.S. city average, all urban consumers, chained, not seasonally adjusted
# Area: U.S. city average
# Item: All items less food and energy
# Base Period: DECEMBER 1999=100
cpi_resp = bls_gov_request(url=url, headers=headers, series=['SUUR0000SA0L1E'], startYear=2000, endYear=2024)
cpi = bls_gov_to_dataframe(cpi_resp, 'CPI')
print(f'Loaded CPI data: {len(cpi)} rows')
cpi.head()

  0%|                                                     | 0/3 [00:00<?, ?it/s]

Start Year: 2000, End Year: 2010, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 33%|███████████████                              | 1/3 [00:00<00:00,  2.79it/s]

Status: 200
Start Year: 2011, End Year: 2021, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 67%|██████████████████████████████               | 2/3 [00:00<00:00,  2.57it/s]

Status: 200
Start Year: 2022, End Year: 2024, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  2.87it/s]

Status: 200
Loaded CPI data: 260 rows





Unnamed: 0,DATE,CPI
0,2000-01-01,100.3
1,2000-02-01,100.7
2,2000-03-01,101.2
3,2000-04-01,101.3
4,2000-05-01,101.2


In [22]:
cpi.to_csv('cpi_2000_2023.csv', index=False)

In [28]:
# Labor Force Statistics from the Current Population Survey

# Series Id: LNS14000000
# Seasonally Adjusted
# Series title: (Seas) Unemployment Rate
# Labor force status: Unemployment rate
# Type of data: Percent or rate
# Age: 16 years and over
    
unrate_resp = bls_gov_request(url=url, headers=headers, series=['LNS14000000'], startYear=2000, endYear=2024)
unrate = bls_gov_to_dataframe(unrate_resp, 'UNEMPLOYMENT')
print(f'Loaded Unemployment data: {len(unrate)} rows')
unrate.head()

  0%|                                                     | 0/3 [00:00<?, ?it/s]

Start Year: 2000, End Year: 2010, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 33%|███████████████                              | 1/3 [00:00<00:01,  1.84it/s]

Status: 200
Start Year: 2011, End Year: 2021, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 67%|██████████████████████████████               | 2/3 [00:00<00:00,  2.07it/s]

Status: 200
Start Year: 2022, End Year: 2024, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  2.15it/s]

Status: 200
Loaded Unemployment data: 260 rows





Unnamed: 0,DATE,UNEMPLOYMENT
0,2000-01-01,4.0
1,2000-02-01,4.1
2,2000-03-01,4.0
3,2000-04-01,3.8
4,2000-05-01,4.0


In [29]:
unrate.to_csv('unemployment_2000_2023.csv', index=False)

In [30]:
headers = {'Content-type': 'application/json'}
start_date = '2000-01-01'
end_date = '2023-08-15'
search_type='rate'
url = 'https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json'
data = {"startDate":start_date, "endDate":end_date, 'type':search_type}

months_seen = dict()

print(f'Start Date: {start_date}, End Date: {end_date}, Url: {url}')
res = requests.get(url, params=data, headers=headers)
print(f'Status: {res.status_code}')

effr_resp_json = res.json()
effr = pd.DataFrame(columns = ["DATE","EFFR"])
for r in tqdm(effr_resp_json['refRates']):
    effectiveDate = r['effectiveDate']
    sd = datetime.strptime(effectiveDate, '%Y-%m-%d')
    YYYY_MM = str(sd.year) + '-' + str(sd.month)
    if YYYY_MM not in months_seen.keys():
        months_seen[YYYY_MM] = 1
        percentRate = r['percentRate']
        effectiveDate = YYYY_MM + '-01'
        effr.loc[len(effr.index)] = [effectiveDate, float(percentRate)]

effr['DATE'] = pd.to_datetime(effr['DATE'], format='%Y-%m-%d')
effr.sort_values(by=['DATE'], inplace=True)
effr.reset_index(drop=True, inplace=True)
print(len(effr))
effr.head(n=10)

Start Date: 2000-01-01, End Date: 2023-08-15, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json
Status: 200


100%|████████████████████████████████████| 5812/5812 [00:00<00:00, 24545.18it/s]

278





Unnamed: 0,DATE,EFFR
0,2000-07-01,6.64
1,2000-08-01,6.65
2,2000-09-01,6.6
3,2000-10-01,6.59
4,2000-11-01,6.62
5,2000-12-01,5.41
6,2001-01-01,5.74
7,2001-02-01,5.59
8,2001-03-01,5.29
9,2001-04-01,4.67


In [31]:
effr.to_csv('effr_2000_2023.csv', index=False)