In [135]:
import pandas as pd
import requests
import json
import numpy as np
from scipy import signal
from datetime import datetime
from tqdm import tqdm

In [136]:
period_keys = ['January', 'Jan', 'February', 'Feb', 'March', 'Mar', 'April', 'Apr', 'May', 'May', 'June', 'Jun', 'July', 'Jul', 'August', 'Aug' 'September', 'Sep', 'October', 'Oct' 'November', 'Nov' 'December', 'Dec']
period_val = zip(period_keys, [i for i in range(1, 13)])
list(period_val)

[('January', 1),
 ('Jan', 2),
 ('February', 3),
 ('Feb', 4),
 ('March', 5),
 ('Mar', 6),
 ('April', 7),
 ('Apr', 8),
 ('May', 9),
 ('May', 10),
 ('June', 11),
 ('Jun', 12)]

In [137]:
import calendar

month_names = list(calendar.month_name)
month_names.remove('')

abbr_month_names = [name[:3] for name in month_names]
month_names.extend(abbr_month_names)

month_vals = [i for i in range(1, 13)] * 2
month_vals

z = zip(month_names, month_vals)
period_map = dict(z)
print(period_map)

{'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12, 'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}


In [138]:
from typing import Dict
from typing import List
from typing import Any

def bls_gov_to_dataframe(raw_json: List[str], value_col_name: str) -> Any:
    df = pd.DataFrame(columns = ["DATE",value_col_name])
    for json_data in raw_json:
        for series in json_data['Results']['series']:
            seriesId = series['seriesID']
            for item in series['data']:
                year = item['year']
                value = item['value']

                periodName = item['periodName']
                period = period_map[periodName]

                df.loc[len(df.index)] = [str(year) + '-' + str(period) + '-01', float(value)]
                
    df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')
    df.sort_values(by=['DATE'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

def bls_gov_request(url: str, headers : Dict[str, str], 
                    series : List[str], 
                    startYear: int, 
                    endYear: int) -> Any:
    resp_json = []
    # Split requests in 10 year chunks (limit enforced by endpoint)
    years = [(s - 1, s + 9) if s + 9 < endYear else (s - 1, endYear + 1) for s in range(startYear, endYear, 10)]
    for y in tqdm(years):
        start_year = y[0]
        end_year = y[1]

        data = json.dumps({"seriesid": series, "startyear":str(start_year), "endyear":str(end_year)})

        print(f'Start Year: {start_year}, End Year: {end_year}, Url: {url}')
        resp = requests.post(url, data=data, headers=headers)
        print(f'Status: {resp.status_code}')

        txt = resp.json()
        resp_json.append(txt)
        
    return resp_json



In [139]:
# Data from US Bureau of Labor and Statistics
headers = {'Content-type': 'application/json'}
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

In [140]:
# Labor Force Statistics from the Current Population Survey

# Series Id: LNS14000000
# Seasonally Adjusted
# Series title: (Seas) Unemployment Rate
# Labor force status: Unemployment rate
# Type of data: Percent or rate
# Age: 16 years and over
    
unrate_resp = bls_gov_request(url=url, headers=headers, series=['LNS14000000'], startYear=2000, endYear=2024)
unrate = bls_gov_to_dataframe(unrate_resp, 'UNEMPLOYMENT')
print(f'Loaded Unemployment data: {len(unrate)} rows')
unrate.head()

  0%|                                                     | 0/3 [00:00<?, ?it/s]

Start Year: 1999, End Year: 2009, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 33%|███████████████                              | 1/3 [00:00<00:00,  3.32it/s]

Status: 200
Start Year: 2009, End Year: 2019, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


 67%|██████████████████████████████               | 2/3 [00:00<00:00,  3.21it/s]

Status: 200
Start Year: 2019, End Year: 2025, Url: https://api.bls.gov/publicAPI/v2/timeseries/data/


100%|█████████████████████████████████████████████| 3/3 [00:01<00:00,  2.98it/s]

Status: 200





Loaded Unemployment data: 296 rows


Unnamed: 0,DATE,UNEMPLOYMENT
0,1999-01-01,4.3
1,1999-02-01,4.4
2,1999-03-01,4.2
3,1999-04-01,4.3
4,1999-05-01,4.2


In [141]:
unrate.to_csv('unemployment_2000_2023.csv', index=False)

In [142]:
headers = {'Content-type': 'application/json'}
start_date = '2000-01-01'
end_date = '2023-08-15'
search_type='rate'
url = 'https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json'
data = {"startDate":start_date, "endDate":end_date, 'type':search_type}

months_seen = dict()

print(f'Start Date: {start_date}, End Date: {end_date}, Url: {url}')
res = requests.get(url, params=data, headers=headers)
print(f'Status: {res.status_code}')

effr_resp_json = res.json()
effr = pd.DataFrame(columns = ["DATE","EFFR"])
for r in tqdm(effr_resp_json['refRates']):
    effectiveDate = r['effectiveDate']
    sd = datetime.strptime(effectiveDate, '%Y-%m-%d')
    YYYY_MM = str(sd.year) + '-' + str(sd.month)
    if YYYY_MM not in months_seen.keys():
        months_seen[YYYY_MM] = 1
        percentRate = r['percentRate']
        effectiveDate = YYYY_MM + '-01'
        effr.loc[len(effr.index)] = [effectiveDate, float(percentRate)]

effr['DATE'] = pd.to_datetime(effr['DATE'], format='%Y-%m-%d')
effr.sort_values(by=['DATE'], inplace=True)
effr.reset_index(drop=True, inplace=True)
print(len(effr))
effr.head(n=10)

Start Date: 2000-01-01, End Date: 2023-08-15, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json
Status: 200


100%|████████████████████████████████████| 5812/5812 [00:00<00:00, 24365.09it/s]

278





Unnamed: 0,DATE,EFFR
0,2000-07-01,6.64
1,2000-08-01,6.65
2,2000-09-01,6.6
3,2000-10-01,6.59
4,2000-11-01,6.62
5,2000-12-01,5.41
6,2001-01-01,5.74
7,2001-02-01,5.59
8,2001-03-01,5.29
9,2001-04-01,4.67


In [143]:
effr.to_csv('effr_2000_2023.csv', index=False)

In [145]:
# Series Id: CUUR0000SA0
# Not Seasonally Adjusted
# Series Title: All items in U.S. city average, all urban consumers, not seasonally adjusted
# Area: U.S. city average
# Item: All items
# Base Period: 1982-84=100

cpi_percent_resp = bls_gov_request(url=url, headers=headers, series=['CUUR0000SA0'], startYear=1990, endYear=2024)

cpi_percent = bls_gov_to_dataframe(cpi_percent_resp, 'CPI')
print(f'Loaded CPI data: {len(cpi_percent)} rows')
cpi_percent.head()

  0%|                                                     | 0/4 [00:00<?, ?it/s]

Start Year: 1989, End Year: 1999, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json


 25%|███████████▎                                 | 1/4 [00:00<00:01,  2.21it/s]

Status: 403
Start Year: 1999, End Year: 2009, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json


 75%|█████████████████████████████████▊           | 3/4 [00:00<00:00,  4.09it/s]

Status: 403
Start Year: 2009, End Year: 2019, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json
Status: 403
Start Year: 2019, End Year: 2025, Url: https://markets.newyorkfed.org/api/rates/unsecured/effr/search.json


100%|█████████████████████████████████████████████| 4/4 [00:01<00:00,  3.88it/s]


Status: 403


KeyError: 'Results'

In [146]:
# Compute 12-month % change
cpis = cpi_percent['CPI']
p = [round(abs(((cpis[i] - cpis[i - 12]) / cpis[i - 12]) * 100.0), 1) for i in range(12, len(cpis))]

for i in range(12, len(cpis)):
    abs_diff = abs(cpis[i] - cpis[i - 12])
    avg_change = (cpis[i] + cpis[i - 12]) / 2.0
    a = round((abs_diff / avg_change) * 100, 1)
    print(f'{cpis[i]}, {cpis[i - 12]}, {a}')
    
cpi_percent2 = cpi_percent.drop([i for i in range(0, 12)])
cpi_percent2['CPI_PERCENT'] = p
cpi_percent2.reset_index(drop=True, inplace=True)
print(len(cpi_percent2))
cpi_percent2.tail()

127.4, 121.1, 5.1
128.0, 121.6, 5.1
128.7, 122.3, 5.1
128.9, 123.1, 4.6
129.2, 123.8, 4.3
129.9, 124.1, 4.6
130.4, 124.4, 4.7
131.6, 124.6, 5.5
132.7, 125.0, 6.0
133.5, 125.6, 6.1
133.8, 125.9, 6.1
133.8, 126.1, 5.9
134.6, 127.4, 5.5
134.8, 128.0, 5.2
135.0, 128.7, 4.8
135.2, 128.9, 4.8
135.6, 129.2, 4.8
136.0, 129.9, 4.6
136.2, 130.4, 4.4
136.6, 131.6, 3.7
137.2, 132.7, 3.3
137.4, 133.5, 2.9
137.8, 133.8, 2.9
137.9, 133.8, 3.0
138.1, 134.6, 2.6
138.6, 134.8, 2.8
139.3, 135.0, 3.1
139.5, 135.2, 3.1
139.7, 135.6, 3.0
140.2, 136.0, 3.0
140.5, 136.2, 3.1
140.9, 136.6, 3.1
141.3, 137.2, 2.9
141.8, 137.4, 3.2
142.0, 137.8, 3.0
141.9, 137.9, 2.9
142.6, 138.1, 3.2
143.1, 138.6, 3.2
143.6, 139.3, 3.0
144.0, 139.5, 3.2
144.2, 139.7, 3.2
144.4, 140.2, 3.0
144.4, 140.5, 2.7
144.8, 140.9, 2.7
145.1, 141.3, 2.7
145.7, 141.8, 2.7
145.8, 142.0, 2.6
145.8, 141.9, 2.7
146.2, 142.6, 2.5
146.7, 143.1, 2.5
147.2, 143.6, 2.5
147.4, 144.0, 2.3
147.5, 144.2, 2.3
148.0, 144.4, 2.5
148.4, 144.4, 2.7
149.0, 144

Unnamed: 0,DATE,CPI,CPI_PERCENT
399,2023-04-01,303.363,4.9
400,2023-05-01,304.127,4.0
401,2023-06-01,305.109,3.0
402,2023-07-01,305.691,3.2
403,2023-08-01,307.026,3.7


In [147]:
cpi_percent2.to_csv('cpi_2000_2023.csv', index=False)