In [1]:
'''
Requirements:
U.S. Bureau of Labor Statistics API key (bls_api_key), plus the Postgres username and passphrase, in config.py

Sample Code Source:
https://www.bls.gov/developers/api_python.htm#python2

Data website:
https://data.bls.gov/timeseries/APU0000704111
'''
;

''

In [24]:
import datetime as dt
import json
from urllib.parse import quote

import pandas as pd
import requests
from sqlalchemy import create_engine

from config import bls_api_key, username, passphrase

# Calendar year at the time this cell is executed; used for the default range below.
now_year = dt.date.today().year

def bacon_cpi_api(startyear =  now_year - 1, endyear = now_year):
    '''Fetch BLS series APU0000704111 and return it as a DataFrame.

    Defaults to the range "last year through this year". The defaults are
    evaluated once, when this cell defines the function, so re-run the cell
    if the kernel has been alive across a year boundary.

    Parameters
    ----------
    startyear, endyear : int or str
        First and last year to request (inclusive); converted to strings
        before being sent to the API.

    Returns
    -------
    pandas.DataFrame
        Columns ``year_month`` (str: year followed by the period code with the
        'M' removed, e.g. '202004') and ``value`` (float), in the order the
        API returned the observations. Empty (but with both columns) when the
        response contains no series data.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    '''

    series_id = 'APU0000704111'

    # The API expects the years as strings inside a JSON body.
    payload = json.dumps({
        "seriesid": [series_id],
        "startyear": str(startyear),
        "endyear": str(endyear),
        "registrationkey": bls_api_key,
    })
    headers = {'Content-type': 'application/json'}

    # Pull the data from the US Bureau of Labor Statistics. The timeout keeps
    # the notebook from hanging forever if the service does not answer.
    response = requests.post(
        'https://api.bls.gov/publicAPI/v2/timeseries/data/',
        data=payload,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    json_data = json.loads(response.text)

    # Print status, and the API message if it is not empty.
    print(json_data.get("status"))
    if json_data.get('message'):
        print(json_data['message'])

    # A rejected request may carry no usable 'Results'; treat that as "no
    # observations" instead of failing with KeyError/IndexError.
    results = json_data.get('Results')
    series = results.get('series') if isinstance(results, dict) else None
    observations = (series[0].get('data') or []) if series else []

    data_dict = {
        'year_month': [],
        'value': [],
    }

    for item in observations:
        try:
            value = float(item['value'])
        except (TypeError, ValueError):
            # Skip observations whose value is not numeric (e.g. a placeholder
            # for missing data) rather than aborting the whole fetch.
            continue

        data_dict['year_month'].append(item['year'] + item['period'].replace('M', ''))
        data_dict['value'].append(value)

    data_pd = pd.DataFrame(data_dict)

    return data_pd


In [25]:
# Pull the series with the default year range, then snapshot it to disk
# (row index omitted from the file).
data_pd = bacon_cpi_api()
data_pd.to_csv(
    'bacon_bls_data.csv',
    index=False,
)

REQUEST_SUCCEEDED


In [26]:
# Optional: uncomment to reload the CSV snapshot written by the previous cell
# instead of calling the API again.
# NOTE(review): read_csv would presumably parse year_month as integers rather
# than the strings the API path produces — confirm before mixing the two.
# data_pd = pd.read_csv('bacon_bls_data.csv')
# Display the frame that the cells below load into Postgres.
data_pd

Unnamed: 0,year_month,value
0,202004,5.346
1,202003,5.257
2,202002,5.497
3,202001,5.505
4,201912,5.474
5,201911,5.505
6,201910,5.653
7,201909,5.571
8,201908,5.576
9,201907,5.701


In [19]:
# Connect to Postgres
# Percent-encode the credentials so that characters such as '@', ':' or '/'
# inside the username or passphrase are not misread as URL delimiters.
rds_connection_string = (
    f"{quote(str(username), safe='')}:{quote(str(passphrase), safe='')}"
    "@localhost:5432/bacon_db"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [20]:
# Initial load of the Bacon CPI data into the Postgres table 'bacon_cpi'.
# Rows are appended to whatever the table already holds, so running this
# cell again inserts the same rows a second time.
data_pd.to_sql(
    name='bacon_cpi',
    con=engine,
    index=False,
    if_exists='append',
)

In [40]:
# Read the 24 most recent months already stored in Postgres.
# LIMIT without ORDER BY returns an arbitrary set of rows, so sort explicitly;
# year_month is built as year + two-digit month code (e.g. '202004'), which
# sorts chronologically.
last_24m = pd.read_sql_query(
    'SELECT * FROM bacon_cpi ORDER BY year_month DESC LIMIT 24',
    con=engine,
)
last_24m

Unnamed: 0,year_month,value
0,202003,5.257
1,202002,5.497
2,202001,5.505
3,201912,5.474
4,201911,5.505
5,201910,5.653
6,201909,5.571
7,201908,5.576
8,201907,5.701
9,201906,5.878


In [42]:
# Incremental load: keep only the months that are not among the rows just read
# back from Postgres, and insert them in a single bulk write instead of one
# round trip per row.
already_loaded = data_pd.year_month.isin(last_24m.year_month)
new_data = data_pd.loc[~already_loaded].copy()

if not new_data.empty:
    # Load Bacon CPI Data to Postgres
    new_data.to_sql(name='bacon_cpi', con=engine, if_exists='append', index=False)

# Report which months were added.
for year_month in new_data.year_month:
    print(year_month)

202004
