In [54]:
# Imports, constants, and reads
import pandas as pd
import requests as rq
import json
import api_key
import time

URL_ENDPOINT = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

national_series_list = pd.read_csv('/Users/danielsagher/Dropbox/Documents/General Assembly/Class Projects/capstone/csv/national_seriesID_list.csv')
state_series_list = pd.read_csv('/Users/danielsagher/Dropbox/Documents/General Assembly/Class Projects/capstone/csv/state_seriesID_list.csv')

START_YEAR = "2002"
END_YEAR = "2021"

In [62]:
# Define main
def main(series_input, name_of_file):
    
    data_results = derated_call(series_input, START_YEAR, END_YEAR)
    message_list = message_retriever(data_results)
    df = dataframe_maker(data_results)
    df.to_csv(f'/Users/danielsagher/Dropbox/Documents/General Assembly/Class Projects/capstone/csv/{name_of_file}.csv', index=False)
    message_list.to_csv(f'/Users/danielsagher/Dropbox/Documents/General Assembly/Class Projects/capstone/csv/{name_of_file}_message_list.csv', index=False)
    
    return df




In [56]:
# Define API call

def get_series_id(series, start_year, end_year):
    
    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({"seriesid": series, "startyear": start_year, "endyear": end_year, "registrationKey": api_key.API_KEY})

    ro = rq.post(URL_ENDPOINT, data=payload, headers=headers)
    ro.raise_for_status()
    result = ro.json()
    return result, 'Done'


In [57]:
# Define message retriever

def message_retriever(data_results):
    
    message_list = []
    for call in data_results:

        message_list.extend(call['message'])

    df = pd.DataFrame(message_list, columns = ['message'])
    df['serialID'] = df['message'].apply(lambda x: x[29:-10])
    df['year'] = df['message'].apply(lambda x: x[-4:])
    return df

In [58]:
# Define rate limit workaround

def derated_call(lst, start_year = '2002', end_year = '2021'):
    lst = list(lst['seriesID'])
    final = [] # Initialize final bucket
    batch_size = 50 # Determine batch size

    while lst: # While lst exists 
        data = lst[:batch_size] # Get the first 50 items of the batch
        lst = lst[batch_size:] # Remove processed items from list

        print(f'Processessing batch of size: {len(data)}')
        print(data)
        result = get_series_id(data, start_year, end_year) # Call API

        if result[1] == 'Done': 
            print('API call successful')
            final.append(result[0]) # Add the results to the final list
            print('Sleeping for 5 seconds')
            time.sleep(5) # Sleep
    return final

In [59]:
# Define DataFrame maker

def dataframe_maker(data_results):
    final_df = pd.DataFrame([])
    
    print('Creating DataFrame...')
    for call in data_results:  # goes into each individual call
        
        for series in call['Results']['series']:
            seriesID = series['seriesID']
            
            for data_point in series['data']:
                data_dict = {
                    'seriesID': seriesID,
                    'year': data_point['year'],
                    'period': data_point['period'],
                    'period_name': data_point['periodName'],
                    'value': data_point['value'],
                    'footnotes': data_point['footnotes'] if not '[{}]' in data_point else None
                }
                
                df = pd.DataFrame([data_dict])
                final_df = pd.concat([final_df, df], ignore_index=True)
    print('DataFrame Created')
    
    return final_df

        

In [60]:
# Call main for national and state
final_national_df  = main(national_series_list[0:100], 'national_results')
time.sleep(15)
final_state_df = main(state_series_list[0:100], 'state_results')

Processessing batch of size: 50
['WSU100', 'WSU200', 'WSU010', 'WSU020', 'WSU001', 'WSU002', 'TUU10101AA01013585', 'TUU10101AA01013590', 'TUU10101AA01013588', 'TUU10101AA01014236', 'TUU10101AA01014241', 'TUU10101AA01014239', 'TUU10101AA01013951', 'TUU10101AA01013956', 'TUU10101AA01013954', 'TUU10101AA01042892', 'TUU10101AA01042907', 'TUU10101AA01042922', 'TUU10101AA01042901', 'TUU10101AA01042916', 'TUU10101AA01042931', 'TUU10101AA01009381', 'TUU10101AA01009497', 'TUU10101AA01009613', 'TUU10101AA01000865', 'TUU10101AA01000944', 'TUU10101AA01001023', 'TUU10101AA01001102', 'TUU10101AA01001181', 'TUU10101AA01001260', 'TUU30105AA01002864', 'TUU30105AA01002999', 'TUU30105AA01003084', 'TUU30105AA01002866', 'TUU30105AA01002865', 'TUU30105AA01002902', 'TUU30105AA01043855', 'TUU20101AA01002864', 'TUU20101AA01002999', 'TUU20101AA01003084', 'TUU20101AA01002866', 'TUU20101AA01002865', 'TUU20101AA01002902', 'TUU20101AA01002870', 'CXUTOTALEXPLB0201M', 'CXUFOODTOTLLB0201M', 'CXUFOODHOMELB0201M', 'CXUF

In [61]:
final_state_df

Unnamed: 0,seriesID,year,period,period_name,value,footnotes
0,WSU100,2021,M12,December,1,[{}]
1,WSU100,2021,M11,November,1,[{}]
2,WSU100,2021,M10,October,4,[{}]
3,WSU100,2021,M09,September,1,[{}]
4,WSU100,2021,M08,August,1,[{}]
...,...,...,...,...,...,...
9905,CUUR0000SEHF02,2002,M05,May,133.2,[{}]
9906,CUUR0000SEHF02,2002,M04,April,133.1,[{}]
9907,CUUR0000SEHF02,2002,M03,March,130.2,[{}]
9908,CUUR0000SEHF02,2002,M02,February,130.6,[{}]
