In [168]:
# Imports, constants, and reads
import pandas as pd
import requests as rq
from requests.exceptions import HTTPError
import json
import api_key
import time
import datetime as dt
from IPython.display import clear_output

URL_ENDPOINT = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

national_series_list = pd.read_csv('../csv_from_excel/national_seriesID_list.csv')
state_series_list = pd.read_csv('../state_scrape/output/state_seriesID_list.csv')

NOW = dt.datetime.now().strftime('%d-%b-%Y_%H:%M:%S')

START_YEAR = "2002"
END_YEAR = "2021"

In [169]:
# Define main
def main(series_input, name_of_file):
    
    data_results = derated_call(series_input, START_YEAR, END_YEAR)

    message_list = message_retriever(data_results)

    df = dataframe_maker(data_results)
    
    df.to_csv(f'../api_call/main_output/{name_of_file}_{NOW}.csv', index=False)
    message_list.to_csv(f'../api_call/main_output/{name_of_file}_msglst_{NOW}.csv', index=False)

    
    time.sleep(1)
    clear_output()
    
    return df




In [170]:
# Define API call

def get_series_id(series, start_year, end_year):
    
    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({"seriesid": series, "startyear": start_year, "endyear": end_year, "registrationKey": api_key.API_KEY})
    
    try:
        ro = rq.post(URL_ENDPOINT, data=payload, headers=headers)
        ro.raise_for_status()
        result = ro.json()

    except HTTPError as e:
         
         print(f'HTTP Error: {e}')
         return None, 'HTTP Error'
        
    except Exception as e:

        print(f'An error occurred: {e}')
        return None, 'Error'

    return result, 'Done'


In [171]:
# Define message retriever

def message_retriever(data_results):
    
    message_list = []
    for call in data_results:

        message_list.extend(call['message'])

    df = pd.DataFrame(message_list, columns = ['message'])
    df['serialID'] = df['message'].apply(lambda x: x[29:-10])
    df['year'] = df['message'].apply(lambda x: x[-4:])
    return df

In [172]:
# Define rate limit workaround

def derated_call(lst, start_year = '2002', end_year = '2021'):
    lst = list(lst['seriesID'])
    final = [] # Initialize final bucket
    batch_size = 50 # Determine batch size

    while lst: # While lst exists 
        data = lst[:batch_size] # Get the first 50 items of the batch
        lst = lst[batch_size:] # Remove processed items from list

        print(f'Processessing batch of size: {len(data)}')
        print(data)

        result, status = get_series_id(data, start_year, end_year)
        
        if status == 'Done': 
            print('API call successful')
            final.append(result) # Add the results to the final list
            print('Sleeping for 5 seconds') # Call API
        elif status == 'HTTP Error':
            print('HTTP Error occurred during API call')
        else:
            print('Error occurred during API call')
        

        time.sleep(5) # Sleep
        clear_output()

    return final

In [173]:
# Define DataFrame maker

def dataframe_maker(data_results):
    final_df = pd.DataFrame([])
    
    print('Creating DataFrame...')
    for call in data_results:  # goes into each individual call
        
        for series in call['Results']['series']:
            seriesID = series['seriesID']
            
            for data_point in series['data']:
                data_dict = {
                    'seriesID': seriesID,
                    'year': data_point['year'],
                    'period': data_point['period'],
                    'period_name': data_point['periodName'],
                    'value': data_point['value'],
                    'footnotes': data_point['footnotes'] if not '[{}]' in data_point else None
                }
                
                df = pd.DataFrame([data_dict])
                final_df = pd.concat([final_df, df], ignore_index=True)
    
    print('DataFrame Created')
    return final_df

        

In [174]:
# Call main for national and state
final_national_df  = main(national_series_list[0:1], 'national_results_test2')
# final_state_df = main(state_series_list[0:50], 'state_results_test2')

In [175]:
final_national_df

Unnamed: 0,seriesID,year,period,period_name,value,footnotes
0,WSU100,2021,M12,December,1,[{}]
1,WSU100,2021,M11,November,1,[{}]
2,WSU100,2021,M10,October,4,[{}]
3,WSU100,2021,M09,September,1,[{}]
4,WSU100,2021,M08,August,1,[{}]
...,...,...,...,...,...,...
235,WSU100,2002,M05,May,3,[{}]
236,WSU100,2002,M04,April,2,[{}]
237,WSU100,2002,M03,March,1,[{}]
238,WSU100,2002,M02,February,1,[{}]
