In [18]:
# Imports, constants, and reads
import pandas as pd
import requests as rq
from requests.exceptions import HTTPError
import json
import api_key
import time
import datetime as dt
from IPython.display import clear_output

URL_ENDPOINT = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

national_series_list = pd.read_csv('../csv_from_excel/national_seriesID_list.csv')
state_series_list = pd.read_csv('../state_scrape/output/state_seriesID_list.csv')
national_series_dimension = pd.read_csv('../cleaning/output/national_seriesID_list_cleaned.csv')
state_series_dimension = pd.read_csv('../cleaning/output/state_series_dimension.csv_cleaned.csv')
survey_table = pd.read_csv('/Users/danielsagher/Dropbox/Documents/projects/bls_api_project/csv_from_excel/survey_table.csv')

NOW = dt.datetime.now().strftime('%d-%b-%Y_%H:%M:%S')

START_YEAR = "2002"
END_YEAR = "2021"

In [2]:
# Define main
def main(series_input, name_of_file):

    '''
    This main function takes in a list of seriesID's and a string which will used to name the CSV file output. 
    Any number of seriesID's can be inputted. 
    '''
    
    data_results = derated_call(series_input, START_YEAR, END_YEAR)

    message_list = message_retriever(data_results)

    df = dataframe_maker(data_results)
    
    df.to_csv(f'../api_call/main_output/{name_of_file}_{NOW}.csv', index=False)
    message_list.to_csv(f'../api_call/main_output/{name_of_file}_msglst_{NOW}.csv', index=False)

    
    time.sleep(1)
    clear_output()
    
    return df




In [3]:
# Define API call

def get_series_id(series, start_year, end_year):

    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({"seriesid": series, "startyear": start_year, "endyear": end_year, "registrationKey": api_key.API_KEY})
    
    try:
        ro = rq.post(URL_ENDPOINT, data=payload, headers=headers)
        ro.raise_for_status()
        result = ro.json()

    except HTTPError as e:
         
         print(f'HTTP Error: {e}')
         return None, 'HTTP Error'
        
    except Exception as e:

        print(f'An error occurred: {e}')
        return None, 'Error'

    return result, 'Done'


In [4]:
# Define message retriever

def message_retriever(data_results):
    '''
    When there is no data for a specific year and seriesID, a message is returned. 
    This function compiles a new DataFrame with those messages, with seperate columns for |message|serialID|year|
    '''
    message_list = []
    for call in data_results:

        message_list.extend(call['message'])

    df = pd.DataFrame(message_list, columns = ['message'])
    df['serialID'] = df['message'].apply(lambda x: x[29:-10])
    df['year'] = df['message'].apply(lambda x: x[-4:])

    return df

In [5]:
# Define rate limit workaround

def derated_call(lst, start_year = '2002', end_year = '2021'):

    lst = list(lst['seriesID'])
    final = [] 
    batch_size = 50 

    while lst: 
        data = lst[:batch_size] # Get the first 50 items of the batch
        lst = lst[batch_size:] # Remove processed items from list

        print(f'Processessing batch of size: {len(data)}')
        print(data)

        result, status = get_series_id(data, start_year, end_year)
        
        if status == 'Done': 
            print('API call successful')
            final.append(result) # Add the results to the final list
            print('Sleeping for 5 seconds') # Call API
        elif status == 'HTTP Error':
            print('HTTP Error occurred during API call')
        else:
            print('Error occurred during API call')
        
        time.sleep(5) # Sleep
        clear_output()

    return final

In [6]:
# Define DataFrame maker

def dataframe_maker(data_results):
    
    final_df = pd.DataFrame([])
    
    print('Creating DataFrame...')
    for call in data_results:  # goes into each individual call
        
        for series in call['Results']['series']:
            seriesID = series['seriesID']
            
            for data_point in series['data']:
                data_dict = {
                    'seriesID': seriesID,
                    'year': data_point['year'],
                    'period': data_point['period'],
                    'period_name': data_point['periodName'],
                    'value': data_point['value'],
                    'footnotes': data_point['footnotes'] if not '[{}]' in data_point else None
                }
                
                df = pd.DataFrame([data_dict])
                final_df = pd.concat([final_df, df], ignore_index=True)
    
    print('DataFrame Created')
    return final_df

        

In [None]:
# Call main for national and state
final_national_df  = main(national_series_list, 'national_results')
final_state_df = main(state_series_list, 'state_results')

In [19]:
state_merged = pd.merge(state_results, state_series_dimension, how='left')
double_state_merged = pd.merge(state_merged, survey_table, how='left')
double_state_merged

Unnamed: 0,seriesID,year,period,period_name,value,footnotes,series,state,survey,is_adjusted,survey_name
0,SMS01000000000000001,2021,M12,December,2083.0,[{}],Total Nonfarm,Alabama,CES,True,Current Employment Statistics
1,SMS01000000000000001,2021,M11,November,2077.1,[{}],Total Nonfarm,Alabama,CES,True,Current Employment Statistics
2,SMS01000000000000001,2021,M10,October,2070.8,[{}],Total Nonfarm,Alabama,CES,True,Current Employment Statistics
3,SMS01000000000000001,2021,M09,September,2055.1,[{}],Total Nonfarm,Alabama,CES,True,Current Employment Statistics
4,SMS01000000000000001,2021,M08,August,2055.2,[{}],Total Nonfarm,Alabama,CES,True,Current Employment Statistics
...,...,...,...,...,...,...,...,...,...,...,...
323623,SMU56000009000000001,2002,M05,May,64.1,[{}],Government,Wyoming,CES,False,Current Employment Statistics
323624,SMU56000009000000001,2002,M04,April,63.0,[{}],Government,Wyoming,CES,False,Current Employment Statistics
323625,SMU56000009000000001,2002,M03,March,63.4,[{}],Government,Wyoming,CES,False,Current Employment Statistics
323626,SMU56000009000000001,2002,M02,February,62.3,[{}],Government,Wyoming,CES,False,Current Employment Statistics


In [22]:
national_merged = pd.merge(national_results, national_series_dimension, how='left')
double_national_merged = pd.merge(national_merged, survey_table, how='left')
double_national_merged

Unnamed: 0,seriesID,year,period,period_name,value,footnotes,series,survey,is_adjusted,survey_name
0,WSU100,2021,M12,December,1,[{}],"Number of work stoppages, beginning in the period",WSP,,Work Stoppages
1,WSU100,2021,M11,November,1,[{}],"Number of work stoppages, beginning in the period",WSP,,Work Stoppages
2,WSU100,2021,M10,October,4,[{}],"Number of work stoppages, beginning in the period",WSP,,Work Stoppages
3,WSU100,2021,M09,September,1,[{}],"Number of work stoppages, beginning in the period",WSP,,Work Stoppages
4,WSU100,2021,M08,August,1,[{}],"Number of work stoppages, beginning in the period",WSP,,Work Stoppages
...,...,...,...,...,...,...,...,...,...,...
40390,LEU0252919700,2003,Q01,1st Quarter,1104,[{}],"Median wkly earns, Emp FT, Wag & sal wrkrs, Ad...",CPS,,Current Population Survey
40391,LEU0252919700,2002,Q04,4th Quarter,1125,[{}],"Median wkly earns, Emp FT, Wag & sal wrkrs, Ad...",CPS,,Current Population Survey
40392,LEU0252919700,2002,Q03,3rd Quarter,1101,[{}],"Median wkly earns, Emp FT, Wag & sal wrkrs, Ad...",CPS,,Current Population Survey
40393,LEU0252919700,2002,Q02,2nd Quarter,1105,[{}],"Median wkly earns, Emp FT, Wag & sal wrkrs, Ad...",CPS,,Current Population Survey
