In [2]:
# Import packages
import getpass
import json
from pathlib import Path

import numpy as np
import pandas as pd
import requests
# import config

# Set basic URL request components
base_url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'
# Read the key with getpass rather than input() so the secret is not echoed
# back into the notebook's saved cell output; strip stray whitespace from pasting.
api_key = getpass.getpass('Please enter your BLS API Key here: ').strip()
headers = {'Content-type': 'application/json'}

# Use the external `conversions.csv` to format the series for desired data from BLS.
# The `BLS MPU` column is read as object so each code stays a string and can be
# concatenated into a series id of the form 'MPU' + code + '012'.
naics_conversions = pd.read_csv('../scripts/conversions.csv', dtype={'BLS MPU': object})
bls_prod_ids = ['MPU' + code + '012' for code in naics_conversions['BLS MPU']]

# Create a DataFrame called tfp [total factor productivity] to hold results:
# one row per year 2000-2021, one NaN-filled column per series id.
tfp_years = range(2000, 2022)
tfp = pd.DataFrame({series_id: [np.nan] * len(tfp_years) for series_id in bls_prod_ids}, index=tfp_years)

# Write a function that can take a list of series, a df, and a target location and can create an output file
def search_bls(df, series, start, end, output, url=base_url, key=api_key, headers=headers):
    """Fetch a batch of BLS series and merge the values into a CSV-backed table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by year with one column per series id. Used only when
        `output` does not exist yet; in that case it is updated in place.
    series : list of str
        Series ids sent as the `seriesid` field of the query.
    start, end : int or str
        First and last year of the query (sent as strings).
    output : str or path-like
        CSV file that accumulates results. If it already exists it is loaded
        and extended instead of `df`, so earlier batches are not overwritten.
    url, key, headers
        Endpoint, registration key, and HTTP headers for the POST request.

    Returns
    -------
    True
        When the response was parsed and `output` was written.
    requests.Response
        The raw response when its body is not JSON or does not contain
        `['Results']['series']`; nothing is written in that case.
    """
    # If the file at output already exists, continue from it rather than overwrite it
    if Path(output).is_file():
        df = pd.read_csv(output, index_col=0)

    # Format the query
    query = json.dumps({
        "seriesid": series,
        "startyear": f'{start}',
        "endyear": f'{end}',
        "registrationkey": key})

    # Submit the query using requests; hand the raw response back to the caller
    # if it cannot be interpreted. ValueError covers a body that is not JSON at
    # all (e.g. an HTML error page), which .json() reports as a decode error.
    response = requests.post(url, headers=headers, data=query)
    try:
        series_results = response.json()['Results']['series']
    except (KeyError, TypeError, ValueError):
        return response

    # Loop through results from each series and add to the DataFrame
    for series_result in series_results:
        column = series_result['seriesID']
        for item in series_result['data']:
            # The API delivers values as text; store them as numbers so the
            # float columns keep their dtype. Unparseable values become NaN.
            df.loc[int(item['year']), column] = pd.to_numeric(item['value'], errors='coerce')

    # Write the output to the target location and report success
    df.to_csv(output)
    return True


# Collect the data in batches of 25 series at a time and 10 years of data at a time.
# Stepping the range by the batch size never yields an empty batch, so no request
# is sent with an empty series list when the number of series is a multiple of 25
# (or zero).
batch_size = 25
year_windows = [(2000, 2009), (2010, 2019), (2020, 2021)]

for batch_start in range(0, len(bls_prod_ids), batch_size):
    batch_end = min(batch_start + batch_size, len(bls_prod_ids))

    for first_year, last_year in year_windows:
        res = search_bls(tfp, bls_prod_ids[batch_start:batch_end], first_year, last_year, '../../data/data_raw/industry_productivity.csv')
        # search_bls returns True on success and the raw response otherwise.
        # NOTE(review): the error message announces finer-grained retries, but no
        # retry is implemented here; the failed batch is only reported.
        if res is True:
            print(f'Data collected for series {batch_start}–{batch_end} and years {first_year}–{last_year}')
        else:
            print(res, '\n', f'Error in series {batch_start}–{batch_end} in years {first_year}–{last_year}. Trying more fine-grained queries now!')

Please enter your BLS API Key here: <REDACTED>


FileNotFoundError: [Errno 2] No such file or directory: '../scripts/conversions.csv'