In [1]:
import json
import os
from pathlib import Path

import pandas as pd
import requests

In [2]:
# Project directory layout; every location is derived from the user's home directory.
home_dir = Path.home()
work_dir = home_dir.joinpath('Desktop', 'GitHub', 'election_inflation_analysis')

# Data folders: <work_dir>/data, with raw downloads kept under data/raw.
data = work_dir.joinpath('data')
raw_data = data.joinpath('raw')

# Current working directory at the time this cell runs.
code = Path.cwd()

# Destinations for analysis output and for cleaned data.
output = work_dir.joinpath('output')
save = data.joinpath('clean')

In [3]:
# Download BLS CPI price indexes for every (category, census division) pair
# and stack them into one long panel DataFrame.
#
# A series ID is built as 'CUUR' + division code + category code, e.g.
# 'CUUR0110SETB' for motor fuel in New England. Note that each division code
# below already carries the leading 'S' of the BLS item code.
# NOTE(review): item code SAF11 appears to be "Food at home" rather than all
# food -- confirm that the 'food' label is the intended one.
categories = ['motor fuel', 'rent', 'food', 'apparel', 'transportation', 'medical care', 'recreation', 'education and communication']
category_codes = ['ETB', 'EHA', 'AF11', 'AA', 'AT', 'AM', 'AR', 'AE']
divisions = ['New England', 'Middle Atlantic', 'East North Central',
             'West North Central', 'South Atlantic', 'East South Central',
             'West South Central', 'Mountain', 'Pacific']
division_codes = ['0110S', '0120S', '0230S',
                  '0240S', '0350S', '0360S',
                  '0370S', '0480S', '0490S']

BLS_API_URL = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

# The registration key is a credential: read it from the environment instead
# of committing it to the notebook.
api_key = os.environ.get('BLS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the BLS_API_KEY environment variable to your BLS registration key."
    )

REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the cell

# The headers are identical for every request, so build them once.
headers = {'Content-type': 'application/json'}

data_frames = []
for category, cat_code in zip(categories, category_codes):
    for division, division_code in zip(divisions, division_codes):
        series_id = f'CUUR{division_code}{cat_code}'

        # Prepare the API request
        request_data = {
            'seriesid': [series_id],
            'registrationkey': api_key,
            'startyear': '2019',
            'endyear': '2024'
        }

        # Fetch data from the API; a network failure skips this series only.
        try:
            response = requests.post(
                BLS_API_URL,
                json=request_data,
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch data for {series_id}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch data for {series_id}: {response.status_code}")
            continue

        # Named `payload` (not `data`) so the `data` directory Path defined in
        # the path-setup cell is not overwritten.
        payload = response.json()

        # The API can answer HTTP 200 while reporting a failure in the body
        # (e.g. an invalid key or an exceeded daily quota).
        if payload.get('status') != 'REQUEST_SUCCEEDED':
            print(f"BLS API rejected request for {series_id}: {payload.get('message')}")
            continue

        # Extract the observations defensively: a missing or empty series is
        # reported instead of raising KeyError / IndexError.
        series_list = (payload.get('Results') or {}).get('series') or []
        series_data = series_list[0].get('data', []) if series_list else []
        if not series_data:
            print(f"No data available for {series_id}")
            continue

        # Add category and division for context, keep only the relevant
        # columns, and rename the value column.
        df = (
            pd.DataFrame(series_data)
            .assign(category=category, divisions=division, series_id=series_id)
            [['category', 'divisions', 'series_id', 'year', 'period', 'value']]
            .rename(columns={'value': 'price_index'})
        )
        data_frames.append(df)

# pd.concat raises an unhelpful ValueError on an empty list; fail with context.
if not data_frames:
    raise RuntimeError("No BLS series could be downloaded; see the messages above.")

# Combine all DataFrames into a single panel DataFrame
panel_df = pd.concat(data_frames, ignore_index=True)

# Output the final DataFrame
print(panel_df)


                         category    divisions     series_id  year period  \
0                      motor fuel  New England  CUUR0110SETB  2024    M10   
1                      motor fuel  New England  CUUR0110SETB  2024    M09   
2                      motor fuel  New England  CUUR0110SETB  2024    M08   
3                      motor fuel  New England  CUUR0110SETB  2024    M07   
4                      motor fuel  New England  CUUR0110SETB  2024    M06   
...                           ...          ...           ...   ...    ...   
5034  education and communication      Pacific   CUUR0490SAE  2019    M05   
5035  education and communication      Pacific   CUUR0490SAE  2019    M04   
5036  education and communication      Pacific   CUUR0490SAE  2019    M03   
5037  education and communication      Pacific   CUUR0490SAE  2019    M02   
5038  education and communication      Pacific   CUUR0490SAE  2019    M01   

     price_index  
0        122.012  
1        126.997  
2        135.545  

In [7]:
# Save the combined panel to CSV for further analysis.
file = raw_data / 'bls_panel_data_division.csv'
# to_csv does not create missing directories, so make sure the target folder
# exists (e.g. on a fresh checkout where data/raw has not been created yet).
file.parent.mkdir(parents=True, exist_ok=True)
panel_df.to_csv(file, index=False)