In [2]:
# Jupyter Notebook: Load BEA Employment Data to economic_indicators Table

import os
from dotenv import load_dotenv
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
import requests
from time import sleep

# Load environment variables
load_dotenv()

# Database connection
DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = os.environ.get("DB_PORT", "5432")

# BEA API Key
BEA_API_KEY = os.environ["BEA_API_KEY"]

# Create engine
connection_url = URL.create(
    "postgresql+psycopg2",
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT,
    database=DB_NAME,
)
engine = create_engine(connection_url, echo=True)

# ----------------------------------------
# First, let's check what tables are available
# ----------------------------------------
def get_available_tables():
    url = "https://apps.bea.gov/api/data"
    params = {
        "UserID": BEA_API_KEY,
        "method": "GetParameterValues",
        "datasetname": "Regional",
        "ParameterName": "TableName",
        "ResultFormat": "json"
    }
    
    resp = requests.get(url, params=params)
    try:
        results = resp.json()["BEAAPI"]["Results"]["ParamValue"]
        return results
    except KeyError:
        print("Error fetching available tables")
        print(resp.json())
        return []

# Check what employment-related tables are available
print("Checking available tables...")
tables = get_available_tables()
for table in tables:
    if "EMP" in table.get("Key", "") or "employment" in table.get("Desc", "").lower():
        print(f"  - {table['Key']}: {table['Desc']}")

# ----------------------------------------
# Try using state-level data for now
# ----------------------------------------
def fetch_bea_state_employment(year):
    """
    Fetch state-level employment data from BEA API
    Using SA25 - Total Full-Time and Part-Time Employment by Industry
    """
    url = "https://apps.bea.gov/api/data"
    
    params = {
        "UserID": BEA_API_KEY,
        "method": "GetData",
        "datasetname": "Regional",
        "TableName": "SAEMP25N",  # State employment by NAICS
        "LineCode": "10",         # Line code for mining, quarrying, and oil and gas extraction
        "GeoFIPS": "STATE",       # State level
        "Year": str(year),
        "ResultFormat": "json"
    }
    
    resp = requests.get(url, params=params)
    
    try:
        results = resp.json()["BEAAPI"]["Results"]["Data"]
    except KeyError:
        print(f"❌ Could not find 'Data' field for year {year}")
        print("Response:", resp.json())
        return pd.DataFrame()
    
    df = pd.DataFrame(results)
    df = df[["GeoFips", "TimePeriod", "DataValue"]]
    df.columns = ["state_fips", "year", "employment"]
    
    df["state_fips"] = df["state_fips"].astype(str).str.zfill(2)
    df["year"] = df["year"].astype(int)
    df["employment"] = pd.to_numeric(df["employment"].str.replace(",", ""), errors='coerce')
    
    return df

# Fetch state-level data from 2015 to 2021
state_data = []
for year in range(2015, 2022):
    print(f"Fetching state data for year {year}...")
    df = fetch_bea_state_employment(year)
    if not df.empty:
        state_data.append(df)
    sleep(1)

if state_data:
    combined_state_df = pd.concat(state_data)
    print("\nState-level mining employment data:")
    display(combined_state_df.head(10))
    
    # For counties, we can at least add state-level data to counties in those states
    # This is a workaround since county-level employment data is not available
    with engine.connect() as conn:
        result = conn.execute(text("""
            SELECT DISTINCT SUBSTRING(county_fips FROM 1 FOR 2) as state_fips, year
            FROM economic_indicators
            WHERE county_fips LIKE '08%' OR county_fips LIKE '48%'
        """))
        target_states = list(result.mappings())
    
    # Filter state data to match target states
    df_filtered = combined_state_df[combined_state_df.apply(
        lambda row: any(row["state_fips"] == item["state_fips"] and row["year"] == item["year"] for item in target_states),
        axis=1
    )]
    
    # Add column to economic_indicators if it doesn't exist
    with engine.connect() as conn:
        result = conn.execute(text("""
            SELECT column_name 
            FROM information_schema.columns 
            WHERE table_name = 'economic_indicators'
        """))
        existing_columns = [row[0] for row in result]
        
    if 'bea_state_mining_employment' not in existing_columns:
        with engine.begin() as conn:
            print("Adding column bea_state_mining_employment to economic_indicators table...")
            conn.execute(text("ALTER TABLE economic_indicators ADD COLUMN bea_state_mining_employment NUMERIC"))
    
    # Update the table with state-level data for reference
    with engine.begin() as conn:
        for _, row in df_filtered.iterrows():
            conn.execute(
                text("""
                    UPDATE economic_indicators
                    SET bea_state_mining_employment = :employment
                    WHERE SUBSTRING(county_fips FROM 1 FOR 2) = :state_fips AND year = :year
                """),
                {
                    "employment": row["employment"],
                    "state_fips": row["state_fips"],
                    "year": row["year"]
                }
            )
    
    print("✅ BEA state-level mining employment data added to economic_indicators table.")
    print("Note: This is state-level data applied to all counties within each state.")
else:
    print("❌ No state data retrieved from BEA API.")



Checking available tables...
  - SAACArtsEmp: ACPSA full-time and part-time employment by ACPSA industry
  - SAINC4: Personal income and employment by major component
  - SAOEMP: Outdoor recreation satellite account employment
  - SASUMMARY: State annual summary statistics: personal income, GDP, consumer spending, price indexes, and employment
Fetching state data for year 2015...
❌ Could not find 'Data' field for year 2015
Response: {'BEAAPI': {'Request': {'RequestParam': [{'ParameterName': 'USERID', 'ParameterValue': '2B63A93E-145E-4754-AA79-A0264DBA3FC9'}, {'ParameterName': 'METHOD', 'ParameterValue': 'GETDATA'}, {'ParameterName': 'DATASETNAME', 'ParameterValue': 'REGIONAL'}, {'ParameterName': 'TABLENAME', 'ParameterValue': 'SAEMP25N'}, {'ParameterName': 'LINECODE', 'ParameterValue': '10'}, {'ParameterName': 'GEOFIPS', 'ParameterValue': 'STATE'}, {'ParameterName': 'YEAR', 'ParameterValue': '2015'}, {'ParameterName': 'RESULTFORMAT', 'ParameterValue': 'JSON'}]}, 'Error': {'APIErrorCode