# Jupyter Notebook: Load BLS QCEW Employment Data from API (NAICS 211 + 213111) into economic_indicators


In [3]:
# Jupyter Notebook: Load BLS QCEW Employment Data from API (NAICS 211 + 213111) into economic_indicators

import os
from dotenv import load_dotenv
import pandas as pd
import requests
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from time import sleep
from tqdm import tqdm

# Read settings from the process environment (after load_dotenv() has run).
# Required keys are indexed directly so a missing one fails fast with KeyError.
load_dotenv()
BLS_API_KEY = os.environ["BLS_API_KEY"]

# Database settings: name, user and password are mandatory;
# host and port fall back to a local default when unset.
DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")

# Build the URL from its parts rather than by string formatting, so the
# password is never interpolated into a hand-written connection string.
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT,
    database=DB_NAME,
)

# echo=True asks SQLAlchemy to log the SQL it emits.
engine = create_engine(connection_url, echo=True)



In [None]:
# ----------------------------------------
# Query target county-year pairs
# ----------------------------------------

# Distinct (county_fips, year) pairs already present in economic_indicators,
# restricted to county FIPS codes starting with 08 or 48 and to the years
# 2015 through 2021, ordered by year.
county_year_query = text("""
        SELECT DISTINCT county_fips, year
        FROM economic_indicators
        WHERE (county_fips LIKE '08%' OR county_fips LIKE '48%')
          AND year BETWEEN 2015 AND 2021
        ORDER BY year
    """)

# Materialise the rows as dict-like mappings while the connection is open,
# so later cells can index them by column name.
with engine.connect() as conn:
    result = conn.execute(county_year_query)
    county_years = result.mappings().all()



In [None]:
# ----------------------------------------
# Build list of all series IDs for NAICS 211 and 213111
# ----------------------------------------

# QCEW series IDs are laid out as:
#   "EN" prefix + seasonal code "U" + 5-char area code (county FIPS)
#   + data type (1 char) + size code (1 char) + ownership code (1 char)
#   + industry code (the NAICS code itself, not zero-padded).
# The previous format omitted the data-type and size codes and zero-padded
# the industry code, which does not match that layout.
# NOTE(review): codes below are taken from the BLS series-ID format
# reference (1 = all employees, 0 = all sizes, 5 = private) - confirm they
# are the measures wanted for the qcew_emp_* columns.
QCEW_DATA_TYPE = "1"
QCEW_SIZE = "0"
QCEW_OWNERSHIP = "5"
NAICS_CODES = ("211", "213111")

# Maps each series ID to the county and NAICS code it was built for.
series_map = {}
for row in county_years:
    fips = row["county_fips"]
    for naics in NAICS_CODES:
        sid = f"ENU{fips}{QCEW_DATA_TYPE}{QCEW_SIZE}{QCEW_OWNERSHIP}{naics}"
        # A series ID has no year component, so a county that appears for
        # several years rewrites the same entry; "year" therefore holds the
        # last year seen and is kept only for backward compatibility.
        # The year of each observation comes from the API response instead.
        series_map[sid] = {"county_fips": fips, "year": row["year"], "naics": naics}



In [None]:
# ----------------------------------------
# Batch query 50 at a time
# ----------------------------------------

all_series = list(series_map.keys())
batch_size = 50
records = []

# Request constants are loop-invariant, so they are defined once.
url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
headers = {"Content-type": "application/json"}
request_timeout = 30  # seconds; stops a stalled connection from hanging the run

# NOTE(review): BLS period codes are three characters (e.g. "M01".."M13",
# "Q01".."Q05", "A01"), so the previous exact comparison with "A" could never
# match. "M13"/"Q05" are the annual-average codes and "A01" the annual code;
# "A" is kept for backward compatibility. Confirm against the API docs.
annual_periods = {"A", "A01", "M13", "Q05"}


def _records_from_series(series):
    """Convert one series entry of a BLS response into update records.

    Returns a list of dicts with "county_fips", "year" and a
    "qcew_emp_<naics>" value, one per annual observation. Series that were
    not requested and observations with a missing or non-numeric year/value
    are skipped instead of aborting the whole batch.
    """
    sid = series.get("seriesID")
    meta = series_map.get(sid)
    if meta is None:
        print(f"⚠️ Unexpected series in response: {sid}")
        return []

    column = f"qcew_emp_{meta['naics']}"
    rows = []
    for obs in series.get("data", []):
        if obs.get("period") not in annual_periods:
            continue  # annual only
        try:
            rows.append({
                "county_fips": meta["county_fips"],
                "year": int(obs["year"]),
                column: float(obs["value"]),
            })
        except (KeyError, TypeError, ValueError):
            # Year or value absent / not numeric: nothing usable to store.
            continue
    return rows


for i in tqdm(range(0, len(all_series), batch_size), desc="Fetching BLS data"):
    batch_ids = all_series[i:i+batch_size]
    payload = {
        "seriesid": batch_ids,
        "startyear": "2015",
        "endyear": "2021",
        # Ask the API to include annual averages alongside the periodic data.
        "annualaverage": True,
        "registrationkey": BLS_API_KEY
    }

    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=request_timeout
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failure or a body that is not JSON: report and move on
        # so one bad batch does not stop the remaining ones.
        print(f"❌ Batch {i} failed: {e}")
    else:
        results = data.get("Results") if isinstance(data, dict) else None
        series_list = results.get("series") if isinstance(results, dict) else None
        if not series_list:
            print(f"⚠️ No results in batch {i}")
        else:
            for series in series_list:
                records.extend(_records_from_series(series))
        # Surface API diagnostics (e.g. invalid series, quota exceeded).
        messages = data.get("message") if isinstance(data, dict) else None
        for msg in messages or []:
            print(f"   BLS message (batch {i}): {msg}")

    # Pause between requests on every path (the old `continue` skipped it);
    # no pause is needed after the final batch.
    if i + batch_size < len(all_series):
        sleep(1.5)  # avoid rate limits




In [None]:
# ----------------------------------------
# Convert to DataFrame and reshape for update
# ----------------------------------------

df = pd.DataFrame(records)
if df.empty:
    print("No records found.")
else:
    # Each record carries a single qcew_emp_* column; first() takes the first
    # non-null value per column, folding the per-NAICS records of a
    # county-year into one row.
    df = df.groupby(["county_fips", "year"]).first().reset_index()
    print(f"✅ {len(df)} records ready to update.")

    def _sql_value(value):
        """Return None for missing/NaN cells, otherwise a plain float.

        NaN must not reach the database: it is a valid float there, not NULL,
        so COALESCE would overwrite the stored value with NaN.
        """
        if value is None or pd.isna(value):
            return None
        return float(value)

    # rec.get() yields None when a NAICS column is absent from the frame
    # altogether (no data returned for that industry).
    update_params = [
        {
            "emp211": _sql_value(rec.get("qcew_emp_211")),
            "emp213": _sql_value(rec.get("qcew_emp_213111")),
            "county_fips": rec["county_fips"],
            "year": int(rec["year"]),
        }
        for rec in df.to_dict("records")
    ]

    # COALESCE keeps the existing column value whenever the new one is NULL.
    update_stmt = text("""
                    UPDATE economic_indicators
                    SET qcew_emp_211 = COALESCE(:emp211, qcew_emp_211),
                        qcew_emp_213111 = COALESCE(:emp213, qcew_emp_213111)
                    WHERE county_fips = :county_fips AND year = :year
                """)

    # A list of parameter sets runs the statement once per set inside a
    # single transaction that commits on success and rolls back on error.
    with engine.begin() as conn:
        conn.execute(update_stmt, update_params)

    # Reported only when an update was actually attempted.
    print("✅ BLS API QCEW employment data updated for NAICS 211 and 213111.")