In [1]:
# Diagnostic Notebook: Investigate QCEW Employment Data for NAICS 211

import os
from dotenv import load_dotenv
import pandas as pd
import requests
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from time import sleep
from tqdm import tqdm

# Load environment variables
load_dotenv()
BLS_API_KEY = os.environ["BLS_API_KEY"]

# Database config
DB_NAME = os.environ["DB_NAME"]
DB_USER = os.environ["DB_USER"]
DB_PASSWORD = os.environ["DB_PASSWORD"]
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = os.environ.get("DB_PORT", "5432")

connection_url = URL.create(
    "postgresql+psycopg2",
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT,
    database=DB_NAME,
)
engine = create_engine(connection_url, echo=True)

# Let's test with just a few specific counties known for oil/gas activity
test_counties = [
    ("48113", "Dallas, TX"),  # Known for oil companies
    ("48201", "Harris, TX"),  # Houston area - energy hub
    ("48329", "Midland, TX"), # Major oil/gas producing area
    ("08123", "Weld, CO"),    # Major oil/gas producing area
    ("08013", "Boulder, CO")  # Tech hub, less oil/gas
]

# Try different ownership codes
ownership_codes = {
    "0": "Total (all ownerships)",
    "5": "Private sector",
    "8": "All government",
    "1": "Federal government",
    "2": "State government",
    "3": "Local government"
}

# Test different combinations
print("Testing NAICS 211 data availability...\n")

results = []
for county_fips, county_name in test_counties:
    state = county_fips[:2]
    county = county_fips[2:]
    
    print(f"\nTesting {county_name} (FIPS: {county_fips})")
    
    for ownership_code, ownership_desc in ownership_codes.items():
        # Try both exact NAICS 211 and broader category
        for naics in ["211", "210000"]:  # 210000 is the broader 2-digit NAICS
            sid = f"ENU{state}{county}10{ownership_code}{naics.zfill(6)}"
            
            payload = {
                "seriesid": [sid],
                "startyear": "2020",  # Focus on recent year
                "endyear": "2020",
                "registrationkey": BLS_API_KEY,
                "annualaverage": True
            }
            
            try:
                response = requests.post(
                    "https://api.bls.gov/publicAPI/v2/timeseries/data/",
                    json=payload,
                    headers={"Content-type": "application/json"}
                )
                data = response.json()
                
                if "Results" in data and "series" in data["Results"]:
                    series = data["Results"]["series"][0]
                    if "data" in series and len(series["data"]) > 0:
                        for obs in series["data"]:
                            if obs["period"] == "M13":
                                value = obs["value"]
                                if value != '-':
                                    result_entry = {
                                        "County": county_name,
                                        "FIPS": county_fips,
                                        "NAICS": naics,
                                        "Ownership": ownership_desc,
                                        "SeriesID": sid,
                                        "Year": obs["year"],
                                        "Employment": float(value)
                                    }
                                    results.append(result_entry)
                                    print(f"  ✓ Found data: {naics} - {ownership_desc} - {value} employees")
                    
            except Exception as e:
                print(f"  ❌ Error checking {sid}: {e}")
            
            sleep(0.5)  # Be gentle with API

# Display results as DataFrame
if results:
    df_results = pd.DataFrame(results)
    print("\n\n=== AVAILABLE NAICS 211 DATA ===")
    display(df_results)
else:
    print("\n❌ No NAICS 211 data found for any tested counties")

# Let's also check the state-level data for NAICS 211
print("\nChecking state-level data for Texas and Colorado...")
state_results = []

for state_code, state_name in [("48", "Texas"), ("08", "Colorado")]:
    # For state-level data, county code is "000"
    sid = f"ENU{state_code}000105211000"
    
    payload = {
        "seriesid": [sid],
        "startyear": "2019",
        "endyear": "2021",
        "registrationkey": BLS_API_KEY,
        "annualaverage": True
    }
    
    try:
        response = requests.post(
            "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            json=payload,
            headers={"Content-type": "application/json"}
        )
        data = response.json()
        
        if "Results" in data and "series" in data["Results"]:
            series = data["Results"]["series"][0]
            if "data" in series:
                for obs in series["data"]:
                    if obs["period"] == "M13" and obs["value"] != '-':
                        state_results.append({
                            "State": state_name,
                            "Year": obs["year"],
                            "Employment": float(obs["value"])
                        })
                        print(f"  ✓ {state_name} {obs['year']}: {obs['value']} employees")
    
    except Exception as e:
        print(f"  ❌ Error checking state {state_name}: {e}")
    
    sleep(0.5)

if state_results:
    df_state = pd.DataFrame(state_results)
    print("\n\n=== STATE-LEVEL NAICS 211 DATA ===")
    display(df_state)
else:
    print("\n❌ No state-level NAICS 211 data found")

# Let's also check if any counties in our database have NAICS 211 data
print("\nChecking which counties in our database might have NAICS 211 data...")

# Test with a broader sample
with engine.connect() as conn:
    result = conn.execute(text("""
        SELECT DISTINCT county_fips
        FROM economic_indicators
        WHERE county_fips LIKE '48%'
        LIMIT 10
    """))
    sample_counties = [row[0] for row in result]

for county_fips in sample_counties:
    state = county_fips[:2]
    county = county_fips[2:]
    sid = f"ENU{state}{county}105211000"
    
    payload = {
        "seriesid": [sid],
        "startyear": "2021",
        "endyear": "2021",
        "registrationkey": BLS_API_KEY,
        "annualaverage": True
    }
    
    try:
        response = requests.post(
            "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            json=payload,
            headers={"Content-type": "application/json"}
        )
        data = response.json()
        
        if "Results" in data and "series" in data["Results"]:
            series = data["Results"]["series"][0]
            if "data" in series and len(series["data"]) > 0:
                for obs in series["data"]:
                    if obs["period"] == "M13" and obs["value"] != '-':
                        print(f"  ✓ Found NAICS 211 data for county {county_fips}: {obs['value']} employees")
                        break
    
    except Exception as e:
        print(f"  ❌ Error checking county {county_fips}: {e}")
    
    sleep(0.5)

Testing NAICS 211 data availability...


Testing Dallas, TX (FIPS: 48113)

Testing Harris, TX (FIPS: 48201)

Testing Midland, TX (FIPS: 48329)

Testing Weld, CO (FIPS: 08123)

Testing Boulder, CO (FIPS: 08013)

❌ No NAICS 211 data found for any tested counties

Checking state-level data for Texas and Colorado...

❌ No state-level NAICS 211 data found

Checking which counties in our database might have NAICS 211 data...
2025-04-20 18:34:06,874 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2025-04-20 18:34:06,874 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-20 18:34:06,875 INFO sqlalchemy.engine.Engine select current_schema()
2025-04-20 18:34:06,875 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-20 18:34:06,876 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2025-04-20 18:34:06,876 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-04-20 18:34:06,877 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-04-20 18:34:06,877 INFO sqlalchemy.engine.Engine 
   

In [2]:
# Check broader NAICS 21 (Mining) category

import os
from dotenv import load_dotenv
import pandas as pd
import requests
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
from time import sleep
from tqdm import tqdm

# Load environment variables
load_dotenv()
BLS_API_KEY = os.environ["BLS_API_KEY"]

# Test with broader NAICS codes for mining sector
test_series = [
    ("48", "000", "21", "Texas - All Mining"),
    ("48", "329", "21", "Midland, TX - All Mining"),
    ("08", "123", "21", "Weld, CO - All Mining"),
    ("48", "000", "211", "Texas - Oil & Gas Extraction"),
    ("48", "000", "213", "Texas - Support Activities for Mining")
]

print("Testing broader NAICS categories for mining sector...")
results = []

for state, county, naics, description in test_series:
    sid = f"ENU{state}{county}105{naics.ljust(6, '0')}"
    
    payload = {
        "seriesid": [sid],
        "startyear": "2021",
        "endyear": "2021",
        "registrationkey": BLS_API_KEY,
        "annualaverage": True
    }
    
    try:
        response = requests.post(
            "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            json=payload,
            headers={"Content-type": "application/json"}
        )
        data = response.json()
        
        # Print the full response for debugging
        print(f"\nChecking {description}:")
        print(f"Series ID: {sid}")
        
        if "Results" in data and "series" in data["Results"]:
            series = data["Results"]["series"][0]
            if "data" in series and len(series["data"]) > 0:
                for obs in series["data"]:
                    if obs["period"] == "M13":
                        value = obs["value"]
                        print(f"  Value: {value}")
                        if value != "-":
                            results.append({
                                "Description": description,
                                "NAICS": naics,
                                "Value": float(value),
                                "SeriesID": sid
                            })
            else:
                print("  No data returned")
        else:
            if "status" in data:
                print(f"  Status: {data['status']}")
            if "message" in data:
                if isinstance(data["message"], list):
                    for msg in data["message"]:
                        print(f"  Message: {msg}")
                else:
                    print(f"  Message: {data['message']}")
            else:
                print("  No results found")
    
    except Exception as e:
        print(f"  Error: {e}")
    
    sleep(1)

if results:
    df = pd.DataFrame(results)
    print("\n\nResults found:")
    display(df)
else:
    print("\n\nNo results found for any NAICS mining categories")

# Let's try one more approach - using the API's series search endpoint
print("\n\nSearching for series IDs that might contain NAICS 211 data...")

# Try finding all series IDs that contain "211" in their pattern
search_payload = {
    "startyear": "2021",
    "endyear": "2021",
    "catalog": True,  # Get catalog information
    "registrationkey": BLS_API_KEY
}

# Search for series IDs that match our pattern for a specific county
test_counties = ["48329", "08123"]  # Midland, TX and Weld, CO
for county_fips in test_counties:
    state = county_fips[:2]
    county = county_fips[2:]
    
    # Try different patterns
    patterns = [
        f"ENU{state}{county}10*211*",
        f"ENU{state}{county}*211*",
        f"EN*{state}{county}*211*"
    ]
    
    for pattern in patterns:
        print(f"\nSearching pattern: {pattern}")
        search_payload["seriesid"] = [pattern]
        
        try:
            response = requests.post(
                "https://api.bls.gov/publicAPI/v2/timeseries/data/",
                json=search_payload,
                headers={"Content-type": "application/json"}
            )
            data = response.json()
            
            if "Results" in data and "series" in data["Results"]:
                for series in data["Results"]["series"]:
                    if "catalog" in series:
                        print(f"  Found series: {series['seriesID']}")
                        print(f"    Series title: {series['catalog'].get('series_title', 'N/A')}")
                        print(f"    Area title: {series['catalog'].get('area_title', 'N/A')}")
        except Exception as e:
            print(f"  Error: {e}")
        
        sleep(0.5)

Testing broader NAICS categories for mining sector...

Checking Texas - All Mining:
Series ID: ENU48000105210000
  No data returned

Checking Midland, TX - All Mining:
Series ID: ENU48329105210000
  No data returned

Checking Weld, CO - All Mining:
Series ID: ENU08123105210000
  No data returned

Checking Texas - Oil & Gas Extraction:
Series ID: ENU48000105211000
  No data returned

Checking Texas - Support Activities for Mining:
Series ID: ENU48000105213000
  No data returned


No results found for any NAICS mining categories


Searching for series IDs that might contain NAICS 211 data...

Searching pattern: ENU4832910*211*

Searching pattern: ENU48329*211*

Searching pattern: EN*48329*211*

Searching pattern: ENU0812310*211*

Searching pattern: ENU08123*211*

Searching pattern: EN*08123*211*


## NOTE on Data Access

NAICS 211 data appears to be suppressed at the county level, and that NAICS 213111 serves as the available proxy for oil and gas employment in your analysis.


In [3]:
# BLS API Quota Check Script

import os
from dotenv import load_dotenv
import requests
from datetime import datetime

# Load environment variables
load_dotenv()
BLS_API_KEY = os.environ["BLS_API_KEY"]

def check_bls_quota():
    """
    Check the BLS API quota by making a minimal request and analyzing the response.
    The BLS API doesn't provide a direct quota check, but we can infer status from
    the API response or error messages.
    """
    
    print(f"Checking BLS API Quota at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 50)
    
    # Make a minimal request to check API status
    # Using a known series that should always work if quota is available
    payload = {
        "seriesid": ["CES0000000001"],  # Total nonfarm employment (national level)
        "startyear": "2024",
        "endyear": "2024",
        "registrationkey": BLS_API_KEY
    }
    
    try:
        response = requests.post(
            "https://api.bls.gov/publicAPI/v2/timeseries/data/",
            json=payload,
            headers={"Content-type": "application/json"}
        )
        
        # Parse response
        data = response.json()
        
        # Check for successful response
        if response.status_code == 200:
            print(f"API Status Code: {response.status_code} (OK)")
            
            # Check response status
            if "status" in data:
                print(f"API Response Status: {data['status']}")
            
            # Check for messages
            if "message" in data:
                if isinstance(data["message"], list):
                    print("API Messages:")
                    for msg in data["message"]:
                        print(f"  - {msg}")
                        # Look for specific quota messages
                        if "daily threshold" in msg.lower():
                            print("\n⚠️ WARNING: You may be approaching or have reached your daily limit!")
                        elif "exceeded" in msg.lower() or "limit" in msg.lower():
                            print("\n❌ ERROR: API limit appears to have been exceeded!")
                else:
                    print(f"API Message: {data['message']}")
            
            # Check if we got data back
            if "Results" in data and "series" in data["Results"]:
                print("\n✅ API is responding normally and returning data.")
            else:
                print("\n⚠️ API responded but didn't return expected data structure.")
                print("   This might indicate quota issues or other problems.")
        
        elif response.status_code == 429:
            print(f"API Status Code: {response.status_code} (Too Many Requests)")
            print("❌ You have exceeded your daily quota limit!")
            
        else:
            print(f"API Status Code: {response.status_code}")
            print("⚠️ Unexpected status code received.")
        
        # Additional quota information
        print("\n" + "=" * 50)
        print("BLS API Quota Information:")
        print("- Daily limit for registered users: 500 queries")
        print("- Daily limit for unregistered users: 50 queries")
        print("- The quota resets daily at midnight Eastern Time")
        print("- Your API key is registered, so you have the 500 query limit")
        
    except requests.exceptions.RequestException as e:
        print(f"Network error occurred: {e}")
    except Exception as e:
        print(f"Error occurred: {e}")
        print("Unable to determine API quota status")

# Run the quota check
if __name__ == "__main__":
    check_bls_quota()

# Optional: Function to check remaining quota by counting today's requests
def estimate_remaining_quota(requests_made_today):
    """
    Estimate remaining quota based on number of requests made today.
    """
    MAX_DAILY_QUOTA = 500  # For registered users
    remaining = MAX_DAILY_QUOTA - requests_made_today
    
    print(f"\nQuota Estimation:")
    print(f"Estimated requests made today: {requests_made_today}")
    print(f"Estimated remaining: {remaining}")
    
    if remaining <= 0:
        print("❌ You have likely exceeded your daily quota")
    elif remaining < 50:
        print("⚠️ You are approaching your daily limit")
    else:
        print(f"✅ You have approximately {remaining} requests remaining")
    
    return remaining

Checking BLS API Quota at 2025-04-20 18:37:45
API Status Code: 200 (OK)
API Response Status: REQUEST_SUCCEEDED
API Messages:

✅ API is responding normally and returning data.

BLS API Quota Information:
- Daily limit for registered users: 500 queries
- Daily limit for unregistered users: 50 queries
- The quota resets daily at midnight Eastern Time
- Your API key is registered, so you have the 500 query limit
