In [1]:
import os
import json
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from io import StringIO
load_dotenv()

True

In [2]:
plt.rcParams["figure.figsize"] = (10,6)
plt.rcParams["axes.grid"] = True
BLS_API_KEY=os.getenv("BLS_API_KEY")

### definition of areas in and around cuyahoga county and definitions of 'advanced manufacturing'

In [3]:
CLEVELAND_MSA_CODE = "17460"

# Advanced manufacturing NAICS codes we care about
ADV_MFG_NAICS = {
    "3251": "Basic Chemical Manufacturing",
    "3254": "Pharmaceutical & Medicine Manufacturing",
    "3332": "Industrial Machinery Manufacturing",
    "3339": "Other General-Purpose Machinery Manufacturing",
    "3344": "Semiconductor & Other Electronic Component Mfg",
    "3345": "Navigational, Electromedical, & Control Instruments",
    "3359": "Other Electrical Equipment & Component Mfg (incl batteries)",
    "3361": "Motor Vehicle Manufacturing",
    "3362": "Motor Vehicle Body & Trailer Manufacturing",
    "3363": "Motor Vehicle Parts Manufacturing",
    "3364": "Aerospace Product & Parts Manufacturing",
    "3391": "Medical Equipment & Supplies Manufacturing"
}

BIOMED_TECH_NAICS = {
    "3254": "Pharmaceutical & Medicine Manufacturing",
    "3391": "Medical Equipment & Supplies Manufacturing",
    "5417": "Scientific Research & Development Services",
    "6215": "Medical & Diagnostic Laboratories",
    "6221": "General Medical & Surgical Hospitals",
    "3345": "Electromedical & Control Instruments (medical devices, imaging)",
}
CONVENTIONAL_TECH_NAICS = {
    "5112": "Software Publishers",
    "5182": "Data Processing, Hosting & Cloud Services",
    "5191": "Other Information Services (incl search engines)",
    "5415": "Computer Systems Design & Related Services",
    "5416": "Management, Scientific & Technical Consulting (tech-heavy)",
    "5173": "Wired & Wireless Telecommunications Carriers",
    "5179": "Other Telecommunications (data networks)",
    "3341": "Computer & Peripheral Equipment Manufacturing",
    "3342": "Communications Equipment Manufacturing",
}

FINANCE_NAICS = {
    "52":   "Finance & Insurance (supersector)",
    "521":  "Monetary Authorities & Central Banking",
    "5221": "Depository Credit Intermediation (Banks)",
    "5222": "Nondepository Credit (Lending, Mortgage)",
    "5231": "Securities & Investment Firms",
    "5232": "Securities Brokers & Exchanges",
    "5239": "Other Financial Investment Activities",
    "5241": "Insurance Carriers",
    "5242": "Insurance Agencies & Brokerages",
}

BLUE_COLLAR_NAICS = {
    "23":   "Construction (all types)",
    "31":   "Manufacturing (Food, Textiles, Plastics, etc.)",
    "32":   "Manufacturing (Fabrics, Wood, Paper, Printing)",
    "33":   "Manufacturing (Machinery, Metals, Appliances)",
    "48":   "Transportation (truck, rail, air)",
    "49":   "Warehousing & Logistics",
    "221":  "Utilities (electric, water, gas)",
    "811":  "Repair & Maintenance (auto, HVAC, machinery)",
}



In [4]:
# Correct BLS QCEW API base URL
BLS_BASE = "https://data.bls.gov/cew/data/api/"

def qcew_get(area, year, naics):
    """
    Pull QCEW data for a given area, year, NAICS code.
    area  = MSA code (Cleveland = 17460)
    year  = 2020-2024 etc.
    naics = NAICS code string (e.g., '3361')
    """
    # Correct URL format: /year/period/area/area_code.csv
    # For MSA data with specific NAICS, we need to use the size/industry endpoint
    url = f"{BLS_BASE}{year}/a/industry/{naics}.csv"
    
    print(f"Fetching: {url}")
    resp = requests.get(url)
    
    if resp.status_code != 200:
        print(f"Error fetching NAICS {naics}: Status {resp.status_code}")
        print(f"Response: {resp.text[:200]}")
        raise Exception(f"Error fetching NAICS {naics}: {resp.text}")
    
    # Parse CSV response
    df = pd.read_csv(StringIO(resp.text))
    
    # Debug: Print columns and sample data
    # print(f"\nColumns available: {df.columns.tolist()}")
    # print(f"Total rows before filtering: {len(df)}")
    # if len(df) > 0:
    #     print(f"Sample area_fips values: {df['area_fips'].unique()[:10]}")
    #     print(f"Looking for area_fips: {area}")
    
    # Filter for Cleveland MSA
    df = df[df['area_fips'] == area]
    
    return df

## Biomed, Tech dataframes

In [5]:
import os
os.environ['PYTHON_GIL'] = '0'  # Must set before importing threading
import threading
from queue import Queue

In [6]:
def get_df_by_dict(input_q, output_q):
    """Worker function - processes one dictionary at a time"""
    while True:
        naics_dict = input_q.get()  # Don't use 'dict' as variable name!
        
        if naics_dict is None:  # Poison pill to stop worker
            input_q.task_done()
            break
        
        frames = []
        for year in years:
            for naics, label in naics_dict.items():
                try:
                    df = qcew_get("39035", year, naics)
                    if not df.empty:
                        df["sector_name"] = label
                        df["year"] = year
                        frames.append(df)
                        print(f"Successfully fetched {label} for {year}")
                    else:
                        print(f"No data for {label} (NAICS {naics}) in {year}")
                except Exception as e:
                    print(f"Failed to fetch {label} (NAICS {naics}) for {year}: {e}")
        
        if frames:
            out_df = pd.concat(frames, ignore_index=True)
            output_q.put(out_df)
            print(f"Completed processing - Total rows: {len(out_df)}")
        else:
            print("No data fetched successfully for this dictionary")
            output_q.put(None)
        
        input_q.task_done()

In [9]:
input_list = [ADV_MFG_NAICS, BIOMED_TECH_NAICS, CONVENTIONAL_TECH_NAICS, FINANCE_NAICS]
years = list(range(2015,2024))
# Create queues
input_queue = Queue()
output_queue = Queue()

# Start workers (one per dictionary for parallel processing)
num_workers = len(input_list)
workers = []
for i in range(num_workers):
    t = threading.Thread(target=get_df_by_dict, args=(input_queue, output_queue))
    t.start()
    workers.append(t)

# Send work to workers
for naics_dict in input_list:
  input_queue.put(naics_dict)

# Send poison pills to stop workers
for _ in range(num_workers):
  input_queue.put(None)

# CRITICAL: Wait for all workers to finish
for t in workers:
  t.join()

# Now collect results (workers are done, so queue won't change)
results = []
while not output_queue.empty():
  result = output_queue.get()
  if result is not None:  # Skip None entries
      results.append(result)

# Assign results
if len(results) >= 4:
  adv_mfg_df, biomed_tech_df, tech_df, finance_df = results[:4]
else:
  print(f"Warning: Expected 4 results, got {len(results)}")
  # Handle partial results gracefully
  adv_mfg_df = results[0] if len(results) > 0 else pd.DataFrame()
  biomed_tech_df = results[1] if len(results) > 1 else pd.DataFrame()
  tech_df = results[2] if len(results) > 2 else pd.DataFrame()
  finance_df = results[3] if len(results) > 3 else pd.DataFrame()
  

Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/3251.csvFetching: https://data.bls.gov/cew/data/api/2015/a/industry/3254.csv
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/5112.csv
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/52.csv

Successfully fetched Software Publishers for 2015
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/5182.csv
Successfully fetched Pharmaceutical & Medicine Manufacturing for 2015
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/3391.csv
Successfully fetched Basic Chemical Manufacturing for 2015
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/3254.csv
Successfully fetched Pharmaceutical & Medicine Manufacturing for 2015
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/3332.csv
Successfully fetched Data Processing, Hosting & Cloud Services for 2015
Fetching: https://data.bls.gov/cew/data/api/2015/a/industry/5191.csv
Successfully fetched Finance & Insurance (supersector) for 2

In [10]:
adv_mfg_df

Unnamed: 0,area_fips,own_code,industry_code,agglvl_code,size_code,year,qtr,disclosure_code,annual_avg_estabs,annual_avg_emplvl,...,oty_total_annual_wages_pct_chg,oty_taxable_annual_wages_chg,oty_taxable_annual_wages_pct_chg,oty_annual_contributions_chg,oty_annual_contributions_pct_chg,oty_annual_avg_wkly_wage_chg,oty_annual_avg_wkly_wage_pct_chg,oty_avg_annual_pay_chg,oty_avg_annual_pay_pct_chg,sector_name
0,39035,5,3254,76,0,2015,A,,7,188,...,-81.6,-9585839,-76.0,-291641,-64.3,-1349,-39.7,-70177,-39.8,Pharmaceutical & Medicine Manufacturing
1,39035,5,3391,76,0,2015,A,,65,1945,...,2.0,454757,2.3,-31487,-6.2,25,2.9,1331,3.0,Medical Equipment & Supplies Manufacturing
2,39035,2,5417,76,0,2015,A,N,1,0,...,0.0,0,0.0,0,0.0,0,0.0,0,0.0,Scientific Research & Development Services
3,39035,5,5417,76,0,2015,A,,119,1860,...,7.1,3105075,17.1,124283,29.6,25,1.4,1300,1.4,Scientific Research & Development Services
4,39035,5,6215,76,0,2015,A,,62,392,...,-1.1,-381262,-8.3,-8819,-6.5,28,2.8,1491,2.9,Medical & Diagnostic Laboratories
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,39035,5,6215,76,0,2023,A,,67,837,...,9.7,2011306,29.2,131953,55.0,-3,-0.3,-154,-0.3,Medical & Diagnostic Laboratories
77,39035,1,6221,76,0,2023,A,,1,4492,...,11.3,0,0.0,0,0.0,125,6.6,6478,6.6,General Medical & Surgical Hospitals
78,39035,3,6221,76,0,2023,A,,3,6379,...,7.3,0,0.0,0,0.0,-30,-1.6,-1557,-1.6,General Medical & Surgical Hospitals
79,39035,5,6221,76,0,2023,A,,17,61317,...,8.3,-9443742,-79.0,-1245559,-97.8,87,5.3,4553,5.4,General Medical & Surgical Hospitals
