In [None]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel (mode 3; see the IPython autoreload docs).
%load_ext autoreload
%autoreload 3

In [None]:
import logging
import sys

import pandas as pd

import energy_comms
from energy_comms.extract.bls import QCEW_YEARS

In [None]:
# Route root-logger records at INFO level and above to stdout.
formatter = logging.Formatter()
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()
# Replace whatever handlers are currently attached with the stdout handler only.
logger.handlers = [handler]
logger.setLevel(logging.INFO)

# Setup

In [None]:
# Short aliases for the BLS extract / transform modules used in this cell.
bls_extract = energy_comms.extract.bls
bls_transform = energy_comms.transform.bls

# MSA-to-county crosswalk: extract the raw table, then transform it.
msa_county_raw_df = bls_extract.extract_msa_county_crosswalk()
msa_to_county_df = bls_transform.transform_msa_county_crosswalk(msa_county_raw_df)

# Non-MSA-to-county crosswalk: its transform also takes the MSA crosswalk
# built above as its second argument.
non_msa_to_county_raw_df = bls_extract.extract_nonmsa_county_crosswalk()
non_msa_to_county_df = bls_transform.transform_nonmsa_county_crosswalk(
    non_msa_to_county_raw_df, msa_to_county_df
)

# Get fossil employment qualifying areas

New guidance fossil NAICS codes: 211, 2121, 213111, 213112, 213113, 32411, 4861, 4862

Old guidance fossil NAICS codes: 2121, 211, 213, 23712, 486, 4247, 22112

In [None]:
# NAICS codes listed in the new guidance.
NEW_GUIDANCE_FOSSIL_NAICS_CODES = [
    "211",
    "2121",
    "213111",
    "213112",
    "213113",
    "32411",
    "4861",
    "4862",
]

# NAICS codes that appear in the old guidance but not in the new guidance.
OLD_GUIDANCE_ONLY_FOSSIL_NAICS_CODES = ["213", "23712", "486", "4247", "22112"]

# All NAICS codes you might want to consider, from the new and the old guidance.
ALL_FOSSIL_NAICS_CODES = (
    NEW_GUIDANCE_FOSSIL_NAICS_CODES + OLD_GUIDANCE_ONLY_FOSSIL_NAICS_CODES
)

Extract and transform the QCEW data, one year at a time. This creates a dataframe in which each record represents a unique combination of year, county, and NAICS code (industry code).

In [None]:
# Extract and transform one year at a time, so that what gets concatenated is
# the transformed per-year output rather than the raw data for all years.
yearly_qcew_dfs = []
for year in QCEW_YEARS:
    year_df = energy_comms.extract.bls.extract_qcew_data(years=[year])
    if year_df.empty:
        # Nothing was extracted for this year, so there is nothing to transform.
        continue
    yearly_qcew_dfs.append(
        energy_comms.transform.bls.transform_qcew_data(
            year_df,
            msa_county_crosswalk=msa_to_county_df,
            non_msa_county_crosswalk=non_msa_to_county_df,
            fossil_naics_codes=ALL_FOSSIL_NAICS_CODES,
        )
    )

# Concatenate once at the end instead of re-copying the accumulated frame on
# every iteration. pd.concat raises on an empty list, so fall back to an empty
# DataFrame when no year produced any data.
qcew_df = pd.concat(yearly_qcew_dfs) if yearly_qcew_dfs else pd.DataFrame()

In [None]:
# Preview the first three rows of the combined QCEW dataframe.
qcew_df.head(n=3)

In [None]:
# Show the QCEW records whose area title contains the text "Abilene".
# na=False counts a missing area title as a non-match (a NaN in the mask would
# make the boolean indexing raise); regex=False matches the text literally.
qcew_df[qcew_df.area_title.str.contains("Abilene", na=False, regex=False)]

**Update this list of NAICS codes to see which areas would qualify.**

For example, all the new guidance codes + "22112"

In [None]:
# Edit this list to choose which NAICS codes are considered below.
NAICS_CODES_TO_CONSIDER = [
    # codes from the new guidance
    "211", "2121", "213111", "213112", "213113", "32411", "4861", "4862",
    # plus one code that appears only in the old guidance
    "22112",
]

In [None]:
# Keep only the rows for the selected NAICS codes plus industry code "10".
# NOTE(review): "10" is presumably the all-industries total that the
# qualifying-area calculation needs -- confirm against the QCEW industry codes.
industry_codes_to_keep = ["10", *NAICS_CODES_TO_CONSIDER]
filtered_qcew_df = qcew_df.loc[qcew_df["industry_code"].isin(industry_codes_to_keep)]

In [None]:
# Compute the fossil employment qualifying areas from the filtered QCEW records.
fossil_employment_df = (
    energy_comms.generate_qualifying_areas.fossil_employment_qualifying_areas(
        qcew_df=filtered_qcew_df
    )
)

Now you can see if a certain county or MSA still qualifies.

In this dataframe, `area_title` is the MSA or non-MSA title.

In [None]:
# Show up to three fossil employment records whose area title contains "Abilene".
# na=False counts a missing area title as a non-match (a NaN in the mask would
# make the boolean indexing raise); regex=False matches the text literally.
fossil_employment_df[
    fossil_employment_df.area_title.str.contains("Abilene", na=False, regex=False)
].head(3)

# Generate the unemployment dataframe

Generate `unemployment_df` if you want to check whether a county also meets the unemployment threshold.

In [None]:
# Flag passed as `update=` to extract_lau_rates. It is defined here so the cell
# runs on a fresh kernel instead of depending on a name set nowhere in the notebook.
# NOTE(review): False is assumed to mean "do not refresh the LAU data" --
# confirm against extract_lau_rates and set to True if fresh data is needed.
UPDATE_LAU_DATA = False

# Extract the raw national (CPS) and local area (LAU) unemployment data.
cps_raw_df = energy_comms.extract.bls.extract_national_unemployment_rates()
lau_raw_df = energy_comms.extract.bls.extract_lau_rates(update=UPDATE_LAU_DATA)

# Transform both; the local area transform also takes the MSA and non-MSA
# county crosswalks.
cps_df = energy_comms.transform.bls.transform_national_unemployment_rates(
    cps_raw_df
)
lau_df = energy_comms.transform.bls.transform_local_area_unemployment_rates(
    raw_lau_df=lau_raw_df,
    non_msa_county_crosswalk=non_msa_to_county_df,
    msa_county_crosswalk=msa_to_county_df,
)

# Combine the national and local rates into the unemployment qualifying areas.
unemployment_df = (
    energy_comms.generate_qualifying_areas.unemployment_rate_qualifying_areas(
        national_unemployment_df=cps_df, lau_df=lau_df
    )
)