In [1]:
from pathlib import Path
import pandas as pd
import sys
# Make the parent of the current working directory importable (presumably so
# the `scripts` package imported in the next cell resolves — this depends on
# where the notebook is launched from). The membership guard keeps repeated
# executions of this cell from stacking duplicate entries onto sys.path.
if str(Path.cwd().parent) not in sys.path:
    sys.path.append(str(Path.cwd().parent))

In [2]:
from scripts.config import paths

In [3]:
# Analysis parameters.
# DATA_YEAR is deliberately a string: it is compared against `report_year`,
# which is produced by a string split of `report_date` and so holds strings.
DATA_YEAR: str = "2020"
# Value matched against the `state_name` column to select a single state.
TEST_STATE: str = "Massachusetts"

In [4]:
# Directory the enrollment CSV is read from: the "enrl" entry under the
# project's configured data location. `paths` comes from scripts.config;
# presumably paths["data"] is a pathlib.Path (the `/` join relies on it) —
# TODO confirm against scripts/config.
enrl_dir = paths["data"] / "enrl"

In [5]:
# Tag identifying which enrollment extract to load; it is embedded in the file
# name below and echoed in the load message.
enrl_tag: str = "August2022"
# File name of the extract inside the enrollment directory.
enrl_filenm: str = f"EnrollmentData{enrl_tag}.csv"

In [6]:
# Load the enrollment extract, then report its size and list every column so
# the column constants defined in the following cells can be checked by eye.
df = pd.read_csv(enrl_dir / enrl_filenm)
print(f"enrollment data ({enrl_tag}) loaded -> rows: {len(df)}, cols: {len(df.columns)}")
print("columns:")
for col in df.columns:
    print(f" {col}")

enrollment data (August2022) loaded -> rows: 6281, cols: 26
columns:
 state_abbreviation
 state_name
 report_date
 state_expanded_medicaid
 preliminary_updated
 final_report
 new_applications_submitted_to_medicaid_and_chip_agencies
 new_applications_submitted_to_medicaid_and_chip_agencies__f_85d7
 applications_for_financial_assistance_submitted_to_the_stat_104d
 applications_for_financial_assistance_submitted_to_the_stat_c640
 total_applications_for_financial_assistance_submitted_at_st_d6fa
 total_applications_for_financial_assistance_submitted_at_st_9919
 individuals_determined_eligible_for_medicaid_at_application
 individuals_determined_eligible_for_medicaid_at_application_4f96
 individuals_determined_eligible_for_chip_at_application
 individuals_determined_eligible_for_chip_at_application__fo_e28a
 total_medicaid_and_chip_determinations
 total_medicaid_and_chip_determinations__footnotes
 medicaid_and_chip_child_enrollment
 medicaid_and_chip_child_enrollment__footnotes
 total_medicai

In [7]:
# Columns describing the state a row belongs to; part of the set of columns
# retained from the raw extract.
STATE_COLS = ["state_abbreviation", "state_name", "state_expanded_medicaid"]

In [8]:
# Report-level columns: the report date plus the two status flags
# (preliminary/updated and final report). They are retained from the raw
# extract for filtering and dropped again from the final per-state table.
META_COLS = ["report_date", "preliminary_updated", "final_report"]

In [9]:
# Enrollment measures kept from the raw extract; these are the columns that
# are averaged at the end of the notebook.
ENRL_COLS = [
    "medicaid_and_chip_child_enrollment",  # child enrollment
    "total_medicaid_and_chip_enrollment",  # combined total
    "total_medicaid_enrollment",           # Medicaid only
    "total_chip_enrollment",               # CHIP only
]

In [10]:
# Names given to the three pieces obtained by splitting `report_date` on "/".
# The order matters: it must match the order of the pieces in the date string
# (month, day, year).
DATE_COLS = ["report_month", "report_day", "report_year"]

In [11]:
# Columns removed (together with META_COLS) from the final single-state table.
SKIP_COLS = ["report_day", "state_name", "state_expanded_medicaid"]

In [12]:
# Every column to retain from the raw extract: state identifiers, report
# metadata and the enrollment measures, in that order.
keep_cols = [*STATE_COLS, *META_COLS, *ENRL_COLS]

In [13]:
# Complement of keep_cols: every column currently in df that was not selected
# for retention, in df's own column order.
drop_cols = [name for name in df.columns if name not in keep_cols]

In [14]:
# Remove the columns that were not selected in keep_cols.
# errors="ignore" makes this cell safe to execute more than once: on a second
# run the names in drop_cols are already gone from df, and without the flag
# DataFrame.drop would raise KeyError. drop_cols is derived from df.columns,
# so ignoring missing labels cannot hide a misspelled column name.
df = df.drop(columns=drop_cols, errors="ignore")

In [15]:
# Cross-tabulate the preliminary/updated flag (rows) against the final-report
# flag (columns) to see how the two status columns relate before filtering on
# final_report.
pd.crosstab(index=df["preliminary_updated"], columns=df["final_report"])

final_report,N,Y
preliminary_updated,Unnamed: 1_level_1,Unnamed: 2_level_1
P,3162,51
U,0,3068


In [16]:
# Break report_date apart on "/" and store the pieces under the DATE_COLS
# names (month, day, year). The pieces are left as strings, which is why the
# year filter later compares against the string DATA_YEAR.
df[DATE_COLS] = df["report_date"].str.split(pat="/", expand=True)

In [17]:
# Number of rows per report year, left unsorted by count (sort=False).
df["report_year"].value_counts(sort=False)

2013      51
2017     639
2018    1156
2019    1222
2020    1224
2021    1224
2022     765
Name: report_year, dtype: int64

In [18]:
# Keep only rows flagged as final reports ...
fnl_df = df.loc[df["final_report"].eq("Y")]
# ... and, of those, only the rows for the year under analysis. report_year
# holds strings, so DATA_YEAR is compared as a string.
yr_fnl_df = fnl_df.loc[fnl_df["report_year"].eq(DATA_YEAR)]

In [19]:
# Narrow the final-report rows for the selected year down to the test state.
st_yr_fnl_df = yr_fnl_df.loc[yr_fnl_df["state_name"].eq(TEST_STATE)]

In [20]:
# Strip the columns that are no longer needed once the table is down to a
# single state and year (SKIP_COLS and META_COLS), and renumber the rows
# from 0.
# This cell reassigns st_yr_fnl_df from itself, so a second execution used to
# fail with KeyError because the columns were already dropped;
# errors="ignore" turns the re-run into a no-op instead.
st_yr_fnl_df = (
    st_yr_fnl_df
    .drop(columns=SKIP_COLS + META_COLS, errors="ignore")
    .reset_index(drop=True)
    .copy()
)

In [21]:
# Display the trimmed table: final-report rows for TEST_STATE in DATA_YEAR.
st_yr_fnl_df

Unnamed: 0,state_abbreviation,medicaid_and_chip_child_enrollment,total_medicaid_and_chip_enrollment,total_medicaid_enrollment,total_chip_enrollment,report_month,report_year
0,MA,665605.0,1554381.0,1370618.0,183763.0,1,2020
1,MA,656626.0,1530761.0,1349649.0,181112.0,2,2020
2,MA,669204.0,1569853.0,1383851.0,186002.0,3,2020
3,MA,675242.0,1593701.0,1407388.0,186313.0,4,2020
4,MA,679782.0,1609953.0,1422125.0,187828.0,5,2020
5,MA,683108.0,1624309.0,1435296.0,189013.0,6,2020
6,MA,687083.0,1640354.0,1450220.0,190134.0,7,2020
7,MA,690395.0,1656650.0,1464645.0,192005.0,8,2020
8,MA,694058.0,1674487.0,1480621.0,193866.0,9,2020
9,MA,697079.0,1693159.0,1494913.0,198246.0,10,2020


In [22]:
# Mean of each enrollment measure over the rows of the table above.
st_yr_fnl_df.loc[:, ENRL_COLS].mean(axis=0)

medicaid_and_chip_child_enrollment    6.832250e+05
total_medicaid_and_chip_enrollment    1.631397e+06
total_medicaid_enrollment             1.440872e+06
total_chip_enrollment                 1.905258e+05
dtype: float64