In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [26]:
def health_insurance(file,year):
    # Read in file
    df = pd.read_csv(file)

    # Get rid of spaces in Labels
    df['Label'] = df['Label'].apply(lambda x: x.lstrip())

    # Convert percentage estimates to float
    df['Percentage'] = df['Estimate'].apply(lambda x: float(x.rstrip('%'))/100 if '%' in x else None)

    # Move Total Civilian Noninstitutionalized Population from row to column
    df['Total Civilian Noninstitutionalized Population'] = int(df.iloc[0]['Estimate'].replace(',',''))
    df.dropna(inplace=True)

    # Drop unnecessary columns
    df.drop(columns=['Estimate','Margin of Error'], inplace=True)

    # Calculate population estimates
    df['Population Estimate'] = df['Percentage']*df['Total Civilian Noninstitutionalized Population']

    # Drop unnecessary columns
    df.drop(columns=['Percentage','Total Civilian Noninstitutionalized Population'], inplace=True)

    # Change index and transpose table
    df.set_index('Label', inplace=True)
    df = df.T

    # Change index to year
    df.index = [year]

    # Calculate extra columns
    df['Total Civilian Noninstitutionalized Population'] = df.sum(axis=1)

    return df

In [27]:
health_insurance('data/2022/HEALTH_INSURANCE_COVERAGE.CSV',2022)

Label,With private health insurance,With public coverage,No health insurance coverage,Total Civilian Noninstitutionalized Population
2022,65433.654,24244.701,7841.223,97519.578


In [28]:
for year in range(2010,2023):
    file = 'data/'+str(year)+'/HEALTH_INSURANCE_COVERAGE.CSV'

    try:
        health_insurance(file,year)
    except:
        print(f"No file for year '{year}'")

No file for year '2020'


In [29]:
HEALTH_INSURANCE_COVERAGE_2022 = health_insurance('data/2022/HEALTH_INSURANCE_COVERAGE.CSV',2022)
HEALTH_INSURANCE_COVERAGE_2021 = health_insurance('data/2021/HEALTH_INSURANCE_COVERAGE.CSV',2021)
HEALTH_INSURANCE_COVERAGE_2019 = health_insurance('data/2019/HEALTH_INSURANCE_COVERAGE.CSV',2019)
HEALTH_INSURANCE_COVERAGE_2018 = health_insurance('data/2018/HEALTH_INSURANCE_COVERAGE.CSV',2018)
HEALTH_INSURANCE_COVERAGE_2017 = health_insurance('data/2017/HEALTH_INSURANCE_COVERAGE.CSV',2017)
HEALTH_INSURANCE_COVERAGE_2016 = health_insurance('data/2016/HEALTH_INSURANCE_COVERAGE.CSV',2016)
HEALTH_INSURANCE_COVERAGE_2015 = health_insurance('data/2015/HEALTH_INSURANCE_COVERAGE.CSV',2015)
HEALTH_INSURANCE_COVERAGE_2014 = health_insurance('data/2014/HEALTH_INSURANCE_COVERAGE.CSV',2014)
HEALTH_INSURANCE_COVERAGE_2013 = health_insurance('data/2013/HEALTH_INSURANCE_COVERAGE.CSV',2013)
HEALTH_INSURANCE_COVERAGE_2012 = health_insurance('data/2012/HEALTH_INSURANCE_COVERAGE.CSV',2012)
HEALTH_INSURANCE_COVERAGE_2011 = health_insurance('data/2011/HEALTH_INSURANCE_COVERAGE.CSV',2011)
HEALTH_INSURANCE_COVERAGE_2010 = health_insurance('data/2010/HEALTH_INSURANCE_COVERAGE.CSV',2010)

In [30]:
HEALTH_INSURANCE_COVERAGE = pd.concat([HEALTH_INSURANCE_COVERAGE_2022,HEALTH_INSURANCE_COVERAGE_2021,HEALTH_INSURANCE_COVERAGE_2019,HEALTH_INSURANCE_COVERAGE_2018,HEALTH_INSURANCE_COVERAGE_2017,HEALTH_INSURANCE_COVERAGE_2016,HEALTH_INSURANCE_COVERAGE_2015,HEALTH_INSURANCE_COVERAGE_2014,HEALTH_INSURANCE_COVERAGE_2013,HEALTH_INSURANCE_COVERAGE_2012,HEALTH_INSURANCE_COVERAGE_2011,HEALTH_INSURANCE_COVERAGE_2010])
HEALTH_INSURANCE_COVERAGE

Label,With private health insurance,With public coverage,No health insurance coverage,Total Civilian Noninstitutionalized Population
2022,65433.654,24244.701,7841.223,97519.578
2021,65598.727,24102.897,6621.675,96323.299
2019,71575.924,22266.062,4597.446,98439.432
2018,67593.542,21761.39,6572.828,95927.76
2017,66172.754,20418.545,4083.709,90675.008
2016,58342.998,17172.939,4349.478,79865.415
2015,65003.32,17780.08,5382.96,88166.36
2014,61939.02,16040.618,7782.082,85761.72
2013,59515.536,17863.008,12019.968,89398.512
2012,58725.936,13822.461,10308.276,82856.673


In [31]:
HEALTH_INSURANCE_COVERAGE.reset_index(inplace=True)
HEALTH_INSURANCE_COVERAGE.rename(columns={'index':'Year'}, inplace=True)
#EMPLOYMENT_STATUS.index.names = ['index']
HEALTH_INSURANCE_COVERAGE

Label,Year,With private health insurance,With public coverage,No health insurance coverage,Total Civilian Noninstitutionalized Population
0,2022,65433.654,24244.701,7841.223,97519.578
1,2021,65598.727,24102.897,6621.675,96323.299
2,2019,71575.924,22266.062,4597.446,98439.432
3,2018,67593.542,21761.39,6572.828,95927.76
4,2017,66172.754,20418.545,4083.709,90675.008
5,2016,58342.998,17172.939,4349.478,79865.415
6,2015,65003.32,17780.08,5382.96,88166.36
7,2014,61939.02,16040.618,7782.082,85761.72
8,2013,59515.536,17863.008,12019.968,89398.512
9,2012,58725.936,13822.461,10308.276,82856.673


In [32]:
HEALTH_INSURANCE_COVERAGE.to_csv('data/final/HEALTH_INSURANCE_COVERAGE.CSV', index=False)

In [33]:
test = pd.read_csv('data/final/HEALTH_INSURANCE_COVERAGE.CSV')
test

Unnamed: 0,Year,With private health insurance,With public coverage,No health insurance coverage,Total Civilian Noninstitutionalized Population
0,2022,65433.654,24244.701,7841.223,97519.578
1,2021,65598.727,24102.897,6621.675,96323.299
2,2019,71575.924,22266.062,4597.446,98439.432
3,2018,67593.542,21761.39,6572.828,95927.76
4,2017,66172.754,20418.545,4083.709,90675.008
5,2016,58342.998,17172.939,4349.478,79865.415
6,2015,65003.32,17780.08,5382.96,88166.36
7,2014,61939.02,16040.618,7782.082,85761.72
8,2013,59515.536,17863.008,12019.968,89398.512
9,2012,58725.936,13822.461,10308.276,82856.673
