In [1]:
###### 
## author: Yifeng Wan, yifengwan@jhu.edu
## 2023-03-02
##
######

import numpy as np
import pandas as pd
import re
import os

In [2]:
# Clean the raw yearly "uninsured" extracts.
# For each year this reads ./acs/acs_uninsured/uninsured_<year>_raw.csv, computes
# the percent uninsured per row, writes uninsured_<year>_cleaned.csv next to it,
# and keeps the cleaned frame in `uninsured_files` keyed by the year as a string.
uninsured_files = {}
for year in [2018, 2019, 2020, 2021]:
    raw_path = './acs/acs_uninsured/uninsured_' + str(year) + '_raw.csv'
    table = pd.read_csv(raw_path)
    label_col = 'Label (Grouping)'
    value_cols = table.columns[1:]

    # Strip non-breaking spaces so the 'Estimate' rows can be matched exactly below.
    table[label_col] = table[label_col].str.replace('\xa0', '', regex=False)

    # Presumably the raw export puts the numbers on an 'Estimate' row beneath each
    # place label (TODO confirm against the raw file): back-fill them up onto the
    # label row, then drop the now-redundant 'Estimate' rows.
    # `.bfill()` replaces the deprecated `fillna(method='bfill')`.
    table[value_cols] = table[value_cols].bfill()
    table = table.loc[table[label_col] != 'Estimate'].copy()

    # Convert every value column to int, one column at a time. Thousands separators
    # are only stripped from text columns; a column pandas already parsed as numeric
    # is cast directly (calling `.str` on it would yield NaN or raise).
    for col in value_cols:
        values = table[col]
        if not pd.api.types.is_numeric_dtype(values):
            values = values.str.replace(',', '', regex=False)
        table[col] = values.astype(int)

    # Sum of all columns from the third onward — presumably the uninsured sub-counts;
    # this relies on 'Total:' being the second column (TODO confirm).
    table['uninsured'] = table.iloc[:, 2:].sum(axis=1)
    uninsured_percent_year = 'uninsured_percent' + str(year)
    table[uninsured_percent_year] = (table['uninsured'] / table['Total:'] * 100).round(2)

    # Split the "<county>, <state>" label into separate, whitespace-trimmed columns.
    table[['ctyname', 'stname']] = table[label_col].str.split(',', expand=True)
    table['stname'] = table['stname'].str.strip()
    table['ctyname'] = table['ctyname'].str.strip()

    # Keep only the merge keys and this year's percentage, then export and store.
    table = table[['stname', 'ctyname', uninsured_percent_year]].copy()
    export_name = './acs/acs_uninsured/uninsured_' + str(year) + '_cleaned.csv'
    table.to_csv(export_name, index=False)
    uninsured_files[str(year)] = table

In [3]:
# Attach FIPS codes and combine the four yearly tables into one wide table.
fips = pd.read_csv('./acs/FIPS/state_county_fips_updated.csv').rename(
    columns={'fips': 'scfips'}
)

# Start from the FIPS lookup and outer-join each year's cleaned table on the
# (state name, county name) pair, so rows present on only one side are kept.
uninsure_all = fips
for year_key in ('2018', '2019', '2020', '2021'):
    uninsure_all = pd.merge(
        left=uninsure_all,
        right=uninsured_files[year_key],
        how='outer',
        on=['stname', 'ctyname'],
    )
    # Report shape and columns after each join to spot unexpected row growth.
    print(uninsure_all.shape, uninsure_all.columns)

# Write the combined table to a single CSV file.
uninsure_all.to_csv('./acs/acs_uninsured/uninsured18to21.csv', index=False)

(3222, 6) Index(['stfips', 'ctyfips', 'stname', 'ctyname', 'scfips',
       'uninsured_percent2018'],
      dtype='object')
(3222, 7) Index(['stfips', 'ctyfips', 'stname', 'ctyname', 'scfips',
       'uninsured_percent2018', 'uninsured_percent2019'],
      dtype='object')
(3222, 8) Index(['stfips', 'ctyfips', 'stname', 'ctyname', 'scfips',
       'uninsured_percent2018', 'uninsured_percent2019',
       'uninsured_percent2020'],
      dtype='object')
(3222, 9) Index(['stfips', 'ctyfips', 'stname', 'ctyname', 'scfips',
       'uninsured_percent2018', 'uninsured_percent2019',
       'uninsured_percent2020', 'uninsured_percent2021'],
      dtype='object')
