In [1]:
import pandas as pd

In [2]:
def create_fips_code(df):
    """Build the integer FIPS code for every row of ``df``.

    Each code is the row's ``statecode`` followed by its ``countycode``
    zero-padded to three digits (state 1, county 3 -> ``1003``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing integer ``'statecode'`` and ``'countycode'`` columns.

    Returns
    -------
    list of int
        One code per row, in positional (row) order.

    Raises
    ------
    KeyError
        If either column is missing from ``df``.
    ValueError
        If a value is a float; the ``d`` format spec only accepts integers.
    """
    # Fetch each column once and walk them in lockstep instead of doing two
    # df[...] lookups plus two .iloc[i] scalar lookups on every iteration.
    # Iteration is positional, so the result does not depend on the index.
    return [
        int('{:d}{:03d}'.format(state, county))
        for state, county in zip(df['statecode'], df['countycode'])
    ]

In [3]:
# Read the four yearly CHR analytic exports. Every column label is
# lower-cased as part of the load so later cells can use one spelling.
data_2017 = pd.read_csv('data/2017 CHR analytic data.csv').rename(columns=str.lower)
data_2016 = pd.read_csv('data/2016 CHR analytic data.csv').rename(columns=str.lower)
data_2015 = pd.read_csv('data/2015 CHR analytic data.csv').rename(columns=str.lower)
data_2014 = pd.read_csv('data/2014 CHR analytic data.csv').rename(columns=str.lower)

In [4]:
# Derive fips_code for each yearly frame and place it in the leading
# column position (insert mutates the frame in place).
for yearly_frame in (data_2017, data_2016, data_2015, data_2014):
    yearly_frame.insert(loc=0, column='fips_code', value=create_fips_code(yearly_frame))

In [5]:
# Tag every row with the year of the file it came from; the new column
# goes in position 1, directly after fips_code.
for yearly_frame, year in (
    (data_2017, 2017),
    (data_2016, 2016),
    (data_2015, 2015),
    (data_2014, 2014),
):
    yearly_frame.insert(loc=1, column='year', value=year)

In [6]:
# Visual inspection: show the first three rows of the 2017 frame to confirm
# that fips_code and year were inserted as the two leading columns.
data_2017.head(3)

Unnamed: 0,fips_code,year,5-digit fips code,statecode,countycode,state,county,county that was not ranked,premature death value,premature death numerator,...,percent of population that is female,percent of population that is female numerator,percent of population that is female denominator,percent of population that is female lower confidence interval,percent of population that is female upper confidence interval,population living in a rural area value,population living in a rural area numerator,population living in a rural area denominator,population living in a rural area lower confidence interval,population living in a rural area upper confidence interval
0,1000,2017,1000,1,0,AL,Alabama,,9573.0,75262,...,0.516,,,,,0.41,1957932,4779736,,
1,1001,2017,1001,1,1,AL,Autauga County,,9158.0,830,...,0.515,,,,,0.42,22921,54571,,
2,1003,2017,1003,1,3,AL,Baldwin County,,7394.0,2573,...,0.513,,,,,0.423,77060,182265,,


In [7]:
# Error check: compare the derived fips_code with the '5-digit fips code'
# column that ships with the 2017 file (only this year has that column
# beforehand). The two printed numbers must be equal.
n_matching = (data_2017['fips_code'] == data_2017['5-digit fips code']).sum()
n_rows = data_2017.shape[0]
print(n_matching)
print(n_rows)

# Fail loudly on a mismatch instead of relying on the reader to compare
# the two printed values by eye.
assert n_matching == n_rows, 'derived fips_code disagrees with the 5-digit fips code column'

3186
3186


In [8]:
# Use fips_code (unique to each county) as the index of every yearly frame.
# Each name is rebound to the new frame returned by set_index.
data_2017, data_2016, data_2015, data_2014 = (
    yearly_frame.set_index('fips_code')
    for yearly_frame in (data_2017, data_2016, data_2015, data_2014)
)

In [9]:
# Visual inspection: first three rows of the 2017 frame after set_index;
# fips_code should now appear as the index rather than as a column.
data_2017.head(3)

Unnamed: 0_level_0,year,5-digit fips code,statecode,countycode,state,county,county that was not ranked,premature death value,premature death numerator,premature death denominator,...,percent of population that is female,percent of population that is female numerator,percent of population that is female denominator,percent of population that is female lower confidence interval,percent of population that is female upper confidence interval,population living in a rural area value,population living in a rural area numerator,population living in a rural area denominator,population living in a rural area lower confidence interval,population living in a rural area upper confidence interval
fips_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,2017,1000,1,0,AL,Alabama,,9573.0,75262,13597347,...,0.516,,,,,0.41,1957932,4779736,,
1001,2017,1001,1,1,AL,Autauga County,,9158.0,830,157247,...,0.515,,,,,0.42,22921,54571,,
1003,2017,1003,1,3,AL,Baldwin County,,7394.0,2573,543456,...,0.513,,,,,0.423,77060,182265,,


In [10]:
# Visual inspection: first three rows of the 2016 frame, to confirm it is
# indexed by fips_code in the same way as the 2017 frame.
data_2016.head(3)

Unnamed: 0_level_0,year,statecode,countycode,state,county,county that was not ranked,premature death value,premature death numerator,premature death denominator,premature death lower confidence interval,...,percent of population that is female,percent of population that is female numerator,percent of population that is female denominator,percent of population that is female lower confidence interval,percent of population that is female upper confidence interval,population living in a rural area value,population living in a rural area numerator,population living in a rural area denominator,population living in a rural area lower confidence interval,population living in a rural area upper confidence interval
fips_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,2016,1,0,AL,Alabama,,9545,73929,13566808,9446,...,0.515,,,,,0.41,1957932,4779736,,
1001,2016,1,1,AL,Autauga County,,9215,809,157582,8308,...,0.514,,,,,0.42,22921,54571,,
1003,2016,1,3,AL,Baldwin County,,7455,2506,531694,7002,...,0.512,,,,,0.423,77060,182265,,
