In [1]:
import pandas as pd

pd.set_option('display.max_columns', None)

# 2014 Data

In [14]:
# read in original csv
svi_2014 = pd.read_csv('./data/atsdr-svi/SVI2014_US_CNTY.csv')

# Setting index as FIPS / County Code
svi_2014.set_index('FIPS', inplace=True)

# Dropping unneeded columns
svi_2014.drop(['FID', 'ST', 'STATE', 'ST_ABBR', 'COUNTY', 'LOCATION', 'AFFGEOID', 'AREA_SQMI'], axis=1, inplace=True)

# Dropping computed columns, indicated by starting string
svi_2014 = svi_2014.loc[:, ~svi_2014.columns.str.startswith('M')]
svi_2014 = svi_2014.loc[:, ~svi_2014.columns.str.startswith('EP')]
svi_2014 = svi_2014.loc[:, ~svi_2014.columns.str.startswith('F')]
svi_2014 = svi_2014.loc[:, ~svi_2014.columns.str.startswith('RPL')]
svi_2014 = svi_2014.loc[:, ~svi_2014.columns.str.startswith('SPL')]

svi_2014.head()

Unnamed: 0_level_0,ST_ABBR,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,AL,55136.0,22431.0,20304.0,7006.0,2252.0,24644.0,5012.0,7321.0,14214.0,8700.0,1649.0,13103.0,249.0,842.0,4401.0,530.0,1081.0,442.0,5814.0,43534.0
1003,AL,191205.0,105563.0,73058.0,25988.0,7856.0,26851.0,14615.0,33782.0,43186.0,26603.0,5027.0,32158.0,2571.0,18988.0,12200.0,998.0,2242.0,2611.0,27758.0,177010.0
1005,AL,27119.0,11833.0,9145.0,5832.0,1527.0,17350.0,4790.0,4180.0,5862.0,4856.0,1098.0,14614.0,549.0,129.0,3175.0,178.0,802.0,2869.0,3760.0,29769.0
1007,AL,22653.0,8985.0,7078.0,3596.0,975.0,18110.0,3466.0,3209.0,4903.0,3343.0,637.0,5717.0,112.0,163.0,2379.0,17.0,299.0,1576.0,2725.0,19274.0
1009,AL,57645.0,23868.0,20934.0,9866.0,2291.0,20501.0,8567.0,9172.0,13877.0,9729.0,1650.0,6825.0,954.0,215.0,5497.0,344.0,823.0,572.0,6922.0,41857.0


In [3]:
# read in original txt
deaths_2014 = pd.read_csv('./data/mortality/2014-Heart-Related-Deaths.txt', sep='\t')

# drop unnecessary columns
deaths_2014.drop(['Population', 'Notes', 'Crude Rate', 'County'], axis=1, inplace=True)

# drop any rows with incomplete data
deaths_2014.dropna(inplace=True)

# reassign to integer values and set index
deaths_2014 = deaths_2014.astype(int)
deaths_2014.set_index('County Code', inplace=True)

deaths_2014.head()

Unnamed: 0_level_0,Deaths
County Code,Unnamed: 1_level_1
1001,114
1003,465
1005,90
1007,57
1009,143


In [4]:
# Joining heart deaths and SVI scores for every county
svi_deaths_2014 = pd.concat([svi_2014, deaths_2014], axis=1, ignore_index=False)

# Dropping rows with incomplete data
svi_deaths_2014.dropna(inplace=True)

# writing joined dataframe to csv
svi_deaths_2014.to_csv('./data/svi-mortality/2014-svi-mortality.csv')

svi_deaths_2014.head()

Unnamed: 0,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP,Deaths
1001,55136.0,22431.0,20304.0,7006.0,2252.0,24644.0,5012.0,7321.0,14214.0,8700.0,1649.0,13103.0,249.0,842.0,4401.0,530.0,1081.0,442.0,5814.0,43534.0,114.0
1003,191205.0,105563.0,73058.0,25988.0,7856.0,26851.0,14615.0,33782.0,43186.0,26603.0,5027.0,32158.0,2571.0,18988.0,12200.0,998.0,2242.0,2611.0,27758.0,177010.0,465.0
1005,27119.0,11833.0,9145.0,5832.0,1527.0,17350.0,4790.0,4180.0,5862.0,4856.0,1098.0,14614.0,549.0,129.0,3175.0,178.0,802.0,2869.0,3760.0,29769.0,90.0
1007,22653.0,8985.0,7078.0,3596.0,975.0,18110.0,3466.0,3209.0,4903.0,3343.0,637.0,5717.0,112.0,163.0,2379.0,17.0,299.0,1576.0,2725.0,19274.0,57.0
1009,57645.0,23868.0,20934.0,9866.0,2291.0,20501.0,8567.0,9172.0,13877.0,9729.0,1650.0,6825.0,954.0,215.0,5497.0,344.0,823.0,572.0,6922.0,41857.0,143.0


# 2016 Data

In [5]:
# read in original csv
svi_2016 = pd.read_csv('./data/atsdr-svi/SVI2016_US_COUNTY.csv')

# Setting index as FIPS / County Code
svi_2016.set_index('FIPS', inplace=True)

# Dropping unneeded columns
svi_2016.drop(['FID', 'ST', 'STATE', 'ST_ABBR', 'COUNTY', 'LOCATION', 'AREA_SQMI'], axis=1, inplace=True)

# Dropping computed columns, indicated by starting string
svi_2016 = svi_2016.loc[:, ~svi_2016.columns.str.startswith('M')]
svi_2016 = svi_2016.loc[:, ~svi_2016.columns.str.startswith('EP')]
svi_2016 = svi_2016.loc[:, ~svi_2016.columns.str.startswith('F')]
svi_2016 = svi_2016.loc[:, ~svi_2016.columns.str.startswith('RPL')]
svi_2016 = svi_2016.loc[:, ~svi_2016.columns.str.startswith('SPL')]

svi_2016.head()

Unnamed: 0_level_0,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1001,55049.0,22714.0,20800.0,6697.0,1437.0,26168.0,4528.0,7695.0,13853.0,10009.0,1516.0,13386.0,432.0,1034.0,4095.0,254.0,1024.0,490.0,4852.0,40854.0
1009,57704.0,23850.0,20619.0,9441.0,1367.0,21033.0,7882.0,9921.0,13601.0,8538.0,1614.0,7122.0,1018.0,190.0,5467.0,391.0,816.0,552.0,6388.0,42597.0
1017,34018.0,16905.0,13851.0,6805.0,1136.0,21532.0,4708.0,6255.0,7283.0,5960.0,1354.0,14715.0,114.0,597.0,2695.0,555.0,1110.0,482.0,3979.0,27940.0
1031,50991.0,22862.0,19375.0,8219.0,1410.0,25325.0,5145.0,8048.0,12122.0,8942.0,2018.0,14598.0,716.0,218.0,2863.0,270.0,1126.0,616.0,5253.0,47236.0
1033,54377.0,26156.0,22105.0,8910.0,1795.0,23318.0,6344.0,10034.0,11735.0,10561.0,1879.0,11499.0,104.0,641.0,2478.0,150.0,1361.0,432.0,4932.0,56227.0


In [6]:
# read in original txt
deaths_2016 = pd.read_csv('./data/mortality/2016-Heart-Related-Deaths.txt', sep='\t')

# drop unnecessary columns
deaths_2016.drop(['Population', 'Notes', 'Crude Rate', 'County'], axis=1, inplace=True)

# drop any rows with incomplete data
deaths_2016.dropna(inplace=True)

# reassign to integer values and set index
deaths_2016 = deaths_2016.astype(int)
deaths_2016.set_index('County Code', inplace=True)

deaths_2016.head()

Unnamed: 0_level_0,Deaths
County Code,Unnamed: 1_level_1
1001,118
1003,492
1005,67
1007,63
1009,154


In [7]:
# Joining heart deaths and SVI scores for every county
svi_deaths_2016 = pd.concat([svi_2016, deaths_2016], axis=1, ignore_index=False)

# Dropping rows with incomplete data
svi_deaths_2016.dropna(inplace=True)

# writing joined dataframe to csv
svi_deaths_2016.to_csv('./data/svi-mortality/2016-svi-mortality.csv')

svi_deaths_2016.head()

Unnamed: 0,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP,Deaths
1001,55049.0,22714.0,20800.0,6697.0,1437.0,26168.0,4528.0,7695.0,13853.0,10009.0,1516.0,13386.0,432.0,1034.0,4095.0,254.0,1024.0,490.0,4852.0,40854.0,118.0
1009,57704.0,23850.0,20619.0,9441.0,1367.0,21033.0,7882.0,9921.0,13601.0,8538.0,1614.0,7122.0,1018.0,190.0,5467.0,391.0,816.0,552.0,6388.0,42597.0,154.0
1017,34018.0,16905.0,13851.0,6805.0,1136.0,21532.0,4708.0,6255.0,7283.0,5960.0,1354.0,14715.0,114.0,597.0,2695.0,555.0,1110.0,482.0,3979.0,27940.0,100.0
1031,50991.0,22862.0,19375.0,8219.0,1410.0,25325.0,5145.0,8048.0,12122.0,8942.0,2018.0,14598.0,716.0,218.0,2863.0,270.0,1126.0,616.0,5253.0,47236.0,131.0
1033,54377.0,26156.0,22105.0,8910.0,1795.0,23318.0,6344.0,10034.0,11735.0,10561.0,1879.0,11499.0,104.0,641.0,2478.0,150.0,1361.0,432.0,4932.0,56227.0,161.0


# 2018 Data

In [8]:
# read in original csv
svi_2018 = pd.read_csv('./data/atsdr-svi/SVI2018_US_COUNTY.csv')

# Setting index as FIPS / County Code
svi_2018.set_index('FIPS', inplace=True)

# Dropping unneeded columns
svi_2018.drop(['ST', 'STATE', 'ST_ABBR', 'COUNTY', 'LOCATION', 'AREA_SQMI'], axis=1, inplace=True)

# Dropping computed columns, indicated by starting string
svi_2018 = svi_2018.loc[:, ~svi_2018.columns.str.startswith('M')]
svi_2018 = svi_2018.loc[:, ~svi_2018.columns.str.startswith('EP')]
svi_2018 = svi_2018.loc[:, ~svi_2018.columns.str.startswith('F')]
svi_2018 = svi_2018.loc[:, ~svi_2018.columns.str.startswith('RPL')]
svi_2018 = svi_2018.loc[:, ~svi_2018.columns.str.startswith('SPL')]

svi_2018.head()

Unnamed: 0_level_0,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
35039,39307,20044,12398,-999,-999,-999,3669,7083,9318,6280,1330,34397,755,67,7770,264,763,654,4160,32290
1001,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,3875,37301
1009,57645,24222,20600,8220,909,22656,7861,10233,13468,8114,1437,7413,934,211,6108,339,856,543,6303,40036
1013,20025,10026,6708,4640,567,20430,2141,3806,4566,3492,704,9641,93,134,2625,119,520,322,2005,17280
1015,115098,53682,45033,20819,4628,24706,12620,19386,25196,23598,4701,31675,1076,1990,7904,772,2599,3112,10686,117894


In [9]:
# read in original txt
deaths_2018 = pd.read_csv('./data/mortality/2018-Heart-Related-Deaths.txt', sep='\t')

# drop unnecessary columns
deaths_2018.drop(['Population', 'Notes', 'Crude Rate', 'County'], axis=1, inplace=True)

# drop any rows with incomplete data
deaths_2018.dropna(inplace=True)

# reassign to integer values and set index
deaths_2018 = deaths_2018.astype(int)
deaths_2018.set_index('County Code', inplace=True)

deaths_2018.head()

Unnamed: 0_level_0,Deaths
County Code,Unnamed: 1_level_1
1001,126
1003,574
1005,93
1007,84
1009,179


In [10]:
# Joining heart deaths and SVI scores for every county
svi_deaths_2018 = pd.concat([svi_2018, deaths_2018], axis=1, ignore_index=False)

# Dropping rows with incomplete data
svi_deaths_2018.dropna(inplace=True)

# writing joined dataframe to csv
svi_deaths_2018.to_csv('./data/svi-mortality/2018-svi-mortality.csv')

svi_deaths_2018.head()

Unnamed: 0,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_UNINSUR,E_DAYPOP,Deaths
35039,39307.0,20044.0,12398.0,-999.0,-999.0,-999.0,3669.0,7083.0,9318.0,6280.0,1330.0,34397.0,755.0,67.0,7770.0,264.0,763.0,654.0,4160.0,32290.0,64.0
1001,55200.0,23315.0,21115.0,8422.0,1065.0,29372.0,4204.0,8050.0,13369.0,10465.0,1586.0,13788.0,426.0,886.0,4279.0,299.0,1191.0,546.0,3875.0,37301.0,126.0
1009,57645.0,24222.0,20600.0,8220.0,909.0,22656.0,7861.0,10233.0,13468.0,8114.0,1437.0,7413.0,934.0,211.0,6108.0,339.0,856.0,543.0,6303.0,40036.0,179.0
1013,20025.0,10026.0,6708.0,4640.0,567.0,20430.0,2141.0,3806.0,4566.0,3492.0,704.0,9641.0,93.0,134.0,2625.0,119.0,520.0,322.0,2005.0,17280.0,78.0
1015,115098.0,53682.0,45033.0,20819.0,4628.0,24706.0,12620.0,19386.0,25196.0,23598.0,4701.0,31675.0,1076.0,1990.0,7904.0,772.0,2599.0,3112.0,10686.0,117894.0,477.0


# 2020 Data

In [11]:
# read in original csv
svi_2020 = pd.read_csv('./data/atsdr-svi/SVI2020_US_COUNTY.csv')

# Setting index as FIPS / County Code
svi_2020.set_index('FIPS', inplace=True)

# Dropping unneeded columns
svi_2020.drop(['ST', 'STATE', 'ST_ABBR', 'COUNTY', 'LOCATION', 'AREA_SQMI', 'STCNTY'], axis=1, inplace=True)

# Dropping computed columns, indicated by starting string
svi_2020 = svi_2020.loc[:, ~svi_2020.columns.str.startswith('M')]
svi_2020 = svi_2020.loc[:, ~svi_2020.columns.str.startswith('EP')]
svi_2020 = svi_2020.loc[:, ~svi_2020.columns.str.startswith('F')]
svi_2020 = svi_2020.loc[:, ~svi_2020.columns.str.startswith('RPL')]
svi_2020 = svi_2020.loc[:, ~svi_2020.columns.str.startswith('SPL')]

svi_2020.head()

Unnamed: 0_level_0,E_TOTPOP,E_HU,E_HH,E_POV150,E_UNEMP,E_HBURD,E_NOHSDP,E_UNINSUR,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_LIMENG,E_MINRTY,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_DAYPOP,E_NOINT,E_AFAM,E_HISP,E_ASIAN,E_AIAN,E_NHPI,E_TWOMORE,E_OTHERRACE
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
1001,55639,23697,21559,12611,736,5029,4273,4345,8490,13143,9658,1608,363,14479,918,4313,339,1167,578,41810,7100,10849,1601,649,155,0,1124,101
1003,218289,116747,84047,36413,4027,19350,14823,20501,44716,46993,30615,3317,1593,37334,19513,11893,1280,2627,2954,218607,24453,19027,9947,2033,1327,10,4250,740
1005,25026,12057,9322,8965,649,2305,4497,2362,4777,5222,4159,1029,433,13694,170,3644,350,1039,2910,27133,6249,11889,1110,122,81,1,334,157
1007,22374,9237,7259,5730,667,1580,3056,1878,3676,4584,3748,790,75,5724,228,2943,91,481,1657,18799,3839,4971,600,56,12,0,85,0
1009,57755,24404,21205,13624,1253,4060,6838,5746,10382,13372,8564,1313,801,7690,167,6043,385,1077,564,42172,8987,771,5362,236,49,55,1038,179


In [12]:
# read in original txt
deaths_2020 = pd.read_csv('./data/mortality/2020-Heart-Related-Deaths.txt', sep='\t')

# drop unnecessary columns
deaths_2020.drop(['Population', 'Notes', 'Crude Rate', 'County'], axis=1, inplace=True)

# drop any rows with incomplete data
deaths_2020.dropna(inplace=True)

# reassign to integer values and set index
deaths_2020 = deaths_2020.astype(int)
deaths_2020.set_index('County Code', inplace=True)

deaths_2020.head()

Unnamed: 0_level_0,Deaths
County Code,Unnamed: 1_level_1
1001,155
1003,584
1005,106
1007,78
1009,198


In [13]:
# Joining heart deaths and SVI scores for every county
svi_deaths_2020 = pd.concat([svi_2020, deaths_2020], axis=1, ignore_index=False)

# Dropping rows with incomplete data
svi_deaths_2020.dropna(inplace=True)

# writing joined dataframe to csv
svi_deaths_2020.to_csv('./data/svi-mortality/2020-svi-mortality.csv')

svi_deaths_2020.head()

Unnamed: 0,E_TOTPOP,E_HU,E_HH,E_POV150,E_UNEMP,E_HBURD,E_NOHSDP,E_UNINSUR,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_LIMENG,E_MINRTY,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,E_DAYPOP,E_NOINT,E_AFAM,E_HISP,E_ASIAN,E_AIAN,E_NHPI,E_TWOMORE,E_OTHERRACE,Deaths
1001,55639.0,23697.0,21559.0,12611.0,736.0,5029.0,4273.0,4345.0,8490.0,13143.0,9658.0,1608.0,363.0,14479.0,918.0,4313.0,339.0,1167.0,578.0,41810.0,7100.0,10849.0,1601.0,649.0,155.0,0.0,1124.0,101.0,155.0
1003,218289.0,116747.0,84047.0,36413.0,4027.0,19350.0,14823.0,20501.0,44716.0,46993.0,30615.0,3317.0,1593.0,37334.0,19513.0,11893.0,1280.0,2627.0,2954.0,218607.0,24453.0,19027.0,9947.0,2033.0,1327.0,10.0,4250.0,740.0,584.0
1005,25026.0,12057.0,9322.0,8965.0,649.0,2305.0,4497.0,2362.0,4777.0,5222.0,4159.0,1029.0,433.0,13694.0,170.0,3644.0,350.0,1039.0,2910.0,27133.0,6249.0,11889.0,1110.0,122.0,81.0,1.0,334.0,157.0,106.0
1007,22374.0,9237.0,7259.0,5730.0,667.0,1580.0,3056.0,1878.0,3676.0,4584.0,3748.0,790.0,75.0,5724.0,228.0,2943.0,91.0,481.0,1657.0,18799.0,3839.0,4971.0,600.0,56.0,12.0,0.0,85.0,0.0,78.0
1009,57755.0,24404.0,21205.0,13624.0,1253.0,4060.0,6838.0,5746.0,10382.0,13372.0,8564.0,1313.0,801.0,7690.0,167.0,6043.0,385.0,1077.0,564.0,42172.0,8987.0,771.0,5362.0,236.0,49.0,55.0,1038.0,179.0,198.0
