In [1]:
#Import Dependencies
import pandas as pd
from scipy.stats import spearmanr

In [2]:
#Import Wonder Data, and add to DataFrame - 3106 rows with pertinent data in CSV
mortality_df = pd.read_csv('Desktop/2016Mortality.txt', sep="\t")
mortality_df = mortality_df.head(3105)

In [3]:
#Transform DataFrame to show columns of interest - remove crude rate and notes, divide deaths/population per 1000
mortality_df = mortality_df[['County', 'County Code', 'Deaths', 'Population']]
mortality_df['Death Rate per 1000'] = (mortality_df['Deaths']/mortality_df['Population'] * 1000).round(1)
mortality_df['County Code'] = mortality_df['County Code'].astype(int)
mortality_df.head()

Unnamed: 0,County,County Code,Deaths,Population,Death Rate per 1000
0,"Autauga County, AL",1001,520.0,55416.0,9.4
1,"Baldwin County, AL",1003,1974.0,208563.0,9.5
2,"Barbour County, AL",1005,256.0,25965.0,9.9
3,"Bibb County, AL",1007,239.0,22643.0,10.6
4,"Blount County, AL",1009,697.0,57704.0,12.1


In [4]:
#Import SVI CSV
svi_df = pd.read_csv('Desktop/SVI2016_US_COUNTY.csv')
svi_df.head()

Unnamed: 0,FID,ST,STATE,ST_ABBR,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,M_TOTPOP,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,0,1,ALABAMA,AL,Autauga,1001,"Autauga County, Alabama",594.44612,55049,0,...,0,0,0,0,0,4852,649,8.9,1.2,40854
1,1,1,ALABAMA,AL,Blount,1009,"Blount County, Alabama",644.806508,57704,0,...,0,0,0,0,0,6388,740,11.2,1.3,42597
2,2,1,ALABAMA,AL,Chambers,1017,"Chambers County, Alabama",596.531112,34018,0,...,0,0,0,0,0,3979,544,11.8,1.6,27940
3,3,1,ALABAMA,AL,Coffee,1031,"Coffee County, Alabama",678.985652,50991,0,...,0,0,0,0,0,5253,464,10.7,0.9,47236
4,4,1,ALABAMA,AL,Colbert,1033,"Colbert County, Alabama",592.619664,54377,0,...,0,0,0,0,0,4932,458,9.1,0.8,56227


In [5]:
#Merge Mortality and SVI on County Code
merged_df = pd.merge(mortality_df, svi_df, how='right', left_on='County Code', right_on='FIPS')
merged_df.head()

Unnamed: 0,County,County Code,Deaths,Population,Death Rate per 1000,FID,ST,STATE,ST_ABBR,COUNTY,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,"Autauga County, AL",1001.0,520.0,55416.0,9.4,0,1,ALABAMA,AL,Autauga,...,0,0,0,0,0,4852,649,8.9,1.2,40854
1,"Baldwin County, AL",1003.0,1974.0,208563.0,9.5,1341,1,ALABAMA,AL,Baldwin,...,0,0,0,1,1,23255,1817,11.8,0.9,197683
2,"Barbour County, AL",1005.0,256.0,25965.0,9.9,3074,1,ALABAMA,AL,Barbour,...,0,0,1,2,8,3079,385,13.0,1.6,27321
3,"Bibb County, AL",1007.0,239.0,22643.0,10.6,2113,1,ALABAMA,AL,Bibb,...,0,0,1,2,2,1859,400,9.0,1.9,18756
4,"Blount County, AL",1009.0,697.0,57704.0,12.1,1,1,ALABAMA,AL,Blount,...,0,0,0,0,0,6388,740,11.2,1.3,42597


In [6]:
merged_df.to_csv('righttest.csv')

In [7]:
#Merge Mortality and SVI on County Code
merged_df2 = pd.merge(mortality_df, svi_df, how='inner', left_on='County Code', right_on='FIPS')
merged_df2.head()

Unnamed: 0,County,County Code,Deaths,Population,Death Rate per 1000,FID,ST,STATE,ST_ABBR,COUNTY,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,"Autauga County, AL",1001,520.0,55416.0,9.4,0,1,ALABAMA,AL,Autauga,...,0,0,0,0,0,4852,649,8.9,1.2,40854
1,"Baldwin County, AL",1003,1974.0,208563.0,9.5,1341,1,ALABAMA,AL,Baldwin,...,0,0,0,1,1,23255,1817,11.8,0.9,197683
2,"Barbour County, AL",1005,256.0,25965.0,9.9,3074,1,ALABAMA,AL,Barbour,...,0,0,1,2,8,3079,385,13.0,1.6,27321
3,"Bibb County, AL",1007,239.0,22643.0,10.6,2113,1,ALABAMA,AL,Bibb,...,0,0,1,2,2,1859,400,9.0,1.9,18756
4,"Blount County, AL",1009,697.0,57704.0,12.1,1,1,ALABAMA,AL,Blount,...,0,0,0,0,0,6388,740,11.2,1.3,42597


In [10]:
#Perform Spearman correlation to determine if relationship between mortality, SVI index
correlations = {}
columns = merged_df2.columns.tolist()

for column in columns:
    correlations[column] = spearmanr(merged_df2['Death Rate per 1000'], merged_df2[column])



In [23]:
#Add correlation values to DataFrame
result = pd.DataFrame.from_dict(correlations, orient='index')
result.columns = ['SCC', 'p-value']

result.sort_index()

Unnamed: 0,SCC,p-value
AREA_SQMI,-0.115421,1.129823e-10
COUNTY,-0.019261,2.834463e-01
County,-0.019336,2.815809e-01
County Code,0.000983,9.563544e-01
Death Rate per 1000,1.000000,0.000000e+00
Deaths,-0.276756,1.131980e-55
EPL_AGE17,-0.321897,9.732375e-76
EPL_AGE65,0.700513,0.000000e+00
EPL_CROWD,-0.238243,2.683696e-41
EPL_DISABL,0.651990,0.000000e+00


In [24]:
#Sort Data Frame by SCC
sorted_df = result.sort_values(by=['SCC'])
sorted_df

Unnamed: 0,SCC,p-value
E_LIMENG,-0.535116,1.808915e-229
M_LIMENG,-0.501235,4.236370e-197
E_MINRTY,-0.461334,2.222176e-163
E_CROWD,-0.455775,5.005209e-159
E_AGE17,-0.453982,1.218931e-157
M_MUNIT,-0.449123,6.340737e-154
M_CROWD,-0.444777,1.188585e-150
EPL_LIMENG,-0.442789,3.604155e-149
EP_LIMENG,-0.442527,5.639028e-149
E_MUNIT,-0.439673,7.231035e-147
