In [2]:
import sys
import pandas as pd

sys.path.insert(1, '../../TESTING/')

import county_library as CL
import county_stratification as CS

from numpy import corrcoef

In [9]:
def counties_dict():
    """Load the yearly county tables and index them by year.

    The tables come from ``CS.setup_data()`` and are matched to years purely
    by position: the first table is 2009, the second 2010, and so on through
    2018. Before indexing, any table that carries an 'Unnamed: 0' column has
    that column dropped (presumably a stray index column from a CSV round
    trip -- TODO confirm against ``CS.setup_data``).

    Returns:
        dict: maps each year (int, 2009 through 2018) to the table found at
        the matching position of the ``CS.setup_data()`` result.

    Raises:
        IndexError: if ``CS.setup_data()`` yields fewer than ten tables
            (assuming it returns a list). Tables beyond the tenth are ignored.
    """
    stray_column = 'Unnamed: 0'
    tables = CS.setup_data()

    # Cleanup: the trimmed table is written back into the same slot of the
    # sequence returned by CS.setup_data().
    for pos in range(len(tables)):
        table = tables[pos]
        if stray_column in table.columns:
            tables[pos] = table.drop([stray_column], axis=1)

    # Pair each year with the table sitting at the same position.
    return {year: tables[pos] for pos, year in enumerate(range(2009, 2019))}

def correlation_coefficient(counties, column, target='Death Rate', years=None):
    """Correlate one column against a target column, separately for each year.

    For every requested year, ``numpy.corrcoef`` is applied to
    ``counties[year][column]`` and ``counties[year][target]`` and the
    off-diagonal entry ``[0, 1]`` of the resulting 2x2 matrix is kept, i.e.
    the correlation between the two columns.

    Args:
        counties: mapping from year to a table that supports ``table[name]``
            column lookup (e.g. the dict built by ``counties_dict``).
        column: name of the column to correlate with ``target``.
        target: name of the column to correlate against. Defaults to
            'Death Rate', the column this function originally hard-coded.
        years: iterable of year keys to process. Defaults to 2009 through
            2018 inclusive, the range this function originally hard-coded.

    Returns:
        dict: year -> correlation coefficient, in the order of ``years``.

    Raises:
        KeyError: if a requested year is missing from ``counties`` or a
            table lacks ``column`` or ``target``.

    Note:
        Values are passed to ``corrcoef`` as-is; missing values are not
        filtered out here.
    """
    if years is None:
        years = range(2009, 2019)

    return {
        year: corrcoef(counties[year][column], counties[year][target])[0, 1]
        for year in years
    }

def correlation_conversion(coeffs):
    """Flatten a mapping into a list of ``[key, value]`` rows.

    Args:
        coeffs: dict such as the one returned by ``correlation_coefficient``.

    Returns:
        list: one two-element list ``[key, value]`` per entry of ``coeffs``,
        in the mapping's iteration order; ``[]`` for an empty mapping.
    """
    return [[key, value] for key, value in coeffs.items()]

# Load Data

In [4]:
# Build the year -> county-table mapping (2009 through 2018) that the
# correlation cells below read from.
counties = counties_dict()

# Unemployment vs. Death Rate

In [15]:
# Per-year correlation between each table's 'Unemployment Rate' column and
# its 'Death Rate' column (see correlation_coefficient).
unemployment = correlation_coefficient(counties, 'Unemployment Rate')
# Column labels for the summary table shown as this cell's output.
cols = ['Year', 'Coeff']
# Last expression of the cell: rendered as a table with one row per year.
pd.DataFrame(correlation_conversion(unemployment), columns=cols)

Unnamed: 0,Year,Coeff
0,2009,0.219919
1,2010,0.250187
2,2011,0.156412
3,2012,0.163611
4,2013,0.195701
5,2014,0.244008
6,2015,0.252279
7,2016,0.157751
8,2017,0.22404
9,2018,0.235454


# Income vs. Death Rate

In [16]:
# Per-year correlation between each table's 'Income' column and its
# 'Death Rate' column (see correlation_coefficient).
income = correlation_coefficient(counties, 'Income')
# Column labels for the summary table shown as this cell's output.
cols = ['Year', 'Coeff']
# Last expression of the cell: rendered as a table with one row per year.
pd.DataFrame(correlation_conversion(income), columns=cols)

Unnamed: 0,Year,Coeff
0,2009,-0.434103
1,2010,-0.437332
2,2011,-0.435347
3,2012,-0.386342
4,2013,-0.363215
5,2014,-0.374275
6,2015,-0.34366
7,2016,-0.30097
8,2017,-0.312853
9,2018,-0.303193


# Poverty Rate vs. Death Rate

In [17]:
# Per-year correlation between each table's 'Poverty Rate' column and its
# 'Death Rate' column (see correlation_coefficient).
poverty_rate = correlation_coefficient(counties, 'Poverty Rate')
# Column labels for the summary table shown as this cell's output.
cols = ['Year', 'Coeff']
# Last expression of the cell: rendered as a table with one row per year.
pd.DataFrame(correlation_conversion(poverty_rate), columns=cols)

Unnamed: 0,Year,Coeff
0,2009,0.371809
1,2010,0.432526
2,2011,0.417436
3,2012,0.302684
4,2013,0.304385
5,2014,0.302525
6,2015,0.261573
7,2016,0.196016
8,2017,0.243295
9,2018,0.241726


# Prescription Rate vs. Death Rate

In [18]:
# Per-year correlation between each table's 'Prescription Rate' column and
# its 'Death Rate' column (see correlation_coefficient).
prescription_rate = correlation_coefficient(counties, 'Prescription Rate')
# Column labels for the summary table shown as this cell's output.
cols = ['Year', 'Coeff']
# Last expression of the cell: rendered as a table with one row per year.
pd.DataFrame(correlation_conversion(prescription_rate), columns=cols)

Unnamed: 0,Year,Coeff
0,2009,0.504983
1,2010,0.655358
2,2011,0.649015
3,2012,0.582052
4,2013,0.556451
5,2014,0.525114
6,2015,0.438865
7,2016,0.284216
8,2017,0.214884
9,2018,0.218648
