In [1]:
import pandas as pd
# Notebook-wide pandas display settings: do not wrap wide frames across
# several blocks, and do not truncate columns or rows when rendering.
for _option, _value in (
    ('display.expand_frame_repr', False),
    ('display.max_columns', None),
    ('display.max_rows', None),
):
    pd.set_option(_option, _value)

In [2]:
def create_real_fips(ffips):
    """Put back the leading zero on a FIPS code that is one character short.

    A code of length 1 or 4 is returned with a single '0' prepended, giving
    the 2- or 5-character form. A code of any other length is returned
    unchanged.

    Parameters
    ----------
    ffips : str
        FIPS code as text.

    Returns
    -------
    str
        The code, zero-padded when its length was 1 or 4.
    """
    missing_zero = len(ffips) in (1, 4)
    return '0' + ffips if missing_zero else ffips

In [3]:
def find_fips(geoid):
    """Return the FIPS code found at the end of a GEO_ID string.

    If the two characters just before the final two are 'US', the final two
    characters are returned. Otherwise the final five characters are returned.

    Parameters
    ----------
    geoid : str
        Identifier such as '0400000US01' or '0500000US01001'.

    Returns
    -------
    str
        The trailing 2- or 5-character slice of ``geoid``.
    """
    code_length = 2 if geoid[-4:-2] == 'US' else 5
    return geoid[-code_length:]
        

## CENSUS AGE COUNTS

Source: [ACS 2018, 5-year](https://data.census.gov/cedsci/table?q=age&g=0100000US.050000&tid=ACSDP5Y2018.DP05&t=Age%20and%20Sex&vintage=2018&hidePreview=true&tp=true)

In [4]:
# Columns read from the ACS DP05 export: the geography identifiers plus the
# estimate columns that are renamed to population counts and median age below.
trows = ['GEO_ID','NAME','DP05_0001E','DP05_0005E','DP05_0006E','DP05_0007E',
         'DP05_0008E','DP05_0009E','DP05_0010E','DP05_0011E','DP05_0012E',
         'DP05_0013E','DP05_0014E','DP05_0015E','DP05_0016E','DP05_0017E',
         'DP05_0018E','DP05_0023E','DP05_0024E']

# dtype=str: the first data row of this export holds descriptive column titles
# (it is removed below), so letting pandas infer types makes every column a
# text/number mix. The saved output of this cell shows the tail of a pandas
# warning at read time (presumably the mixed-types DtypeWarning - confirm).
# Reading everything as text keeps each column consistent; the columns needed
# as numbers are converted explicitly afterwards.
age = pd.read_csv('../data/ACSDP5Y2018.DP05_data_with_overlays_2020-04-15T133113.csv',
                  usecols=trows, dtype=str)

# Census variable code -> readable column name.
col_rename = {'DP05_0001E':'pop','DP05_0005E':'pop_under5','DP05_0006E':'pop_5to9','DP05_0007E':'pop_10to14',
              'DP05_0008E':'pop_15to19','DP05_0009E':'pop_20to24','DP05_0010E':'pop_25to34',
              'DP05_0011E':'pop_35to44','DP05_0012E':'pop_45to54','DP05_0013E':'pop_55to59',
              'DP05_0014E':'pop_60to64','DP05_0015E':'pop_65to74','DP05_0016E':'pop_75to84',
              'DP05_0017E':'pop_85plus','DP05_0018E':'median_age','DP05_0023E':'pop_62plus',
              'DP05_0024E':'pop_65plus'}
age = age.rename(columns=col_rename)

# Remove the row (index 0) that contains the human-readable column titles.
age = age.loc[age.index != 0].copy()

# Derive the join key from GEO_ID; mapping the column directly avoids a
# row-by-row DataFrame.apply.
age['cnty_fips'] = age['GEO_ID'].map(find_fips)

# Only these three counts are converted to numbers; values that cannot be
# parsed become NaN.
for count_col in ('pop_62plus', 'pop_65plus', 'pop_85plus'):
    age[count_col] = pd.to_numeric(age[count_col], errors='coerce')

age = age.set_index('cnty_fips')

  interactivity=interactivity, compiler=compiler, result=result)


## CENSUS AGE PERCENTAGES
Source: [ACS 2018, 5-year](https://data.census.gov/cedsci/table?q=age&g=0100000US.050000&tid=ACSDP5Y2018.DP05&t=Age%20and%20Sex&vintage=2018&hidePreview=true&tp=true)

In [7]:
# Columns read from the ACS DP05 export: the geography identifiers, total
# population, median age and the percent ("PE") columns renamed below.
prows = ['GEO_ID','NAME','DP05_0001E','DP05_0005PE','DP05_0006PE','DP05_0007PE',
         'DP05_0008PE','DP05_0009PE','DP05_0010PE','DP05_0011PE','DP05_0012PE',
         'DP05_0013PE','DP05_0014PE','DP05_0015PE','DP05_0016PE','DP05_0017PE',
         'DP05_0018E','DP05_0023PE','DP05_0024PE']

# dtype=str: the first data row of this export holds descriptive column titles
# (it is removed below), so letting pandas infer types makes every column a
# text/number mix. The saved output of this cell shows the tail of a pandas
# warning at read time (presumably the mixed-types DtypeWarning - confirm).
# Reading everything as text keeps each column consistent; the columns needed
# as numbers are converted explicitly afterwards.
perc_age = pd.read_csv('../data/ACSDP5Y2018.DP05_data_with_overlays_2020-04-15T133113.csv',
                       usecols=prows, dtype=str)

# Census variable code -> readable column name.
col_rename = {'DP05_0001E':'pop','DP05_0005PE':'perc_under5','DP05_0006PE':'perc_5to9','DP05_0007PE':'perc_10to14',
              'DP05_0008PE':'perc_15to19','DP05_0009PE':'perc_20to24','DP05_0010PE':'perc_25to34',
              'DP05_0011PE':'perc_35to44','DP05_0012PE':'perc_45to54','DP05_0013PE':'perc_55to59',
              'DP05_0014PE':'perc_60to64','DP05_0015PE':'perc_65to74','DP05_0016PE':'perc_75to84',
              'DP05_0017PE':'perc_85plus','DP05_0018E':'median_age','DP05_0023PE':'perc_62plus',
              'DP05_0024PE':'perc_65plus'}
perc_age = perc_age.rename(columns=col_rename)

# Remove the row (index 0) that contains the human-readable column titles.
perc_age = perc_age.loc[perc_age.index != 0].copy()

# Derive the join key from GEO_ID; mapping the column directly avoids a
# row-by-row DataFrame.apply.
perc_age['cnty_fips'] = perc_age['GEO_ID'].map(find_fips)

# Only these three percentages are converted to numbers; values that cannot
# be parsed become NaN.
for percent_col in ('perc_62plus', 'perc_65plus', 'perc_85plus'):
    perc_age[percent_col] = pd.to_numeric(perc_age[percent_col], errors='coerce')

perc_age = perc_age.set_index('cnty_fips')

  interactivity=interactivity, compiler=compiler, result=result)


In [8]:
# Preview the first five rows of the percentage table.
perc_age.head(n=5)

Unnamed: 0_level_0,GEO_ID,NAME,pop,perc_under5,perc_5to9,perc_10to14,perc_15to19,perc_20to24,perc_25to34,perc_35to44,perc_45to54,perc_55to59,perc_60to64,perc_65to74,perc_75to84,perc_85plus,median_age,perc_62plus,perc_65plus
cnty_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0400000US01,Alabama,4864680,6.0,6.2,6.4,6.7,6.9,13.0,12.3,13.2,6.9,6.4,9.5,4.9,1.7,38.9,19.8,16.1
2,0400000US02,Alaska,738516,7.3,7.1,6.7,6.4,7.7,16.0,12.5,12.6,6.9,5.9,7.2,2.5,0.8,34.0,13.9,10.6
4,0400000US04,Arizona,6946685,6.3,6.5,6.7,6.7,7.0,13.6,12.3,12.2,6.1,5.9,9.7,5.1,1.9,37.4,20.2,16.7
5,0400000US05,Arkansas,2990671,6.4,6.6,6.6,6.7,6.8,13.0,12.3,12.6,6.5,6.1,9.5,4.9,1.9,37.9,19.9,16.3
6,0400000US06,California,39148760,6.3,6.4,6.5,6.6,7.2,15.1,13.2,13.2,6.3,5.6,7.8,4.0,1.8,36.3,16.8,13.6


## DISEASE DATA: Diabetes

Rates are percentages of the population. Rate out of 100 people.

Source: Dwyer-Lindgren L, Mackenbach JP, van Lenthe FJ, Flaxman AD, Mokdad AH. Diagnosed and undiagnosed diabetes prevalence by county in the US, 1999–2012. Diabetes Care. 2016 August 23; 39:1556–1562. doi: 10.2337/dc16-0678. via [The Institute for Health Metrics and Evaluation (IHME) Global Health Data Exchange (GHDx)](http://ghdx.healthdata.org/us-data)

In [9]:
# Only the location label, the FIPS code and the 2012 both-sexes prevalence
# are kept from the 'Total' sheet.
diabetes_cols = ['Location','FIPS','Prevalence, 2012, Both Sexes']

# The first row of the sheet is skipped; FIPS is read as text and the
# prevalence as float.
diabetes = pd.read_excel(
    '../data/IHME_USA_COUNTY_DIABETES_PREVALENCE_1999_2012_NATIONAL_Y2016M08D23.XLSX',
    sheet_name='Total',
    skiprows=[0],
    dtype={'FIPS':str,'Prevalence, 2012, Both Sexes':float},
)[diabetes_cols]

In [10]:
# Rows without a FIPS code cannot be keyed, so they are dropped.
diabetes = diabetes.dropna(subset=['FIPS'])

# Zero-pad the FIPS code; its first two characters are the state code.
diabetes['cnty_fips'] = diabetes['FIPS'].map(create_real_fips)
diabetes['state_fips'] = diabetes['cnty_fips'].str[:2]

# Build the 2012 table as a new frame. rename() and set_index() without
# inplace=True return fresh objects, so nothing is written into a column
# subset of `diabetes` (the in-place rename raised SettingWithCopyWarning).
diabetes_2012 = (
    diabetes[['Location','cnty_fips','state_fips','Prevalence, 2012, Both Sexes']]
    .rename(columns={'Prevalence, 2012, Both Sexes':'diabetes_rate'})
    .set_index('cnty_fips')
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [11]:
# Row count, followed by a preview of the first five rows.
print(diabetes_2012.shape[0])
diabetes_2012.head(n=5)

3193


Unnamed: 0_level_0,Location,state_fips,diabetes_rate
cnty_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Alabama,1,16.19
1001,Autauga County,1,15.45
1003,Baldwin County,1,13.44
1005,Barbour County,1,19.36
1007,Bibb County,1,16.68


## DISEASE DATA: Cardiovascular Disease

Rates are deaths per 100,000 population.

Source: Roth GA, Dwyer-Lindgren, Bertozzi-Villa A, Stubbs RW, Morozoff C, Naghavi M, Mokdad AH, Murray CJL. Trends and patterns of geographic variation in cardiovascular mortality among US counties, 1980–2014. JAMA. 2017 May 16. via [The Institute for Health Metrics and Evaluation (IHME) Global Health Data Exchange (GHDx)](http://ghdx.healthdata.org/us-data)

In [12]:
# Read the 'Cardiovascular diseases' sheet, skipping its first row and
# keeping FIPS as text.
cvd_data = pd.read_excel(
    '../data/IHME_USA_COUNTY_CVD_MORTALITY_RATES_1980_2014_NATIONAL_Y2017M05D16.XLSX',
    sheet_name='Cardiovascular diseases',
    dtype={'FIPS':str},
    skiprows=[0],
)

In [13]:
# Keep the location, FIPS code, 2014 rate and 1980-2014 change, under
# shorter column names.
cvd_2014 = (
    cvd_data[['Location', 'FIPS', 'Mortality Rate, 2014*','% Change in Mortality Rate, 1980-2014']]
    .rename(columns={'Mortality Rate, 2014*':'cvd_rate',
                     '% Change in Mortality Rate, 1980-2014':'cvd_chg_1980_2014'})
)

# Both value columns are text; keep only what precedes the first "(" and
# parse it as a float. (Presumably the parenthesised part is an uncertainty
# interval - confirm against the workbook.)
for value_col in ('cvd_rate', 'cvd_chg_1980_2014'):
    cvd_2014[value_col] = cvd_2014[value_col].str.split(pat = "(").str[0].astype('float64')

# Drop rows without a FIPS code, then derive the padded county key and the
# two-character state key.
cvd_2014 = cvd_2014.dropna(subset=['FIPS'])
cvd_2014['cnty_fips'] = cvd_2014['FIPS'].map(create_real_fips)
cvd_2014['state_fips'] = cvd_2014['cnty_fips'].str[:2]

cvd_2014 = (
    cvd_2014[['Location', 'cnty_fips', 'state_fips', 'cvd_rate', 'cvd_chg_1980_2014']]
    .set_index('cnty_fips')
)

In [14]:
# Row count, followed by a preview of the first five rows.
print(cvd_2014.shape[0])
cvd_2014.head(n=5)

3193


Unnamed: 0_level_0,Location,state_fips,cvd_rate,cvd_chg_1980_2014
cnty_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Alabama,1,310.59,-39.91
1001,"Autauga County, Alabama",1,316.36,-42.76
1003,"Baldwin County, Alabama",1,272.04,-38.96
1005,"Barbour County, Alabama",1,255.09,-50.52
1007,"Bibb County, Alabama",1,378.09,-31.09


## DISEASE DATA: Chronic Respiratory Disease

Rates are deaths per 100,000 population.

Source: Dwyer-Lindgren L, Bertozzi-Villa A, Stubbs RW, Morozoff C, Shirude S, Naghavi M, Mokdad AH, Murray CJL. Trends and patterns of differences in chronic respiratory disease mortality among US counties, 1980–2014. JAMA. 25 Sept 2017; 318(12):1136-1149. doi:10.1001/jama.2017.11747. via [The Institute for Health Metrics and Evaluation (IHME) Global Health Data Exchange (GHDx)](http://ghdx.healthdata.org/us-data)

In [15]:
# Read the 'Chronic respiratory diseases' sheet, skipping its first row and
# keeping FIPS as text.
crd_data = pd.read_excel(
    '../data/IHME_USA_COUNTY_RESP_DISEASE_MORTALITY_1980_2014_NATIONAL_Y2017M09D26.xlsx',
    sheet_name='Chronic respiratory diseases',
    dtype={'FIPS':str},
    skiprows=[0],
)

In [16]:
# Keep the location, FIPS code, 2014 rate and 1980-2014 change, under
# shorter column names.
crd_2014 = (
    crd_data[['Location', 'FIPS', 'Mortality Rate, 2014*','% Change in Mortality Rate, 1980-2014']]
    .rename(columns={'Mortality Rate, 2014*':'crd_rate',
                     '% Change in Mortality Rate, 1980-2014':'crd_chg_1980_2014'})
)

# Both value columns are text; keep only what precedes the first "(" and
# parse it as a float. (Presumably the parenthesised part is an uncertainty
# interval - confirm against the workbook.)
for value_col in ('crd_rate', 'crd_chg_1980_2014'):
    crd_2014[value_col] = crd_2014[value_col].str.split(pat = "(").str[0].astype('float64')

# Drop rows without a FIPS code, then derive the padded county key and the
# two-character state key.
crd_2014 = crd_2014.dropna(subset=['FIPS'])
crd_2014['cnty_fips'] = crd_2014['FIPS'].map(create_real_fips)
crd_2014['state_fips'] = crd_2014['cnty_fips'].str[:2]

crd_2014 = (
    crd_2014[['Location', 'cnty_fips', 'state_fips', 'crd_rate', 'crd_chg_1980_2014']]
    .set_index('cnty_fips')
)

In [17]:
# Row count, followed by a preview of the first five rows.
print(crd_2014.shape[0])
crd_2014.head(n=5)

3193


Unnamed: 0_level_0,Location,state_fips,crd_rate,crd_chg_1980_2014
cnty_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Alabama,1,73.44,73.08
1001,"Autauga County, Alabama",1,81.79,75.67
1003,"Baldwin County, Alabama",1,54.28,46.1
1005,"Barbour County, Alabama",1,69.82,62.98
1007,"Bibb County, Alabama",1,84.49,83.11


## RISK ANALYSIS

We're going to combine the COVID-19 risk measures for each county to create a county-by-county risk analysis.

Datasets defined in this notebook:
- `crd_2014` - Chronic respiratory disease mortality rates, deaths per 100,000, 2014
- `cvd_2014` - Cardiovascular disease mortality rates, deaths per 100,000, 2014
- `diabetes_2012` - Diabetes prevalence, percent of the population that has diabetes, 2012
- `age` - Population by age group, count of people in each age group, 2018 ACS 5-year
- `perc_age` - Population by age group, percent of the population in each age group, 2018 ACS 5-year
- ~`beds_by_cnty` - CMS hospital count and bed data, 2017 CMS cost reports~

In [18]:
# Every table is indexed by cnty_fips, so each step is an index-on-index left
# merge; columns present on both sides receive the given suffixes.

# Age percentages + age counts.
pop_merge = perc_age.merge(age, how='left', left_index=True, right_index=True,
                           suffixes=('_perc', '_cnt'))

# Cardiovascular + chronic respiratory mortality.
cd_merge = cvd_2014.merge(crd_2014, how='left', left_index=True, right_index=True,
                          suffixes=('_cvd', '_crd'))

# ... + diabetes prevalence.
disease_merge = cd_merge.merge(diabetes_2012, how='left', left_index=True, right_index=True,
                               suffixes=('_cd', '_dia'))

# Population table on the left, so every population row is kept.
risk_data = pop_merge.merge(disease_merge, how='left', left_index=True, right_index=True,
                            suffixes=('_pop', '_dis'))

In [19]:
# Age brackets that have both a perc_* and a pop_* column.
age_brackets = ['under5', '5to9', '10to14', '15to19', '20to24', '25to34',
                '35to44', '45to54', '55to59', '60to64', '65to74', '75to84']

keep_cols = (
    ['GEO_ID_perc', 'state_fips_cvd', 'NAME_perc', 'pop_perc', 'median_age_perc']
    + ['perc_62plus', 'perc_65plus', 'perc_85plus']
    + ['pop_62plus', 'pop_65plus', 'pop_85plus']
    + ['cvd_rate', 'cvd_chg_1980_2014', 'crd_rate', 'crd_chg_1980_2014', 'diabetes_rate']
    # the bracket columns below can be omitted if you want a shorter dataset
    + ['perc_' + bracket for bracket in age_brackets]
    + ['pop_' + bracket for bracket in age_brackets]
)

# Select the columns and give the suffixed ones their final names.
risk_data = risk_data[keep_cols].rename(columns={
    'GEO_ID_perc':'geoid',
    'state_fips_cvd':'state_fips',
    'NAME_perc':'cnty_name',
    'pop_perc':'population',
    'median_age_perc':'median_age',
})

In [20]:
# Quartile rank (1 = lowest quarter, 4 = highest) of each risk measure,
# computed across every row of risk_data.
# NOTE(review): an earlier comment here said the quartiles came from regional
# data only; the inputs loaded in this notebook are the national files, so
# that note looks stale - confirm. The risk_data preview also shows state-level
# rows (geoid 0400000US..) alongside counties, so both are ranked together.
#
# A row whose measure is missing keeps a missing quartile (<NA>). Previously
# the NaN from qcut became category code -1 and then 0 after the +1, so such
# rows were given a non-existent quartile 0 and an understated us_q_total.
quartile_inputs = [
    ('us_diabetes_q', 'diabetes_rate'),
    ('us_cvd_q', 'cvd_rate'),
    ('us_crd_q', 'crd_rate'),
    ('us_62plus_q', 'perc_62plus'),
]
for quartile_col, measure_col in quartile_inputs:
    # labels=False gives bin numbers 0-3, NaN where the measure is missing.
    zero_based_bin = pd.qcut(risk_data[measure_col], 4, labels=False)
    # Nullable integer dtype so a missing quartile stays <NA> instead of
    # forcing the whole column to float.
    risk_data[quartile_col] = (zero_based_bin + 1).astype('Int64')

# Sum of the four quartiles; <NA> propagates, so the total is missing when
# any component is missing.
risk_data['us_q_total'] = (risk_data['us_diabetes_q'] + risk_data['us_cvd_q']
                           + risk_data['us_crd_q'] + risk_data['us_62plus_q'])

In [21]:
# Final column order: identifiers, headline measures, quartiles, change
# columns, older-age summaries, the age-bracket detail, then median age.
reorder_cols = [
    'geoid', 'state_fips', 'cnty_name', 'population',
    'perc_62plus', 'cvd_rate', 'crd_rate', 'diabetes_rate',
    'us_diabetes_q', 'us_cvd_q', 'us_crd_q', 'us_62plus_q', 'us_q_total',
    'cvd_chg_1980_2014', 'crd_chg_1980_2014',
    'perc_65plus', 'perc_85plus',
    'pop_62plus', 'pop_65plus', 'pop_85plus',
    'perc_under5', 'perc_5to9', 'perc_10to14', 'perc_15to19', 'perc_20to24',
    'perc_25to34', 'perc_35to44', 'perc_45to54', 'perc_55to59', 'perc_60to64',
    'perc_65to74', 'perc_75to84',
    'pop_under5', 'pop_5to9', 'pop_10to14', 'pop_15to19', 'pop_20to24',
    'pop_25to34', 'pop_35to44', 'pop_45to54', 'pop_55to59', 'pop_60to64',
    'pop_65to74', 'pop_75to84',
    'median_age',
]
risk_data = risk_data.loc[:, reorder_cols]

In [24]:
# First ten rows in ascending cnty_fips order.
risk_data.sort_values(by='cnty_fips').head(n=10)

Unnamed: 0_level_0,geoid,state_fips,cnty_name,population,perc_62plus,cvd_rate,crd_rate,diabetes_rate,us_diabetes_q,us_cvd_q,us_crd_q,us_62plus_q,us_q_total,cvd_chg_1980_2014,crd_chg_1980_2014,perc_65plus,perc_85plus,pop_62plus,pop_65plus,pop_85plus,perc_under5,perc_5to9,perc_10to14,perc_15to19,perc_20to24,perc_25to34,perc_35to44,perc_45to54,perc_55to59,perc_60to64,perc_65to74,perc_75to84,pop_under5,pop_5to9,pop_10to14,pop_15to19,pop_20to24,pop_25to34,pop_35to44,pop_45to54,pop_55to59,pop_60to64,pop_65to74,pop_75to84,median_age
cnty_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
1,0400000US01,1,Alabama,4864680,19.8,310.59,73.44,16.19,4,3,4,2,13,-39.91,73.08,16.1,1.7,963974,783832,83436,6.0,6.2,6.4,6.7,6.9,13.0,12.3,13.2,6.9,6.4,9.5,4.9,292125,302174,312093,323914,334416,632660,599382,641069,334011,309004,463057,237339,38.9
1001,0500000US01001,1,"Autauga County, Alabama",55200,17.0,316.36,81.79,15.45,3,4,4,1,12,-42.76,75.67,14.6,1.5,9403,8050,815,5.9,7.3,6.5,7.0,6.0,12.8,13.4,14.2,7.5,4.9,8.5,4.6,3263,4009,3570,3855,3337,7064,7374,7853,4131,2694,4711,2524,37.8
1003,0500000US01003,1,"Baldwin County, Alabama",208107,23.9,272.04,54.28,13.44,2,3,2,3,10,-38.96,46.1,19.5,1.9,49811,40665,3949,5.6,5.6,6.9,6.1,5.2,11.2,12.2,13.6,6.9,7.1,11.8,5.8,11609,11689,14323,12707,10790,23326,25377,28330,14440,14851,24551,12165,42.8
1005,0500000US01005,1,"Barbour County, Alabama",25782,21.9,255.09,69.82,19.36,4,2,3,2,11,-50.52,62.98,18.0,1.6,5658,4634,422,5.4,5.6,6.5,5.6,6.4,14.3,12.0,13.4,6.4,6.4,10.9,5.4,1390,1450,1677,1434,1658,3675,3104,3452,1657,1651,2820,1392,39.9
1007,0500000US01007,1,"Bibb County, Alabama",22527,19.2,378.09,84.49,16.68,4,4,4,2,14,-31.09,83.11,16.3,1.9,4323,3661,427,5.7,5.2,5.7,6.7,6.6,13.7,12.9,15.3,6.6,5.4,9.4,4.9,1275,1178,1289,1514,1491,3075,2895,3439,1494,1216,2122,1112,39.9
1009,0500000US01009,1,"Blount County, Alabama",57645,21.5,307.9,87.05,15.06,3,3,4,2,12,-34.94,99.3,17.8,1.5,12400,10233,866,6.0,6.3,6.9,6.4,5.5,11.8,12.4,13.8,6.8,6.3,10.6,5.6,3485,3632,3995,3717,3189,6786,7153,7947,3895,3613,6132,3235,40.8
1011,0500000US01011,1,"Bullock County, Alabama",10352,19.6,322.56,51.49,23.33,4,4,1,2,11,-46.03,44.81,15.6,1.7,2026,1616,175,5.8,6.1,5.2,7.5,6.8,10.8,15.4,12.8,5.7,8.2,9.8,4.2,596,634,540,772,706,1120,1599,1322,593,854,1011,430,39.6
1013,0500000US01013,1,"Butler County, Alabama",20025,23.0,382.55,75.09,18.87,4,4,4,3,15,-30.49,125.6,19.0,2.8,4603,3806,557,6.0,6.5,6.4,6.5,5.4,12.1,12.0,12.1,7.3,6.8,10.4,5.8,1205,1293,1274,1292,1073,2424,2401,2427,1460,1370,2081,1168,40.7
1015,0500000US01015,1,"Calhoun County, Alabama",115098,20.7,368.25,87.32,16.51,4,4,4,2,14,-31.55,76.8,16.8,1.8,23809,19386,2050,5.7,5.9,6.2,6.8,6.6,13.0,12.0,13.0,6.8,7.0,10.0,5.1,6562,6844,7158,7773,7626,15017,13845,14958,7851,8078,11489,5847,39.7
1017,0500000US01017,1,"Chambers County, Alabama",33826,22.9,326.33,84.0,18.45,4,4,4,3,15,-32.78,114.49,18.9,2.1,7733,6409,702,5.8,5.1,6.2,5.8,7.5,10.9,11.7,13.7,7.8,6.5,11.2,5.7,1950,1728,2099,1960,2525,3691,3973,4648,2638,2205,3775,1932,43.0


In [25]:
# Write the table to CSV; reset_index() turns cnty_fips into a regular
# column, so the positional index is not written.
(
    risk_data
    .reset_index()
    .to_csv('../data/national-cnty-covid19-risk-analysis.csv', index=False)
)

In [None]:
# Reload the exported table. The FIPS columns are read as text: with type
# inference they would be parsed as numbers and lose their leading zeros
# (e.g. '01001' would come back as 1001).
risk_data = pd.read_csv('../data/national-cnty-covid19-risk-analysis.csv',
                        dtype={'cnty_fips': str, 'state_fips': str})

In [None]:
# Preview the first five rows of the reloaded table.
risk_data.head(n=5)

In [None]:
# Minimum, mean and maximum of the cardiovascular mortality rate.
for summary in ('min', 'mean', 'max'):
    print(getattr(risk_data['cvd_rate'], summary)())

In [None]:
# Minimum, mean and maximum of the chronic respiratory mortality rate.
for summary in ('min', 'mean', 'max'):
    print(getattr(risk_data['crd_rate'], summary)())

In [None]:
# Minimum, mean and maximum of the diabetes prevalence rate.
for summary in ('min', 'mean', 'max'):
    print(getattr(risk_data['diabetes_rate'], summary)())

In [None]:
# 0th quantile (the minimum) of the three disease measures.
risk_data.loc[:, ['cvd_rate','crd_rate','diabetes_rate']].quantile(q=0)

In [None]:
# First quartile (25th percentile) of the three disease measures.
risk_data.loc[:, ['cvd_rate','crd_rate','diabetes_rate']].quantile(q=.25)

In [None]:
# Median (50th percentile) of the three disease measures.
risk_data.loc[:, ['cvd_rate','crd_rate','diabetes_rate']].quantile(q=.5)

In [None]:
# Third quartile (75th percentile) of the three disease measures.
risk_data.loc[:, ['cvd_rate','crd_rate','diabetes_rate']].quantile(q=.75)

In [None]:
# 100th percentile (the maximum) of the three disease measures.
risk_data.loc[:, ['cvd_rate','crd_rate','diabetes_rate']].quantile(q=1)