In [17]:
import math
import pandas as pd
import numpy as np
from scipy import stats
import pprint

In [18]:
# load all data sources, all sliced by counties
covid_df = pd.read_csv('data/covid-2021-us-counties.csv', index_col=False)
population_df = pd.read_csv('data/2020CensusTotalPopulation.csv', index_col=False, skiprows = 1)
obesity_df = pd.read_csv('data/ObesityData.csv', index_col=False, skiprows = 2)
pi_df = pd.read_csv('data/Physical_Inactivity_Data.csv', index_col=False, skiprows = 2) # physical inactivity
diabetes_df = pd.read_csv('data/DiabetesAtlasData.csv', index_col=False, skiprows = 2)
hospitalization_df = pd.read_csv('data/hospitalizations.csv', index_col=False, skiprows = 0) # hospitalizations
vaccinations_df = pd.read_csv('data/vaccinations.csv', index_col=False, skiprows = 0) # vaccinations


In [19]:
# util function
def isfloat(num):
    if not num:
        return False

    try:
        float(num)
        return True
    except ValueError:
        return False

In [22]:
# extract 2021-02-28 data for covid cases and deaths

# new a dictionary, key is state, values will be 
#  - covid cases, deaths, hospitalization, vaccination, population, pi, obesity, diabetes
#  - cases pct, deaths pct, pi pct, obesity pct, diaebets pct, hospitalization pct, vaccinations pct
ccdata = {}

print("covid size: ", date_df['state'].size)
for index, row in date_df.iterrows():
    state = row['state']
    v = {}
    ccdata[state] = v
    v['state'] = state
    v['cases'] = row['cases']
    v['hospitalization'] = row['cumulative_hospitalized_patients']
    v['vaccinations'] = row['vaccinations']
    if math.isnan(row['deaths']):
        v['deaths'] = 0 
    else:
        v['deaths'] = row['deaths']
    
print(ccdata[1001])
print(len(ccdata))

covid size:  3246


KeyError: 'cumulative_hospitalized_patients'

In [23]:
# pseudo programming 
date_df = covid_df[(covid_df['date'] == "2021-02-28")]
date_df2 = hospitalization_df[(hospitalization_df['date'] == "2021-02-28")]
print(date_df2.head(20))

             date location_key  new_hospitalized_patients  \
485    2021-02-28           AR                      411.0   
1301   2021-02-28         AR_A                       16.0   
2007   2021-02-28     AR_A_007                        0.0   
2565   2021-02-28     AR_A_014                        0.0   
3946   2021-02-28     AR_A_028                       10.0   
4667   2021-02-28     AR_A_035                        2.0   
6047   2021-02-28     AR_A_049                        0.0   
6815   2021-02-28     AR_A_056                        1.0   
8536   2021-02-28     AR_A_098                        0.0   
9694   2021-02-28     AR_A_112                        2.0   
10265  2021-02-28     AR_A_119                        0.0   
10898  2021-02-28     AR_A_126                        1.0   
11571  2021-02-28     AR_A_133                        0.0   
12215  2021-02-28     AR_A_140                        0.0   
12909  2021-02-28     AR_A_147                        0.0   
13498  2021-02-28     AR

In [24]:
print(population_df.head(-10))

      Estimate!!Total Margin of Error!!Total              id  \
0               55639                  *****  0500000US01001   
1              218289                  *****  0500000US01003   
2               25026                  *****  0500000US01005   
3               22374                  *****  0500000US01007   
4               57755                  *****  0500000US01009   
...               ...                    ...             ...   
3207           326953                  *****  0500000US72127   
3208            36586                  *****  0500000US72129   
3209            36292                  *****  0500000US72131   
3210            21474                  *****  0500000US72133   
3211            72450                  *****  0500000US72135   

                      Geographic Area Name  
0                  Autauga County, Alabama  
1                  Baldwin County, Alabama  
2                  Barbour County, Alabama  
3                     Bibb County, Alabama  
4     

In [84]:
# merge population data into ccdata
ccdata_state = {}
pop_fips = {}
for index, row in population_df.iterrows(): 
    state_str = row['Geographic Area Name']
    if not ',' in state_str:
        continue
    state_str = state_str[state_str.rindex(',') + 2:]
    #print(state_str)
    #if state_str != 'Alabama':
     #   break
    
    v = ccdata_state.get(state_str)
    if not v:
        v = {}
        ccdata_state[state_str] = v
        v['population'] = 0
    p = row["Estimate!!Total"]
    #print(p)
    v['population'] += p
    #print(p, v)
    fips_str = row['id']
    fips_str = fips_str[-5:]
    if not isfloat(fips_str):
        print("fips: ", fips_str)
        continue
    fips = int(fips_str)
    pop_fips[fips] = p

#pprint.pprint(pop_fips)
pprint.pprint(ccdata_state)
p = 0
for k, v in ccdata_state.items():
    p += v['population']
    print(p)

{'Alabama': {'population': 4893186},
 'Alaska': {'population': 736990},
 'Arizona': {'population': 7174064},
 'Arkansas': {'population': 3011873},
 'California': {'population': 39346023},
 'Colorado': {'population': 5684926},
 'Connecticut': {'population': 3570549},
 'Delaware': {'population': 967679},
 'District of Columbia': {'population': 701974},
 'Florida': {'population': 21216924},
 'Georgia': {'population': 10516579},
 'Hawaii': {'population': 1420074},
 'Idaho': {'population': 1754367},
 'Illinois': {'population': 12716164},
 'Indiana': {'population': 6696893},
 'Iowa': {'population': 3150011},
 'Kansas': {'population': 2912619},
 'Kentucky': {'population': 4461952},
 'Louisiana': {'population': 4664616},
 'Maine': {'population': 1340825},
 'Maryland': {'population': 6037624},
 'Massachusetts': {'population': 6873003},
 'Michigan': {'population': 9973907},
 'Minnesota': {'population': 5600166},
 'Mississippi': {'population': 2981835},
 'Missouri': {'population': 6124160},
 'Mon

In [79]:
pprint.pprint(pop_fips)

{1001: 55639,
 1003: 218289,
 1005: 25026,
 1007: 22374,
 1009: 57755,
 1011: 10173,
 1013: 19726,
 1015: 114324,
 1017: 33427,
 1019: 26035,
 1021: 44147,
 1023: 12755,
 1025: 23866,
 1027: 13285,
 1029: 14952,
 1031: 52238,
 1033: 54957,
 1035: 12219,
 1037: 10696,
 1039: 37096,
 1041: 13826,
 1043: 83345,
 1045: 49293,
 1047: 38184,
 1049: 71430,
 1051: 81526,
 1053: 36775,
 1055: 102721,
 1057: 16406,
 1059: 31587,
 1061: 26383,
 1063: 8221,
 1065: 14754,
 1067: 17123,
 1069: 105319,
 1071: 51765,
 1073: 658615,
 1075: 13854,
 1077: 92870,
 1079: 32969,
 1081: 163461,
 1083: 96921,
 1085: 9936,
 1087: 18437,
 1089: 367686,
 1091: 19138,
 1093: 29818,
 1095: 96137,
 1097: 413977,
 1099: 21006,
 1101: 226451,
 1103: 119352,
 1105: 9104,
 1107: 20049,
 1109: 33274,
 1111: 22732,
 1113: 57938,
 1115: 88929,
 1117: 216350,
 1119: 12595,
 1121: 80244,
 1123: 40450,
 1125: 208854,
 1127: 63802,
 1129: 16336,
 1131: 10552,
 1133: 23712,
 2013: 3389,
 2016: 5708,
 2020: 292090,
 2050: 18263

 20161: 74059,
 20163: 4985,
 20165: 2953,
 20167: 6896,
 20169: 54384,
 20171: 4893,
 20173: 515416,
 20175: 21902,
 20177: 177293,
 20179: 2515,
 20181: 5904,
 20183: 3594,
 20185: 4125,
 20187: 2030,
 20189: 5498,
 20191: 22928,
 20193: 7748,
 20195: 2802,
 20197: 6877,
 20199: 1583,
 20201: 5474,
 20203: 2112,
 20205: 8600,
 20207: 3117,
 20209: 165447,
 21001: 19366,
 21003: 21065,
 21005: 22580,
 21007: 7914,
 21009: 44026,
 21011: 12421,
 21013: 26426,
 21015: 132368,
 21017: 19998,
 21019: 47361,
 21021: 30090,
 21023: 8308,
 21025: 12802,
 21027: 20283,
 21029: 80921,
 21031: 12756,
 21033: 12679,
 21035: 38991,
 21037: 93608,
 21039: 4738,
 21041: 10691,
 21043: 26976,
 21045: 15968,
 21047: 71470,
 21049: 36152,
 21051: 20110,
 21053: 10175,
 21055: 8940,
 21057: 6660,
 21059: 101001,
 21061: 12195,
 21063: 7461,
 21065: 14187,
 21067: 322200,
 21069: 14519,
 21071: 35931,
 21073: 50744,
 21075: 6064,
 21077: 8760,
 21079: 17554,
 21081: 25107,
 21083: 37125,
 21085: 26313,


 31129: 4204,
 31131: 15965,
 31133: 2640,
 31135: 2889,
 31137: 9050,
 31139: 7132,
 31141: 33250,
 31143: 5208,
 31145: 10725,
 31147: 7913,
 31149: 1430,
 31151: 14221,
 31153: 183956,
 31155: 21356,
 31157: 35884,
 31159: 17217,
 31161: 5215,
 31163: 3015,
 31165: 1298,
 31167: 5946,
 31169: 5000,
 31171: 586,
 31173: 7218,
 31175: 4100,
 31177: 20546,
 31179: 9388,
 31181: 3497,
 31183: 689,
 31185: 13671,
 32001: 24606,
 32003: 2228866,
 32005: 48486,
 32007: 52537,
 32009: 1030,
 32011: 1839,
 32013: 16834,
 32015: 5565,
 32017: 5177,
 32019: 55667,
 32021: 4487,
 32023: 45514,
 32027: 6591,
 32029: 4086,
 32031: 464182,
 32033: 9570,
 32510: 55244,
 33001: 61174,
 33003: 48461,
 33005: 76040,
 33007: 31486,
 33009: 90331,
 33011: 415305,
 33013: 150902,
 33015: 308211,
 33017: 130161,
 33019: 43173,
 34001: 264650,
 34003: 931275,
 34005: 446301,
 34007: 506721,
 34009: 92701,
 34011: 150085,
 34013: 798698,
 34015: 291745,
 34017: 671923,
 34019: 125063,
 34021: 368085,
 34023

 47079: 32251,
 47081: 25017,
 47083: 8201,
 47085: 18528,
 47087: 11767,
 47089: 54162,
 47091: 17755,
 47093: 466184,
 47095: 7273,
 47097: 25689,
 47099: 43780,
 47101: 12131,
 47103: 34158,
 47105: 53169,
 47107: 53392,
 47109: 25814,
 47111: 24208,
 47113: 97838,
 47115: 28639,
 47117: 33708,
 47119: 94615,
 47121: 12237,
 47123: 46413,
 47125: 204992,
 47127: 6396,
 47129: 21538,
 47131: 30343,
 47133: 22171,
 47135: 8020,
 47137: 5068,
 47139: 16807,
 47141: 78542,
 47143: 32964,
 47145: 53331,
 47147: 70982,
 47149: 324139,
 47151: 22020,
 47153: 14936,
 47155: 98007,
 47157: 936611,
 47159: 19926,
 47161: 13553,
 47163: 157707,
 47165: 187680,
 47167: 61562,
 47169: 10910,
 47171: 17821,
 47173: 19678,
 47175: 5813,
 47177: 40971,
 47179: 128874,
 47181: 16638,
 47183: 33377,
 47185: 27087,
 47187: 232380,
 47189: 140604,
 48001: 57917,
 48003: 18227,
 48005: 87119,
 48007: 24220,
 48009: 8754,
 48011: 1950,
 48013: 50194,
 48015: 29892,
 48017: 6916,
 48019: 22770,
 48021: 86

 72149: 21899,
 72151: 32867,
 72153: 34501}


In [87]:
ccdata_fips = {}
ccdata_state = {}
for index, row in obesity_df.iterrows():
    state = row['State']
    fips = row['County_FIPS']
    
    v = ccdata_fips.get(fips)
    if not v:
        v = {}
        ccdata_fips[fips] = v
        v['obesity_fips_cases'] = 0
         
    pct = row['Obesity Percentage']
    #print(pct)
    p = pop_fips.get(fips, 0)
    #print(p)
    v['obesity_fips_cases'] = (pct/100) * p

for index, row in obesity_df.iterrows():
    fips = row['County_FIPS']
    v = ccdata_state.get(state)
    if not v:
        v = {}
        v['obesity_state_cases'] = 0
    cases = ccdata_fips[fips]
    print(cases)
    v['obesity_state_cases'] += cases
    
pprint.pprint(ccdata_fips)
pprint.pprint(ccdata_state)


{'obesity_fips_cases': 16469.144000000004}


TypeError: unsupported operand type(s) for +=: 'int' and 'dict'

In [27]:
for index, row in diabetes_df.iterrows():
    fips = row['County_FIPS']
    v = ccdata.get(fips)
    if not v:
        print("no ccdata for: ", fips)
        continue
    else: 
        v['diabetes_pct'] = row['Diagnosed Diabetes Percentage']
print(ccdata[1001])

no ccdata for:  1001.0
no ccdata for:  1003.0
no ccdata for:  1005.0
no ccdata for:  1007.0
no ccdata for:  1009.0
no ccdata for:  1011.0
no ccdata for:  1013.0
no ccdata for:  1015.0
no ccdata for:  1017.0
no ccdata for:  1019.0
no ccdata for:  1021.0
no ccdata for:  1023.0
no ccdata for:  1025.0
no ccdata for:  1027.0
no ccdata for:  1029.0
no ccdata for:  1031.0
no ccdata for:  1033.0
no ccdata for:  1035.0
no ccdata for:  1037.0
no ccdata for:  1039.0
no ccdata for:  1041.0
no ccdata for:  1043.0
no ccdata for:  1045.0
no ccdata for:  1047.0
no ccdata for:  1049.0
no ccdata for:  1051.0
no ccdata for:  1053.0
no ccdata for:  1055.0
no ccdata for:  1057.0
no ccdata for:  1059.0
no ccdata for:  1061.0
no ccdata for:  1063.0
no ccdata for:  1065.0
no ccdata for:  1067.0
no ccdata for:  1069.0
no ccdata for:  1071.0
no ccdata for:  1073.0
no ccdata for:  1075.0
no ccdata for:  1077.0
no ccdata for:  1079.0
no ccdata for:  1081.0
no ccdata for:  1083.0
no ccdata for:  1085.0
no ccdata f

no ccdata for:  19119.0
no ccdata for:  19121.0
no ccdata for:  19123.0
no ccdata for:  19125.0
no ccdata for:  19127.0
no ccdata for:  19129.0
no ccdata for:  19131.0
no ccdata for:  19133.0
no ccdata for:  19135.0
no ccdata for:  19137.0
no ccdata for:  19139.0
no ccdata for:  19141.0
no ccdata for:  19143.0
no ccdata for:  19145.0
no ccdata for:  19147.0
no ccdata for:  19149.0
no ccdata for:  19151.0
no ccdata for:  19153.0
no ccdata for:  19155.0
no ccdata for:  19157.0
no ccdata for:  19159.0
no ccdata for:  19161.0
no ccdata for:  19163.0
no ccdata for:  19165.0
no ccdata for:  19167.0
no ccdata for:  19169.0
no ccdata for:  19171.0
no ccdata for:  19173.0
no ccdata for:  19175.0
no ccdata for:  19177.0
no ccdata for:  19179.0
no ccdata for:  19181.0
no ccdata for:  19183.0
no ccdata for:  19185.0
no ccdata for:  19187.0
no ccdata for:  19189.0
no ccdata for:  19191.0
no ccdata for:  19193.0
no ccdata for:  19195.0
no ccdata for:  19197.0
no ccdata for:  20001.0
no ccdata for:  

no ccdata for:  29047.0
no ccdata for:  29049.0
no ccdata for:  29051.0
no ccdata for:  29053.0
no ccdata for:  29055.0
no ccdata for:  29057.0
no ccdata for:  29059.0
no ccdata for:  29061.0
no ccdata for:  29063.0
no ccdata for:  29065.0
no ccdata for:  29067.0
no ccdata for:  29069.0
no ccdata for:  29071.0
no ccdata for:  29073.0
no ccdata for:  29075.0
no ccdata for:  29077.0
no ccdata for:  29079.0
no ccdata for:  29081.0
no ccdata for:  29083.0
no ccdata for:  29085.0
no ccdata for:  29087.0
no ccdata for:  29089.0
no ccdata for:  29091.0
no ccdata for:  29093.0
no ccdata for:  29095.0
no ccdata for:  29097.0
no ccdata for:  29099.0
no ccdata for:  29101.0
no ccdata for:  29103.0
no ccdata for:  29105.0
no ccdata for:  29107.0
no ccdata for:  29109.0
no ccdata for:  29111.0
no ccdata for:  29113.0
no ccdata for:  29115.0
no ccdata for:  29117.0
no ccdata for:  29121.0
no ccdata for:  29123.0
no ccdata for:  29125.0
no ccdata for:  29127.0
no ccdata for:  29119.0
no ccdata for:  

no ccdata for:  39113.0
no ccdata for:  39115.0
no ccdata for:  39117.0
no ccdata for:  39119.0
no ccdata for:  39121.0
no ccdata for:  39123.0
no ccdata for:  39125.0
no ccdata for:  39127.0
no ccdata for:  39129.0
no ccdata for:  39131.0
no ccdata for:  39133.0
no ccdata for:  39135.0
no ccdata for:  39137.0
no ccdata for:  39139.0
no ccdata for:  39141.0
no ccdata for:  39143.0
no ccdata for:  39145.0
no ccdata for:  39147.0
no ccdata for:  39149.0
no ccdata for:  39151.0
no ccdata for:  39153.0
no ccdata for:  39155.0
no ccdata for:  39157.0
no ccdata for:  39159.0
no ccdata for:  39161.0
no ccdata for:  39163.0
no ccdata for:  39165.0
no ccdata for:  39167.0
no ccdata for:  39169.0
no ccdata for:  39171.0
no ccdata for:  39173.0
no ccdata for:  39175.0
no ccdata for:  40001.0
no ccdata for:  40003.0
no ccdata for:  40005.0
no ccdata for:  40007.0
no ccdata for:  40009.0
no ccdata for:  40011.0
no ccdata for:  40013.0
no ccdata for:  40015.0
no ccdata for:  40017.0
no ccdata for:  

no ccdata for:  50027.0
no ccdata for:  51001.0
no ccdata for:  51003.0
no ccdata for:  51510.0
no ccdata for:  51005.0
no ccdata for:  51007.0
no ccdata for:  51009.0
no ccdata for:  51011.0
no ccdata for:  51013.0
no ccdata for:  51015.0
no ccdata for:  51017.0
no ccdata for:  51019.0
no ccdata for:  51021.0
no ccdata for:  51023.0
no ccdata for:  51520.0
no ccdata for:  51025.0
no ccdata for:  51027.0
no ccdata for:  51029.0
no ccdata for:  51530.0
no ccdata for:  51031.0
no ccdata for:  51033.0
no ccdata for:  51035.0
no ccdata for:  51036.0
no ccdata for:  51037.0
no ccdata for:  51540.0
no ccdata for:  51550.0
no ccdata for:  51041.0
no ccdata for:  51043.0
no ccdata for:  51570.0
no ccdata for:  51580.0
no ccdata for:  51045.0
no ccdata for:  51047.0
no ccdata for:  51049.0
no ccdata for:  51590.0
no ccdata for:  51051.0
no ccdata for:  51053.0
no ccdata for:  51595.0
no ccdata for:  51057.0
no ccdata for:  51059.0
no ccdata for:  51600.0
no ccdata for:  51610.0
no ccdata for:  

KeyError: 1001

In [13]:
for index, row in pi_df.iterrows():
    fips = row['County_FIPS']
    v = ccdata.get(fips) 
    if not v:
        print("no ccdata for: ", fips)
        continue
    else: 
        v['pi_pct'] = row['Physical Inactivity Percentage']
print(ccdata[1001])

no ccdata for:  1001.0
no ccdata for:  1003.0
no ccdata for:  1005.0
no ccdata for:  1007.0
no ccdata for:  1009.0
no ccdata for:  1011.0
no ccdata for:  1013.0
no ccdata for:  1015.0
no ccdata for:  1017.0
no ccdata for:  1019.0
no ccdata for:  1021.0
no ccdata for:  1023.0
no ccdata for:  1025.0
no ccdata for:  1027.0
no ccdata for:  1029.0
no ccdata for:  1031.0
no ccdata for:  1033.0
no ccdata for:  1035.0
no ccdata for:  1037.0
no ccdata for:  1039.0
no ccdata for:  1041.0
no ccdata for:  1043.0
no ccdata for:  1045.0
no ccdata for:  1047.0
no ccdata for:  1049.0
no ccdata for:  1051.0
no ccdata for:  1053.0
no ccdata for:  1055.0
no ccdata for:  1057.0
no ccdata for:  1059.0
no ccdata for:  1061.0
no ccdata for:  1063.0
no ccdata for:  1065.0
no ccdata for:  1067.0
no ccdata for:  1069.0
no ccdata for:  1071.0
no ccdata for:  1073.0
no ccdata for:  1075.0
no ccdata for:  1077.0
no ccdata for:  1079.0
no ccdata for:  1081.0
no ccdata for:  1083.0
no ccdata for:  1085.0
no ccdata f

no ccdata for:  18067.0
no ccdata for:  18069.0
no ccdata for:  18071.0
no ccdata for:  18073.0
no ccdata for:  18075.0
no ccdata for:  18077.0
no ccdata for:  18079.0
no ccdata for:  18081.0
no ccdata for:  18083.0
no ccdata for:  18085.0
no ccdata for:  18087.0
no ccdata for:  18089.0
no ccdata for:  18091.0
no ccdata for:  18093.0
no ccdata for:  18095.0
no ccdata for:  18097.0
no ccdata for:  18099.0
no ccdata for:  18101.0
no ccdata for:  18103.0
no ccdata for:  18105.0
no ccdata for:  18107.0
no ccdata for:  18109.0
no ccdata for:  18111.0
no ccdata for:  18113.0
no ccdata for:  18115.0
no ccdata for:  18117.0
no ccdata for:  18119.0
no ccdata for:  18121.0
no ccdata for:  18123.0
no ccdata for:  18125.0
no ccdata for:  18127.0
no ccdata for:  18129.0
no ccdata for:  18131.0
no ccdata for:  18133.0
no ccdata for:  18135.0
no ccdata for:  18137.0
no ccdata for:  18139.0
no ccdata for:  18143.0
no ccdata for:  18145.0
no ccdata for:  18147.0
no ccdata for:  18141.0
no ccdata for:  

no ccdata for:  27153.0
no ccdata for:  27155.0
no ccdata for:  27157.0
no ccdata for:  27159.0
no ccdata for:  27161.0
no ccdata for:  27163.0
no ccdata for:  27165.0
no ccdata for:  27167.0
no ccdata for:  27169.0
no ccdata for:  27171.0
no ccdata for:  27173.0
no ccdata for:  28001.0
no ccdata for:  28003.0
no ccdata for:  28005.0
no ccdata for:  28007.0
no ccdata for:  28009.0
no ccdata for:  28011.0
no ccdata for:  28013.0
no ccdata for:  28015.0
no ccdata for:  28017.0
no ccdata for:  28019.0
no ccdata for:  28021.0
no ccdata for:  28023.0
no ccdata for:  28025.0
no ccdata for:  28027.0
no ccdata for:  28029.0
no ccdata for:  28031.0
no ccdata for:  28033.0
no ccdata for:  28035.0
no ccdata for:  28037.0
no ccdata for:  28039.0
no ccdata for:  28041.0
no ccdata for:  28043.0
no ccdata for:  28045.0
no ccdata for:  28047.0
no ccdata for:  28049.0
no ccdata for:  28051.0
no ccdata for:  28053.0
no ccdata for:  28055.0
no ccdata for:  28057.0
no ccdata for:  28059.0
no ccdata for:  

no ccdata for:  37183.0
no ccdata for:  37185.0
no ccdata for:  37187.0
no ccdata for:  37189.0
no ccdata for:  37191.0
no ccdata for:  37193.0
no ccdata for:  37195.0
no ccdata for:  37197.0
no ccdata for:  37199.0
no ccdata for:  38001.0
no ccdata for:  38003.0
no ccdata for:  38005.0
no ccdata for:  38007.0
no ccdata for:  38009.0
no ccdata for:  38011.0
no ccdata for:  38013.0
no ccdata for:  38015.0
no ccdata for:  38017.0
no ccdata for:  38019.0
no ccdata for:  38021.0
no ccdata for:  38023.0
no ccdata for:  38025.0
no ccdata for:  38027.0
no ccdata for:  38029.0
no ccdata for:  38031.0
no ccdata for:  38033.0
no ccdata for:  38035.0
no ccdata for:  38037.0
no ccdata for:  38039.0
no ccdata for:  38041.0
no ccdata for:  38043.0
no ccdata for:  38045.0
no ccdata for:  38047.0
no ccdata for:  38049.0
no ccdata for:  38051.0
no ccdata for:  38053.0
no ccdata for:  38055.0
no ccdata for:  38057.0
no ccdata for:  38059.0
no ccdata for:  38061.0
no ccdata for:  38063.0
no ccdata for:  

no ccdata for:  48415.0
no ccdata for:  48417.0
no ccdata for:  48419.0
no ccdata for:  48421.0
no ccdata for:  48423.0
no ccdata for:  48425.0
no ccdata for:  48427.0
no ccdata for:  48429.0
no ccdata for:  48431.0
no ccdata for:  48433.0
no ccdata for:  48435.0
no ccdata for:  48437.0
no ccdata for:  48439.0
no ccdata for:  48441.0
no ccdata for:  48443.0
no ccdata for:  48445.0
no ccdata for:  48447.0
no ccdata for:  48449.0
no ccdata for:  48451.0
no ccdata for:  48453.0
no ccdata for:  48455.0
no ccdata for:  48457.0
no ccdata for:  48459.0
no ccdata for:  48461.0
no ccdata for:  48463.0
no ccdata for:  48465.0
no ccdata for:  48467.0
no ccdata for:  48469.0
no ccdata for:  48471.0
no ccdata for:  48473.0
no ccdata for:  48475.0
no ccdata for:  48477.0
no ccdata for:  48479.0
no ccdata for:  48481.0
no ccdata for:  48483.0
no ccdata for:  48485.0
no ccdata for:  48487.0
no ccdata for:  48489.0
no ccdata for:  48491.0
no ccdata for:  48493.0
no ccdata for:  48495.0
no ccdata for:  

KeyError: 1001

In [39]:
ccdatalist = []
keys = list(ccdata.keys())
for k in keys:
    v = ccdata.get(k)

    if not 'population' in v.keys():
        print("no population data, ", v['fips'])
        continue
    if not 'obesity_pct' in v.keys():
        print("no obesity data, ", v['fips'])
        continue
    
    
    # make a list from the map
    row = []
    row.append(v['fips'])
    row.append(v['state'])
    row.append(v['county'])
    row.append(v['cases'])
    row.append(v['hospitalizations'])
    row.append(v['deaths'])
    row.append(v['vaccinations'])
    row.append(v['population'])
    row.append(v['cases_pct'])
    row.append(v['deaths_pct'])
    row.append(v['obesity_pct'])
    row.append(v['diabetes_pct'])
    row.append(v['pi_pct'])
    

    ccdatalist.append(row)
    
ccdf = pd.DataFrame(ccdatalist, columns = ['County FIPS', 'State', 'County', 'Covid Cases', 'Covid Deaths', 'County Population', 'Covid Case Percentage', 'Covid Death Percentage', 'Obesity Percentage', 'Diabetes Percentage', 'Physical Inactivity Percentage'])
#print(ccdf)

no population data,  2997.0
no population data,  2261.0
no population data,  2998.0
no obesity data,  35039.0
no population data,  69110.0
no population data,  69120.0
no obesity data,  72001.0
no obesity data,  72003.0
no obesity data,  72005.0
no obesity data,  72007.0
no obesity data,  72009.0
no obesity data,  72011.0
no obesity data,  72013.0
no obesity data,  72015.0
no obesity data,  72017.0
no obesity data,  72019.0
no obesity data,  72021.0
no obesity data,  72023.0
no obesity data,  72025.0
no obesity data,  72027.0
no obesity data,  72029.0
no obesity data,  72031.0
no obesity data,  72033.0
no obesity data,  72035.0
no obesity data,  72037.0
no obesity data,  72039.0
no obesity data,  72041.0
no obesity data,  72043.0
no obesity data,  72045.0
no obesity data,  72047.0
no obesity data,  72049.0
no obesity data,  72051.0
no obesity data,  72053.0
no obesity data,  72054.0
no obesity data,  72055.0
no obesity data,  72057.0
no obesity data,  72059.0
no obesity data,  72061.0


In [136]:
ccdf.corr(method ='pearson')

Unnamed: 0,County FIPS,Covid Cases,Covid Deaths,County Population,Covid Case Percentage,Covid Death Percentage,Obesity Percentage,Diabetes Percentage,Physical Inactivity Percentage
County FIPS,1.0,0.004371,0.003397,-0.055882,-0.041958,-0.035142,0.005332,-0.082733,-0.155159
Covid Cases,0.004371,1.0,0.955142,0.267822,0.006598,-0.024492,-0.072476,-0.045507,-0.0314
Covid Deaths,0.003397,0.955142,1.0,0.178252,0.01281,-0.015572,-0.056952,-0.032139,-0.025655
County Population,-0.055882,0.267822,0.178252,1.0,-0.103838,-0.109318,-0.032191,-0.017881,-0.066131
Covid Case Percentage,-0.041958,0.006598,0.01281,-0.103838,1.0,0.982783,-0.109518,-0.065316,-0.046833
Covid Death Percentage,-0.035142,-0.024492,-0.015572,-0.109318,0.982783,1.0,-0.052663,-0.00046,-0.000539
Obesity Percentage,0.005332,-0.072476,-0.056952,-0.032191,-0.109518,-0.052663,1.0,0.628975,0.677908
Diabetes Percentage,-0.082733,-0.045507,-0.032139,-0.017881,-0.065316,-0.00046,0.628975,1.0,0.718043
Physical Inactivity Percentage,-0.155159,-0.0314,-0.025655,-0.066131,-0.046833,-0.000539,0.677908,0.718043,1.0


In [40]:
# raw data size
print("processed data # of rows: ", ccdf['County FIPS'].size)

processed data # of rows:  3131


In [41]:
# keep counties with population greater than 50K only   7yyyyyyyy7
df_sub = ccdf[(ccdf['County Population'] >= 50000)]

###### whole country - Physical Inactivity correlation
print("# of counties: ", df_sub['County FIPS'].size)

r, p = stats.pearsonr(df_sub['Covid Case Percentage'], df_sub['Physical Inactivity Percentage'])
print("covid case - pi correlation:", r, "p-value:", p)
r, p = stats.pearsonr(df_sub['Covid Death Percentage'], df_sub['Physical Inactivity Percentage'])
print("covid death - pi correlation:", r, "p-value:", p)

# of counties:  985
covid case - pi correlation: 0.30083175293684705 p-value: 4.726008887156151e-22
covid death - pi correlation: 0.4060170420839583 p-value: 2.198308964206128e-40


In [42]:
###### whole country - Obesity correlation
print("# of counties: ", df_sub['County FIPS'].size)

r, p = stats.pearsonr(df_sub['Covid Case Percentage'], df_sub['Obesity Percentage'])
print("covid case - obesity correlation:", r, "p-value:", p)
r, p = stats.pearsonr(df_sub['Covid Death Percentage'], df_sub['Obesity Percentage'])
print("covid death - obesity correlation:", r, "p-value:", p)

# of counties:  985
covid case - obesity correlation: 0.1726919522412191 p-value: 4.924064175306927e-08
covid death - obesity correlation: 0.2023788834323017 p-value: 1.4554021251953065e-10


In [43]:
###### whole country - Diabetes correlation
print("# of counties: ", df_sub['County FIPS'].size)

r, p = stats.pearsonr(df_sub['Covid Case Percentage'], df_sub['Diabetes Percentage'])
print("covid case correlation:", r, "p-value:", p)
r, p = stats.pearsonr(df_sub['Covid Death Percentage'], df_sub['Diabetes Percentage'])
print("covid death correlation:", r, "p-value:", p)

# of counties:  985
covid case correlation: 0.22372951996649532 p-value: 1.2208070327342218e-12
covid death correlation: 0.29895167831596625 p-value: 8.742715933432988e-22
