In [1]:
import geopandas as gpd
import numpy as np
import mapclassify as mc
import matplotlib.pyplot as plt

In [2]:
total_dict = {}
county_dict = {}
state_dict = {}
illinois_dict = {}

In [3]:
dynamic_dict = {}
dynamic_county_dict = {}
dynamic_state_dict = {}
dynamic_illinois_dict = {}

1. Read County Data

In [4]:
counties_gdf = gpd.read_file('nyt_counties_data.geojson')

In [5]:
counties_gdf.head(5)

Unnamed: 0,NAME,state_name,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,dt_start,dt_end,dt_unit,geometry
0,Bladen,North Carolina,33778,37017,2020-03-29,,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",4.0,0.0,0.0,0.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-78.90200 34.83527, -78.79960 ..."
1,Stanly,North Carolina,61114,37167,2020-03-20,2020-04-09,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",24.0,2.0,4.0,0.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-80.49738 35.20210, -80.29542 ..."
2,Summit,Ohio,541810,39153,2020-03-14,2020-03-27,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",447.0,32.0,32.0,2.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-81.68699 41.13596, -81.68495 ..."
3,Sullivan,Pennsylvania,6177,42113,2020-04-04,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",1.0,0.0,0.0,0.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-76.81373 41.59003, -76.22014 ..."
4,Upshur,Texas,40769,48459,2020-03-20,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",12.0,0.0,0.0,0.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-95.15274 32.66095, -95.15211 ..."


In [6]:
case = counties_gdf['today_case']
death = counties_gdf['today_death']
case_per_100k_capita = counties_gdf['today_case']/(counties_gdf['population']/100000)
death_per_100k_capita = counties_gdf['today_death']/(counties_gdf['population']/100000)
death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [7]:
death_case_ratio

0       0.000000
1       0.166667
2       0.071588
3       0.000000
4       0.000000
5       0.076923
6       0.000000
7       0.000000
8       0.000000
9       0.000000
10      0.100000
11      0.000000
12      0.000000
13      0.000000
14      0.000000
15      0.000000
16      0.000000
17      0.000000
18      0.000000
19      0.063830
20      0.074074
21      0.037736
22      0.000000
23      0.000000
24      0.000000
25      0.000000
26      0.055556
27      0.000000
28      0.064516
29      0.000000
          ...   
3187    0.166667
3188    0.058761
3189    0.000000
3190    0.000000
3191    0.166667
3192    0.011905
3193    0.000000
3194    0.000000
3195    0.142857
3196    0.000000
3197    0.000000
3198    0.000000
3199    0.000000
3200    0.000000
3201    0.000000
3202    0.071429
3203    0.045455
3204    0.000000
3205    0.000000
3206    0.000000
3207    0.028103
3208    0.037344
3209    0.111111
3210    0.000000
3211    0.000000
3212    0.200000
3213    0.098765
3214    0.0610

1.1 Get Constant Classes from today's data

In [8]:
def create_dict(column,name,dict):
    tmp_dict = {}
    tmp_dict['Quantiles'] = {
        'bins': ','.join(mc.Quantiles(column, k=6).bins.astype(str)),
        'k': 6
    }
    tmp_dict['FisherJenks'] = {
        'bins': ','.join(mc.FisherJenks(column, k=6).bins.astype(str)),
        'k': 6
    }
    tmp_dict['NaturalBreaks'] = {
        'bins': ','.join(mc.NaturalBreaks(column, k=6).bins.astype(str)),
        'k': 6
    }
    dict[name] = tmp_dict

In [9]:
create_dict(case,'case',county_dict)

In [10]:
create_dict(death,'death',county_dict)



In [11]:
create_dict(case_per_100k_capita,'case_per_100k_capita',county_dict)

In [12]:
create_dict(death_per_100k_capita,'death_per_100k_capita',county_dict)



In [13]:
create_dict(death_case_ratio,'death_case_ratio',county_dict)



In [14]:
county_dict

{'case': {'Quantiles': {'bins': '1.0,4.0,12.0,31.0,118.00000000000045,145855.0',
   'k': 6},
  'FisherJenks': {'bins': '966.0,3395.0,6816.0,17508.0,32124.0,145855.0',
   'k': 6},
  'NaturalBreaks': {'bins': '1027.0,3529.0,6816.0,17508.0,32124.0,145855.0',
   'k': 6}},
 'death': {'Quantiles': {'bins': '0.0,1.0,5.0,10889.0', 'k': 6},
  'FisherJenks': {'bins': '49.0,228.0,615.0,1142.0,1813.0,10889.0', 'k': 6},
  'NaturalBreaks': {'bins': '49.0,228.0,615.0,1142.0,1813.0,10889.0', 'k': 6}},
 'case_per_100k_capita': {'Quantiles': {'bins': '6.083465141744753,23.66583836232399,43.656946723470874,74.71115349626226,152.45292852954645,9897.33919966478',
   'k': 6},
  'FisherJenks': {'bins': '136.24400061929092,466.6905940882329,1230.0351366439074,2403.7803040679355,4600.219058050384,9897.33919966478',
   'k': 6},
  'NaturalBreaks': {'bins': '141.01323753791524,466.6905940882329,1230.0351366439074,2403.7803040679355,4600.219058050384,9897.33919966478',
   'k': 6}},
 'death_per_100k_capita': {'Quan

1.2 Create Dynamic classes

In [15]:
case_ts = counties_gdf['cases_ts'].apply(lambda x: x.split(','))
death_ts = counties_gdf['deaths_ts'].apply(lambda x: x.split(','))

In [16]:
length = len(case_ts[1])

In [17]:
new_case_dict = {}
for i in range(1,length):
    q6 = mc.Quantiles(case_ts.apply(lambda x: int(float(x[i]) - float(x[i - 1]))).values, k=6)
    new_case_dict[i] = ','.join(q6.bins.astype(int).astype(str))

  gadf = 1 - self.adcm / adam


In [18]:
new_death_dict = {}
for i in range(1,length):
    d6 = mc.Quantiles(death_ts.apply(lambda x: int(float(x[i]) - float(x[i - 1]))).values, k=6)
    new_death_dict[i] = ','.join(d6.bins.astype(int).astype(str))

In [19]:
dynamic_county_dict['new_case'] = new_case_dict
dynamic_county_dict['new_death'] = new_death_dict


2.Read State Data

In [20]:
state_gdf = gpd.read_file('nyt_states_data.geojson')

In [21]:
state_gdf.head(5)

Unnamed: 0,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,dt_start,dt_end,dt_unit,geometry
0,West Virginia,1829054,54,2020-03-17,2020-03-29,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",981.0,42.0,31.0,2.0,2020-01-21,2020-04-23,day,"POLYGON ((-80.51899 40.63880, -80.51934 39.721..."
1,Florida,20598139,12,2020-03-01,2020-03-06,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",29640.0,1339.0,986.0,60.0,2020-01-21,2020-04-23,day,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ..."
2,Illinois,12821497,17,2020-01-24,2020-03-17,"0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",36935.0,1827.0,1696.0,119.0,2020-01-21,2020-04-23,day,"POLYGON ((-90.64284 42.50848, -87.01994 42.493..."
3,Minnesota,5527358,27,2020-03-06,2020-03-21,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2942.0,221.0,200.0,21.0,2020-01-21,2020-04-23,day,"POLYGON ((-97.22872 49.00056, -95.15371 48.998..."
4,Maryland,6003435,24,2020-03-05,2020-03-18,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",15737.0,962.0,680.0,48.0,2020-01-21,2020-04-23,day,"POLYGON ((-77.71952 39.32131, -77.76086 39.340..."


In [22]:
state_gdf.iloc[34]

NAME                                                  Virgin Islands
population                                                         0
fips                                                              78
dt_first_case                                             2020-03-14
dt_first_death                                            2020-04-06
cases_ts           0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....
deaths_ts          0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....
today_case                                                        54
today_new_case                                                     0
today_death                                                        3
today_new_death                                                    0
dt_start                                                  2020-01-21
dt_end                                                    2020-04-23
dt_unit                                                          day
geometry           (POLYGON ((-64.

In [23]:
state_case = state_gdf['today_case']
state_death = state_gdf['today_death']
state_case_per_100k_capita = state_gdf['today_case']/(state_gdf['population']/100000)
state_death_per_100k_capita = state_gdf['today_death']/(state_gdf['population']/100000)
#death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
#death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [24]:
state_death_per_100k_capita = state_death_per_100k_capita.replace(np.nan,0)
state_death_per_100k_capita = state_death_per_100k_capita.replace(np.inf,0)
state_case_per_100k_capita = state_case_per_100k_capita.replace(np.nan,0)
state_case_per_100k_capita = state_case_per_100k_capita.replace(np.inf,0)

In [25]:
create_dict(state_case,'case',state_dict)

In [26]:
create_dict(state_death,'death',state_dict)

In [27]:
create_dict(state_case_per_100k_capita,'case_per_100k_capita',state_dict)

In [28]:
create_dict(state_death_per_100k_capita,'death_per_100k_capita',state_dict)

3. Read Illinois County Data

In [29]:
illinois_gdf = gpd.read_file('illinois/illinois_counties_data.geojson')

In [30]:
illinois_gdf.head(5)

Unnamed: 0,NAME,state_name,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,dt_start,dt_end,dt_unit,geometry
0,Clark,Illinois,15836,17023,2020-03-30,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",4.0,0.0,0.0,0.0,2020-01-21,2020-04-22,day,"MULTIPOLYGON (((-88.01421 39.48076, -87.96018 ..."
1,Kendall,Illinois,124626,17093,2020-03-18,2020-03-29,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",157.0,11.0,4.0,1.0,2020-01-21,2020-04-22,day,"MULTIPOLYGON (((-88.60193 41.71956, -88.40450 ..."
2,Brown,Illinois,6675,17009,,,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",0.0,0.0,0.0,0.0,2020-01-21,2020-04-22,day,"MULTIPOLYGON (((-90.91347 40.10446, -90.69582 ..."
3,Warren,Illinois,17338,17187,2020-04-11,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",19.0,4.0,0.0,0.0,2020-01-21,2020-04-22,day,"MULTIPOLYGON (((-90.78519 41.06875, -90.43942 ..."
4,Union,Illinois,17127,17181,2020-04-15,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",5.0,0.0,0.0,0.0,2020-01-21,2020-04-22,day,"MULTIPOLYGON (((-89.50179 37.55890, -89.49775 ..."


In [31]:
illinois_case = illinois_gdf['today_case']
illinois_death = illinois_gdf['today_death']
illinois_case_per_100k_capita = illinois_gdf['today_case']/(illinois_gdf['population']/100000)
illinois_death_per_100k_capita = illinois_gdf['today_death']/(illinois_gdf['population']/100000)
#death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
#death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [32]:
illinois_death_per_100k_capita = illinois_death_per_100k_capita.replace(np.nan,0)
illinois_death_per_100k_capita = illinois_death_per_100k_capita.replace(np.inf,0)
illinois_case_per_100k_capita = illinois_case_per_100k_capita.replace(np.nan,0)
illinois_case_per_100k_capita = illinois_case_per_100k_capita.replace(np.inf,0)

In [33]:
create_dict(illinois_case,'case',illinois_dict)

In [34]:
create_dict(illinois_death,'death',illinois_dict)



In [35]:
create_dict(illinois_case_per_100k_capita,'case_per_100k_capita',illinois_dict)

In [36]:
create_dict(illinois_death_per_100k_capita,'death_per_100k_capita',illinois_dict)



Summary and ouput

In [37]:
import json
total_dict['county'] = county_dict
total_dict['state'] = state_dict
total_dict['illinois'] = illinois_dict
dynamic_dict['county'] = dynamic_county_dict
dynamic_dict['state'] = dynamic_state_dict
dynamic_dict['illinois'] = dynamic_illinois_dict
with open('classes.json','w') as json_file:
    json.dump(total_dict, json_file)
with open('dynamic_classes.json','w') as json_file:
    json.dump(dynamic_dict, json_file)
