In [1]:
import geopandas as gpd
import numpy as np
import mapclassify as mc
import matplotlib.pyplot as plt

In [2]:
total_dict = {}
county_dict = {}
state_dict = {}
illinois_dict = {}

In [3]:
dynamic_dict = {}
dynamic_county_dict = {}
dynamic_state_dict = {}
dynamic_illinois_dict = {}

1. Read County Data

In [4]:
counties_gdf = gpd.read_file('nyt_counties_data.geojson')

In [5]:
counties_gdf.head(5)

Unnamed: 0,NAME,state_name,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,dt_start,dt_end,dt_unit,geometry
0,Bladen,North Carolina,33778,37017,2020-03-29,,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",10.0,0.0,0.0,0.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-78.90200 34.83527, -78.79960 ..."
1,Stanly,North Carolina,61114,37167,2020-03-20,2020-04-09,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",29.0,1.0,4.0,0.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-80.49738 35.20210, -80.29542 ..."
2,Summit,Ohio,541810,39153,2020-03-14,2020-03-27,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",500.0,15.0,39.0,1.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-81.68699 41.13596, -81.68495 ..."
3,Sullivan,Pennsylvania,6177,42113,2020-04-04,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",1.0,0.0,0.0,0.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-76.81373 41.59003, -76.22014 ..."
4,Upshur,Texas,40769,48459,2020-03-20,,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",13.0,0.0,0.0,0.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-95.15274 32.66095, -95.15211 ..."


In [6]:
case = counties_gdf['today_case']
death = counties_gdf['today_death']
case_per_100k_capita = counties_gdf['today_case']/(counties_gdf['population']/100000)
death_per_100k_capita = counties_gdf['today_death']/(counties_gdf['population']/100000)
death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [7]:
log_case = case.apply(lambda x: np.log(x + 1))
log_death = death.apply(lambda x: np.log(x + 1))
log_case_per_100k_capita = case_per_100k_capita.apply(lambda x: np.log(x + 1))
log_death_per_100k_capita = death.apply(lambda x: np.log(x + 1))



1.1 Get Constant Classes from today's data

In [8]:
def create_dict(column,name,dict):
    tmp_dict = {}
    tmp_dict['Quantiles'] = {
        'bins': ','.join(mc.Quantiles(column, k=6).bins.astype(str)),
        'k': 6
    }
    tmp_dict['FisherJenks'] = {
        'bins': ','.join(mc.FisherJenks(column, k=6).bins.astype(str)),
        'k': 6
    }
    tmp_dict['NaturalBreaks'] = {
        'bins': ','.join(mc.NaturalBreaks(column, k=6).bins.astype(str)),
        'k': 6
    }
    dict[name] = tmp_dict

In [9]:
create_dict(case,'case',county_dict)
create_dict(log_case,'log_case',county_dict)
create_dict(log_death,'log_death',county_dict)
create_dict(log_case_per_100k_capita,"log_case_per_100k_capita",county_dict)
create_dict(log_death_per_100k_capita,"log_death_per_100k_capita",county_dict)



In [10]:
create_dict(death,'death',county_dict)



In [11]:
create_dict(case_per_100k_capita,'case_per_100k_capita',county_dict)

In [12]:
create_dict(death_per_100k_capita,'death_per_100k_capita',county_dict)



In [13]:
create_dict(death_case_ratio,'death_case_ratio',county_dict)



In [14]:
county_dict

{'case': {'Quantiles': {'bins': '1.0,5.0,13.0,38.0,138.0,160499.0', 'k': 6},
  'FisherJenks': {'bins': '1140.0,3892.0,8253.0,20417.0,34865.0,160499.0',
   'k': 6},
  'NaturalBreaks': {'bins': '1130.0,3892.0,8253.0,20417.0,34865.0,160499.0',
   'k': 6}},
 'log_case': {'Quantiles': {'bins': '0.6931471805599453,1.791759469228055,2.6390573296152584,3.6635616461296463,4.9344739331306915,11.986049221552207',
   'k': 6},
  'FisherJenks': {'bins': '0.6931471805599453,2.1972245773362196,3.5263605246161616,4.955827057601261,6.878326468291325,11.986049221552207',
   'k': 6},
  'NaturalBreaks': {'bins': '0.6931471805599453,2.1972245773362196,3.4965075614664802,4.912654885736052,6.8308742346461795,11.986049221552207',
   'k': 6}},
 'log_death': {'Quantiles': {'bins': '0.0,0.6931471805599453,1.9459101490553132,9.380758024267314',
   'k': 6},
  'FisherJenks': {'bins': '0.0,1.0986122886681098,2.1972245773362196,3.4011973816621555,5.056245805348308,9.380758024267314',
   'k': 6},
  'NaturalBreaks': {'b

1.2 Create Dynamic classes

In [15]:
case_ts = counties_gdf['cases_ts'].apply(lambda x: x.split(','))
death_ts = counties_gdf['deaths_ts'].apply(lambda x: x.split(','))

In [16]:
length = len(case_ts[1])

In [17]:
new_case_dict = {}
for i in range(1,length):
    q6 = mc.Quantiles(case_ts.apply(lambda x: int(float(x[i]) - float(x[i - 1]))).values, k=6)
    new_case_dict[i] = ','.join(q6.bins.astype(int).astype(str))

  gadf = 1 - self.adcm / adam


In [18]:
new_death_dict = {}
for i in range(1,length):
    d6 = mc.Quantiles(death_ts.apply(lambda x: int(float(x[i]) - float(x[i - 1]))).values, k=6)
    new_death_dict[i] = ','.join(d6.bins.astype(int).astype(str))

In [19]:
dynamic_county_dict['new_case'] = new_case_dict
dynamic_county_dict['new_death'] = new_death_dict


2.Read State Data

In [20]:
state_gdf = gpd.read_file('nyt_states_data.geojson')

In [21]:
state_gdf.head(5)

Unnamed: 0,NAME,population,fips,dt_first_case,dt_first_death,cases_ts,deaths_ts,today_case,today_new_case,today_death,today_new_death,dt_start,dt_end,dt_unit,geometry
0,West Virginia,1829054,54,2020-03-17,2020-03-29,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",1077.0,24.0,37.0,3.0,2020-01-21,2020-04-27,day,"POLYGON ((-80.51899 40.63880, -80.51934 39.721..."
1,Florida,20598139,12,2020-03-01,2020-03-06,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",32130.0,610.0,1087.0,14.0,2020-01-21,2020-04-27,day,"MULTIPOLYGON (((-82.98748 24.62538, -82.89993 ..."
2,Illinois,12821497,17,2020-01-24,2020-03-17,"0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",45883.0,1980.0,1992.0,49.0,2020-01-21,2020-04-27,day,"POLYGON ((-90.64284 42.50848, -87.01994 42.493..."
3,Minnesota,5527358,27,2020-03-06,2020-03-21,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",3816.0,214.0,286.0,14.0,2020-01-21,2020-04-27,day,"POLYGON ((-97.22872 49.00056, -95.15371 48.998..."
4,Maryland,6003435,24,2020-03-05,2020-03-18,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",19487.0,906.0,858.0,31.0,2020-01-21,2020-04-27,day,"POLYGON ((-77.71952 39.32131, -77.76086 39.340..."


In [22]:
state_gdf.iloc[34]

NAME                                                  Virgin Islands
population                                                         0
fips                                                              78
dt_first_case                                             2020-03-14
dt_first_death                                            2020-04-06
cases_ts           0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....
deaths_ts          0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....
today_case                                                        59
today_new_case                                                     2
today_death                                                        4
today_new_death                                                    0
dt_start                                                  2020-01-21
dt_end                                                    2020-04-27
dt_unit                                                          day
geometry           (POLYGON ((-64.

In [23]:
state_case = state_gdf['today_case']
state_death = state_gdf['today_death']
state_case_per_100k_capita = state_gdf['today_case']/(state_gdf['population']/100000)
state_death_per_100k_capita = state_gdf['today_death']/(state_gdf['population']/100000)
#death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
#death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [24]:
state_death_per_100k_capita = state_death_per_100k_capita.replace(np.nan,0)
state_death_per_100k_capita = state_death_per_100k_capita.replace(np.inf,0)
state_case_per_100k_capita = state_case_per_100k_capita.replace(np.nan,0)
state_case_per_100k_capita = state_case_per_100k_capita.replace(np.inf,0)

In [25]:
create_dict(state_case,'case',state_dict)

  binIds += (x > l) * (x <= r) * k
  binIds += (x > l) * (x <= r) * k
  r = func(a, **kwargs)


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
create_dict(state_death,'death',state_dict)

In [None]:
create_dict(state_case_per_100k_capita,'case_per_100k_capita',state_dict)

In [None]:
create_dict(state_death_per_100k_capita,'death_per_100k_capita',state_dict)

3. Read Illinois County Data

In [None]:
illinois_gdf = gpd.read_file('illinois/nyt_illinois_counties_data.geojson')

In [None]:
illinois_gdf.head(5)

In [None]:
illinois_case = illinois_gdf['today_case']
illinois_death = illinois_gdf['today_death']
illinois_case_per_100k_capita = illinois_gdf['today_case']/(illinois_gdf['population']/100000)
illinois_death_per_100k_capita = illinois_gdf['today_death']/(illinois_gdf['population']/100000)
#death_case_ratio = counties_gdf['today_death']/counties_gdf['today_case']
#death_case_ratio = death_case_ratio.replace(np.nan, 0)

In [None]:
illinois_death_per_100k_capita = illinois_death_per_100k_capita.replace(np.nan,0)
illinois_death_per_100k_capita = illinois_death_per_100k_capita.replace(np.inf,0)
illinois_case_per_100k_capita = illinois_case_per_100k_capita.replace(np.nan,0)
illinois_case_per_100k_capita = illinois_case_per_100k_capita.replace(np.inf,0)

In [None]:
create_dict(illinois_case,'case',illinois_dict)

In [None]:
create_dict(illinois_death,'death',illinois_dict)

In [None]:
create_dict(illinois_case_per_100k_capita,'case_per_100k_capita',illinois_dict)

In [None]:
create_dict(illinois_death_per_100k_capita,'death_per_100k_capita',illinois_dict)

Summary and ouput

In [None]:
import json
total_dict['county'] = county_dict
total_dict['state'] = state_dict
total_dict['illinois'] = illinois_dict
dynamic_dict['county'] = dynamic_county_dict
dynamic_dict['state'] = dynamic_state_dict
dynamic_dict['illinois'] = dynamic_illinois_dict
with open('classes.json','w') as json_file:
    json.dump(total_dict, json_file)
with open('dynamic_classes.json','w') as json_file:
    json.dump(dynamic_dict, json_file)
