URL preparation

In [1]:
# Geometry URLs: boundary files hosted on dph.illinois.gov, one for ZIP code
# areas and one for counties. Both are read with geopandas further down.
# NOTE(review): `?nocache=1` looks like a cache-busting query string — confirm.
zipcode_geometry_url = "http://www.dph.illinois.gov/sites/default/files/COVID19/il_illinois_zip_codes.json?nocache=1"
county_geometry_url = "http://www.dph.illinois.gov/sites/default/files/Illinois_County_Geo_ch.json"

# Data URLs: JSON feeds holding the county-level records, the ZIP-level
# records, and the historical per-county test results.
county_data_url = "http://www.dph.illinois.gov/sitefiles/CountyDemos.json?nocache=1"
zipcode_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDZip.json?nocache=1"
history_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDHistoricalTestResults.json?nocache=1"

In [2]:
import pandas as pd
import json
import numpy as np
import geopandas as gpd
from urllib.request import urlopen

Download data

In [3]:
# Fetch the ZIP-level feed, then tabulate its `zip_values` records.
with urlopen(zipcode_data_url) as response:
    zipcode_dict = json.loads(response.read())
zipcode_data = pd.DataFrame(zipcode_dict['zip_values'])
    

In [4]:
# Fetch the county-level feed, then tabulate its `county_demographics` records.
with urlopen(county_data_url) as response:
    county_dict = json.loads(response.read())
county_data = pd.DataFrame(county_dict['county_demographics'])
    

In [5]:
# Fetch the historical feed; it is kept as a raw dict and flattened later.
with urlopen(history_data_url) as response:
    history_dict = json.loads(response.read())


In [6]:
# Let geopandas download and parse both boundary files directly from their URLs.
zipcode_geometry = gpd.read_file(zipcode_geometry_url)
county_geometry = gpd.read_file(county_geometry_url)

In [7]:
# Download the ZIP boundary file a second time as plain JSON and build a frame
# from its raw `features` list (one row per feature, one column per feature key).
with urlopen(zipcode_geometry_url) as response:
    tmp = json.loads(response.read())
zipcode_gpd = gpd.GeoDataFrame(tmp['features'])

In [8]:
# Replace the raw feature geometry with the shapes parsed by geopandas.
# Assign the geometry column itself rather than the whole GeoDataFrame:
# assigning a frame to a single column only works when that frame happens to
# have exactly one column.
zipcode_gpd['geometry'] = zipcode_geometry.geometry
# Keep only the join key and the shape; copy so the cast below writes to an
# independent frame instead of a slice of the original.
zipcode_gpd = zipcode_gpd[['id', 'geometry']].copy()
# Feature ids are cast to str so they can be matched against the `zip` column.
zipcode_gpd['id'] = zipcode_gpd['id'].astype(str)

In [9]:
# Keep only the join key (`id`) and the shape for each county feature.
county_gpd = county_geometry[['id','geometry']]

Zipcode data generation

In [10]:
# Preview only the first rows instead of rendering the whole frame.
zipcode_data.head(10)

Unnamed: 0,confirmed_cases,demographics,total_tested,zip
0,6,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",96,53142
1,59,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",407,60002
2,161,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1628,60004
3,85,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",492,60005
4,117,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",587,60007
5,144,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",446,60008
6,48,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",466,60010
7,56,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",162,60012
8,49,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",268,60013
9,88,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",654,60014


In [11]:
# Inspect the column dtypes of the ZIP geometry frame.
# NOTE(review): presumably a check that `id` is a string key before the merge — confirm.
zipcode_gpd.dtypes

id            object
geometry    geometry
dtype: object

In [12]:
# Attribute join: keep every ZIP shape and attach the matching data row, if any.
zipcode_final_gpd = zipcode_gpd.merge(
    zipcode_data,
    how='left',
    left_on=['id'],
    right_on=['zip'],
)

In [13]:
# ZIP areas without a row in the data feed come out of the left merge as NaN;
# report them as zero counts.
for count_column in ('confirmed_cases', 'total_tested'):
    zipcode_final_gpd[count_column] = zipcode_final_gpd[count_column].replace(np.nan, 0)

# Keep only the key, the two counts and the shape.
zipcode_final_gpd = zipcode_final_gpd[['id', 'confirmed_cases', 'total_tested', 'geometry']]

In [14]:
# Write the ZIP-level layer as GeoJSON (relative path, UTF-8), then print a
# completion marker.
zipcode_final_gpd.to_file('dph_zipcode_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done


Static county data generation

In [15]:
# Attribute join: keep every county shape and attach the matching data row, if any.
county_static = county_gpd.merge(
    county_data,
    how="left",
    left_on="id",
    right_on="County",
)

In [16]:
# Preview only the first rows instead of rendering the whole frame.
county_static.head(10)

Unnamed: 0,id,geometry,County,confirmed_cases,demographics,total_tested
0,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,814,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",4576
1,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,155,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",628
2,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,137,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1315
3,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,3333,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",17234
4,LaSalle,"POLYGON ((-88.93885 41.62837, -88.93891 41.628...",LaSalle,84,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1424
5,Bureau,"POLYGON ((-89.63155 41.58491, -89.64769 41.584...",Bureau,12,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",331
6,Henry,"POLYGON ((-89.86249 41.58401, -89.87517 41.584...",Henry,54,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",638
7,Grundy,"POLYGON ((-88.25217 41.46277, -88.27149 41.462...",Grundy,48,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",733
8,Mercer,"POLYGON ((-90.43382 41.32698, -90.45335 41.326...",Mercer,10,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",163
9,Putnam,"POLYGON ((-89.16353 41.30991, -89.16500 41.309...",Putnam,0,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",48


In [17]:
# Write the static county layer as GeoJSON (relative path, UTF-8), then print a
# completion marker.
county_static.to_file('dph_county_static_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done


County data generation


In [18]:
# Flatten the nested history payload into long format (the layout the original
# author refers to as "New York Times format"): the feed holds one entry per
# test date, each with a list of per-county records, and the result has one
# row per (county record, date).
# Each record is copied before the date is added, so the downloaded
# `history_dict` is left untouched and the cell can be re-run safely.
county_records = [
    {**county_entry, 'date': daily_entry['testDate']}
    for daily_entry in history_dict['historical_county']['values']
    for county_entry in daily_entry['values']
]
county_history = pd.DataFrame(county_records)

In [19]:
# Discard records whose County is the 'Unassigned' bucket.
is_assigned = county_history['County'] != 'Unassigned'
county_history = county_history[is_assigned]

In [None]:
def standardDate(str):
    """Convert an ``M/D/YYYY`` date string to zero-padded ``YYYY-MM-DD``.

    The first three ``/``-separated fields are read as month, day and year.
    Month and day get a single leading ``'0'`` when they are shorter than two
    characters; the year is passed through unchanged.

    Note: the parameter name shadows the built-in ``str``; it is kept so that
    existing callers are unaffected.
    """
    fields = str.split('/')

    def pad(part):
        # Prefix one zero only when the field is a single character (or empty).
        return part if len(part) >= 2 else '0' + part

    return '-'.join((fields[2], pad(fields[0]), pad(fields[1])))

In [187]:
# Wide layout: one row per county, one column per (measure, date) pair.
# pivot_table's default aggregation is used for the cell values.
county_pivot = county_history.pivot_table(index=['County'], columns=['date'])

In [188]:
# Keep only the confirmed-case block of the pivot (rows: County, columns: date)
county_cases = county_pivot['confirmed_cases']

In [189]:
# Relabel the date columns with standardDate (M/D/YYYY -> YYYY-MM-DD)
county_cases = county_cases.rename(columns=standardDate)

In [190]:
# The feed has no column for 2020-03-23, so carry the 2020-03-22 counts forward.
# Guarded so that a genuine 03/23 column, should the feed ever supply one, is
# never overwritten with the previous day's values.
if '2020-03-23' not in county_cases.columns:
    county_cases['2020-03-23'] = county_cases['2020-03-22']

In [194]:
# Show the first county row to check the relabelled date columns
county_cases.head(1)

date,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-24,2020-03-25,2020-03-26,2020-03-27,...,2020-04-07,2020-04-08,2020-04-09,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-03-23
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adams,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,9.0,11.0,22.0,40.0,40.0,40.0,40.0,40.0,40.0,1.0


In [192]:
# Date labels in chronological order (YYYY-MM-DD strings sort lexicographically),
# plus the first, latest and second-latest dates.
date = sorted(county_cases.columns)
dt_first, dt_today = date[0], date[-1]
dt_yesterday = date[-2]

In [196]:
# Reorder the columns chronologically using the sorted `date` list
county_cases = county_cases[date]

In [197]:
# Per-county case time series as plain lists, one value per date in
# chronological order. The date columns are selected explicitly so that
# re-running this cell after derived columns (dt_first_case, cases_ts, ...)
# have been added still yields only the counts.
cases_ts = county_cases[date].values.tolist()

In [198]:
# Date of the first positive count for each county.
# Work on the date columns by name so the result does not depend on column
# position and the cell can be re-run after derived columns have been added.
daily_counts = county_cases[date]
first_positive = (daily_counts > 0).idxmax(axis=1)
# idxmax returns the first column when no value is positive, so blank out
# counties whose latest count is still not above zero.
county_cases['dt_first_case'] = first_positive.mask(daily_counts[dt_today] <= 0)

In [199]:
# Attach each county's time-series list as a `cases_ts` column.
county_cases = county_cases.assign(cases_ts=cases_ts)

In [200]:
# Latest count per county and its change from the previous reporting date.
latest_counts = county_cases[dt_today]
county_cases['today_case'] = latest_counts
county_cases['today_new_case'] = latest_counts - county_cases[dt_yesterday]

In [201]:
# Build the per-county report: County becomes a regular column next to the
# derived fields.
report_fields = ['cases_ts', 'dt_first_case', 'today_case', 'today_new_case']
county_report = county_cases[report_fields].reset_index()
county_report.columns = ['County'] + report_fields
# Serialise each time-series list as one comma-separated string.
county_report['cases_ts'] = county_report['cases_ts'].apply(
    lambda series: ','.join(str(value) for value in series)
)

In [202]:
np.setdiff1d(county_report['County'],county_gpd['id'])
# 'Illinois', 'Out Of State' and 'Suburban Cook' are in the report but have no matching geometry id

array(['Illinois', 'Out Of State', 'Suburban Cook'], dtype=object)

In [203]:
# Drop the rows that have no county geometry. The feed spells the label
# 'Out Of State' (capital "Of"), so comparing against 'Out of State' alone
# never removed it; the lower-case spelling is kept as well in case the feed
# changes.
non_county_rows = ['Illinois', 'Out Of State', 'Out of State', 'Suburban Cook']
# Copy so that later column assignments write to an independent frame.
county_report = county_report[~county_report['County'].isin(non_county_rows)].copy()



In [204]:
# Stamp every row with the covered date range and the time-series granularity.
county_report = county_report.assign(
    dt_start=dt_first,
    dt_end=dt_today,
    dt_unit='day',
)

In [205]:
# Preview only the first rows instead of rendering the whole frame.
county_report.head(10)

Unnamed: 0,County,cases_ts,dt_first_case,today_case,today_new_case,dt_start,dt_end,dt_unit
0,Adams,"0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1....",2020-03-20,40.0,0.0,2020-03-17,2020-05-06,day
1,Alexander,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-04-16,6.0,2.0,2020-03-17,2020-05-06,day
2,Bond,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-03-29,8.0,1.0,2020-03-17,2020-05-06,day
3,Boone,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-04-05,155.0,9.0,2020-03-17,2020-05-06,day
4,Brown,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-05-01,6.0,4.0,2020-03-17,2020-05-06,day
5,Bureau,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1....",2020-03-27,12.0,0.0,2020-03-17,2020-05-06,day
6,Calhoun,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-04-05,1.0,0.0,2020-03-17,2020-05-06,day
7,Carroll,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1....",2020-03-28,10.0,0.0,2020-03-17,2020-05-06,day
8,Cass,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",2020-04-20,48.0,6.0,2020-03-17,2020-05-06,day
9,Champaign,"1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,4.0,8.0,10.0,1...",2020-03-17,212.0,0.0,2020-03-17,2020-05-06,day


In [206]:
# Attribute join: keep every county shape and attach its report row, if any.
county_final_gpd = county_gpd.merge(
    county_report,
    how="left",
    left_on="id",
    right_on="County",
)

In [207]:
# Every county gets the constant population value 1.
# NOTE(review): looks like a placeholder rather than real population data — confirm.
county_final_gpd['population'] = 1

In [208]:
# Write the county time-series layer as GeoJSON (relative path, UTF-8), then
# print a completion marker.
county_final_gpd.to_file('dph_county_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')


done
