URL preparation

In [68]:
# Geometry URL
zipcode_geometry_url = "http://www.dph.illinois.gov/sites/default/files/COVID19/il_illinois_zip_codes.json?nocache=1"
county_geometry_url = "http://www.dph.illinois.gov/sites/default/files/Illinois_County_Geo_ch.json"

# Data URL
county_data_url = "http://www.dph.illinois.gov/sitefiles/CountyDemos.json?nocache=1"
zipcode_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDZip.json?nocache=1"
history_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDHistoricalTestResults.json?nocache=1"

In [69]:
import pandas as pd
import json
import numpy as np
import geopandas as gpd
from urllib.request import urlopen

Download data

In [70]:
with urlopen(zipcode_data_url) as response:
    zipcode_dict = json.load(response)
    zipcode_data = pd.DataFrame(zipcode_dict['zip_values'])
    

In [71]:
with urlopen(county_data_url) as response:
    county_dict = json.load(response)
    county_data = pd.DataFrame(county_dict['county_demographics'])
    

In [72]:
with urlopen(history_data_url) as response:
    history_dict = json.load(response)


In [73]:
zipcode_geometry = gpd.read_file(zipcode_geometry_url)
county_geometry = gpd.read_file(county_geometry_url)

In [74]:
with urlopen(zipcode_geometry_url) as response:
    tmp = json.load(response)
    zipcode_gpd = gpd.GeoDataFrame(tmp['features'])

In [75]:
zipcode_gpd['geometry'] = zipcode_geometry
zipcode_gpd = zipcode_gpd[['id','geometry']]
zipcode_gpd['id'] = zipcode_gpd['id'].astype(str)

In [76]:
county_gpd = county_geometry[['id','geometry']]

Zipcode Data generation

In [77]:
zipcode_data

Unnamed: 0,confirmed_cases,demographics,total_tested,zip
0,6,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",96,53142
1,59,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",407,60002
2,161,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1628,60004
3,85,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",492,60005
4,117,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",587,60007
5,144,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",446,60008
6,48,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",466,60010
7,56,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",162,60012
8,49,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",268,60013
9,88,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",654,60014


In [78]:
zipcode_gpd.dtypes

id            object
geometry    geometry
dtype: object

In [79]:
zipcode_final_gpd = pd.merge(zipcode_gpd,zipcode_data, how = 'left', left_on=['id'], right_on=['zip'])

In [80]:
zipcode_final_gpd['confirmed_cases'] = zipcode_final_gpd['confirmed_cases'].replace(np.nan,0)
zipcode_final_gpd['total_tested'] = zipcode_final_gpd['total_tested'].replace(np.nan,0)
zipcode_final_gpd = zipcode_final_gpd[['id','confirmed_cases','total_tested','geometry']]

In [81]:
zipcode_final_gpd.to_file('dph_zipcode_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done


Static County Data Generation

In [82]:
county_static = pd.merge(county_gpd, county_data, how="left", left_on="id",right_on="County")

In [100]:
county_static 

Unnamed: 0,id,geometry,County,confirmed_cases,demographics,total_tested
0,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,814,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",4576
1,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,155,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",628
2,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,137,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1315
3,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,3333,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",17234
4,LaSalle,"POLYGON ((-88.93885 41.62837, -88.93891 41.628...",LaSalle,84,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1424
5,Bureau,"POLYGON ((-89.63155 41.58491, -89.64769 41.584...",Bureau,12,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",331
6,Henry,"POLYGON ((-89.86249 41.58401, -89.87517 41.584...",Henry,54,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",638
7,Grundy,"POLYGON ((-88.25217 41.46277, -88.27149 41.462...",Grundy,48,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",733
8,Mercer,"POLYGON ((-90.43382 41.32698, -90.45335 41.326...",Mercer,10,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",163
9,Putnam,"POLYGON ((-89.16353 41.30991, -89.16500 41.309...",Putnam,0,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",48


In [83]:
county_static.to_file('dph_county_static_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done


County Data generation


In [84]:
# transform to new york times format
county_history = pd.DataFrame(history_dict['historical_county']['values'])
l = []
for case in history_dict['historical_county']['values']:
    testDate = case['testDate']
    values = case['values']
    for x in values:
        x['date'] = testDate
        l.append(x)
county_history = pd.DataFrame(l)

In [85]:
# eliminate unassigned data
county_history = county_history[county_history['County'] != 'Unassigned']

In [86]:
# pivot table
county_pivot = pd.pivot_table(county_history, index=['County'],columns=['date'])

In [87]:
# get case time series
county_pivot['cases_ts'] = county_pivot['confirmed_cases'].values.tolist()

In [88]:
# get first case date
county_pivot['dt_first_case'] = (county_pivot['confirmed_cases'] > 0).idxmax(axis=1)
county_pivot.loc[county_pivot['confirmed_cases'].iloc[:, -1] <= 0, 'dt_first_case'] = np.nan

In [89]:
# Get date information
date = county_pivot['confirmed_cases'].columns
dt_first = date[0]
dt_today = date[-1]
dt_yesterday = date[-2]

In [90]:
date

Index(['3/17/2020', '3/18/2020', '3/19/2020', '3/20/2020', '3/21/2020',
       '3/22/2020', '3/24/2020', '3/25/2020', '3/26/2020', '3/27/2020',
       '3/28/2020', '3/29/2020', '3/30/2020', '3/31/2020', '4/1/2020',
       '4/10/2020', '4/11/2020', '4/12/2020', '4/13/2020', '4/14/2020',
       '4/15/2020', '4/16/2020', '4/17/2020', '4/18/2020', '4/19/2020',
       '4/2/2020', '4/20/2020', '4/21/2020', '4/22/2020', '4/23/2020',
       '4/24/2020', '4/25/2020', '4/26/2020', '4/27/2020', '4/28/2020',
       '4/29/2020', '4/3/2020', '4/30/2020', '4/4/2020', '4/5/2020',
       '4/6/2020', '4/7/2020', '4/8/2020', '4/9/2020', '5/1/2020', '5/2/2020',
       '5/3/2020', '5/4/2020', '5/5/2020', '5/6/2020'],
      dtype='object', name='date')

In [91]:
# Add today_case and today_new_case columns
county_pivot['today_case'] = county_pivot['confirmed_cases'][dt_today]
county_pivot['today_new_case'] = county_pivot['confirmed_cases'][dt_today] - county_pivot['confirmed_cases'][dt_yesterday]

In [92]:
# Reformat
county_report = county_pivot[['cases_ts','dt_first_case','today_case','today_new_case']]
county_report = county_report.reset_index()
county_report.columns = ['County','cases_ts','dt_first_case','today_case','today_new_case']
county_report['cases_ts'] = county_report['cases_ts'].apply(lambda x: ','.join(map(str,x)))

In [93]:
np.setdiff1d(county_report['County'],county_gpd['id'])
# Illinois, Out of State and Suburban Cook is not in geometry

array(['Illinois', 'Out Of State', 'Suburban Cook'], dtype=object)

In [94]:
county_report = county_report[(county_report['County'] != 'Illinois') & (county_report['County'] != 'Out of State') & (county_report['County'] != 'Suburban Cook')]

In [95]:
def standardDate(str):
    l = str.split('/')
    month = l[0]
    day = l[1]
    year = l[2]
    if len(month) < 2:
        month = '0' + month
    if len(day) < 2:
        day = '0' + day
    return year + '-' + month + '-' + day

In [96]:
county_report['dt_first_case'] = county_report['dt_first_case'].apply(lambda x: standardDate(x) if type(x) == str else x)
county_report['dt_start'] = standardDate(dt_first)
county_report['dt_end'] = standardDate(dt_today)
county_report['dt_unit'] = 'day'

In [97]:
county_final_gpd = pd.merge(county_gpd, county_report, how="left", left_on="id", right_on="County")

In [98]:
county_final_gpd['population'] = 1

In [99]:
county_final_gpd.to_file('dph_county_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')


done
