URL preparation

In [2]:
# Geometry URLs: boundary files for Illinois zip codes and counties.
# NOTE(review): both appear to be GeoJSON (they are later loaded with
# geopandas and the zip code file is read through its `features` key).
zipcode_geometry_url = "http://www.dph.illinois.gov/sites/default/files/COVID19/il_illinois_zip_codes.json?nocache=1"
county_geometry_url = "http://www.dph.illinois.gov/sites/default/files/Illinois_County_Geo_ch.json"

# Data URLs: JSON feeds with per-county, per-zip-code and historical counts.
# NOTE(review): `?nocache=1` is presumably there to bypass caching — confirm.
county_data_url = "http://www.dph.illinois.gov/sitefiles/CountyDemos.json?nocache=1"
zipcode_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDZip.json?nocache=1"
history_data_url = "http://www.dph.illinois.gov/sitefiles/COVIDHistoricalTestResults.json?nocache=1"

In [3]:
import pandas as pd
import json
import numpy as np
import geopandas as gpd
from urllib.request import urlopen

Download data

In [4]:
# Download the per-zip-code feed; the connection is only needed while parsing.
with urlopen(zipcode_data_url) as response:
    zipcode_dict = json.load(response)

# One row per entry of the feed's `zip_values` list.
zipcode_data = pd.DataFrame(zipcode_dict['zip_values'])
    

In [5]:
# Download the per-county feed; the connection is only needed while parsing.
with urlopen(county_data_url) as response:
    county_dict = json.load(response)

# One row per entry of the feed's `county_demographics` list.
county_data = pd.DataFrame(county_dict['county_demographics'])
    

In [6]:
# Download the historical feed and keep it as a plain dict; it is flattened
# into a DataFrame further down.
with urlopen(history_data_url) as response:
    history_dict = json.loads(response.read())


In [7]:
# Read both boundary layers directly from their URLs with geopandas.
zipcode_geometry = gpd.read_file(zipcode_geometry_url)
county_geometry = gpd.read_file(county_geometry_url)

In [8]:
# Fetch the zip code boundary file a second time as raw JSON so that the
# per-feature fields (the `id` in particular) become columns.
with urlopen(zipcode_geometry_url) as response:
    tmp = json.load(response)

# One row per feature; the geometry column still holds raw dicts at this point.
zipcode_gpd = gpd.GeoDataFrame(tmp['features'])

In [9]:
# Replace the raw geometry dicts with the parsed shapes read by geopandas.
# Assign the geometry *series* rather than the whole GeoDataFrame: assigning a
# frame to a single column only works when that frame has exactly one column.
zipcode_gpd['geometry'] = zipcode_geometry.geometry

# Keep only the join key and the shape; `.copy()` makes the result an
# independent frame so the assignment below does not write into a slice.
zipcode_gpd = zipcode_gpd[['id', 'geometry']].copy()

# The key is compared against the `zip` column of the data feed when merging,
# so normalise it to a string.
zipcode_gpd['id'] = zipcode_gpd['id'].astype(str)

In [10]:
# Keep only the join key (`id`) and the shape for each county.
county_gpd = county_geometry[['id','geometry']]

Zipcode Data Generation

In [11]:
# Preview the first rows only instead of dumping the whole frame.
zipcode_data.head(10)

Unnamed: 0,confirmed_cases,demographics,total_tested,zip
0,6,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",111,53142
1,65,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",443,60002
2,178,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1723,60004
3,90,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",522,60005
4,125,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",643,60007
5,161,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",499,60008
6,54,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",524,60010
7,59,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",174,60012
8,53,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",295,60013
9,99,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",717,60014


In [12]:
# Sanity check before merging: `id` should be an object (string) column.
zipcode_gpd.dtypes

id            object
geometry    geometry
dtype: object

In [13]:
# Left-join the counts onto the shapes so every zip code polygon is kept,
# including those with no reported data.
zipcode_final_gpd = zipcode_gpd.merge(
    zipcode_data,
    how='left',
    left_on=['id'],
    right_on=['zip'],
)

In [14]:
# Zip codes without a match in the data feed come out of the left join as NaN;
# report them as zero.
for count_column in ('confirmed_cases', 'total_tested'):
    zipcode_final_gpd[count_column] = zipcode_final_gpd[count_column].replace(np.nan, 0)

# Keep only the columns that go into the exported layer.
zipcode_final_gpd = zipcode_final_gpd[
    ['id', 'confirmed_cases', 'total_tested', 'geometry']
]

In [15]:
# Write the zip code layer as GeoJSON (path is relative to the working directory).
zipcode_final_gpd.to_file('dph_zipcode_data.geojson', driver='GeoJSON', encoding='utf-8')
# Simple completion marker for the export.
print('done')

done


Static County Data Generation

In [16]:
# Left-join the current county figures onto the county shapes; every polygon
# is kept even when the feed has no row for it.
county_static = county_gpd.merge(
    county_data,
    how="left",
    left_on="id",
    right_on="County",
)

In [17]:
# Preview the first rows only instead of dumping the whole frame.
county_static.head(10)

Unnamed: 0,id,geometry,County,confirmed_cases,demographics,total_tested
0,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,897,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",5052
1,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,177,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",702
2,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,141,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1381
3,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,3561,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",18707
4,LaSalle,"POLYGON ((-88.93885 41.62837, -88.93891 41.628...",LaSalle,93,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",1548
5,Bureau,"POLYGON ((-89.63155 41.58491, -89.64769 41.584...",Bureau,13,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",366
6,Henry,"POLYGON ((-89.86249 41.58401, -89.87517 41.584...",Henry,59,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",692
7,Grundy,"POLYGON ((-88.25217 41.46277, -88.27149 41.462...",Grundy,55,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",773
8,Mercer,"POLYGON ((-90.43382 41.32698, -90.45335 41.326...",Mercer,11,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",184
9,Putnam,"POLYGON ((-89.16353 41.30991, -89.16500 41.309...",Putnam,0,"{'age': [{'age_group': 'Unknown', 'count': 0, ...",55


In [18]:
# Write the static county layer as GeoJSON (path is relative to the working directory).
county_static.to_file('dph_county_static_data.geojson', driver='GeoJSON', encoding='utf-8')
# Simple completion marker for the export.
print('done')

done


County Data Generation


In [19]:
# Transform to the New York Times long format: one row per (county, date).
# Each entry of `historical_county.values` carries a `testDate` plus a list of
# per-county dicts under `values`. Every county dict is copied and tagged with
# its date, so `history_dict` itself is left unmodified and no loop variables
# leak into the notebook namespace.
county_history = pd.DataFrame(
    [
        {**county_record, 'date': daily_entry['testDate']}
        for daily_entry in history_dict['historical_county']['values']
        for county_record in daily_entry['values']
    ]
)

In [20]:
# Drop the rows that are not attributed to any county
is_assigned = county_history['County'].ne('Unassigned')
county_history = county_history[is_assigned]

In [21]:
def standardDate(str):
    """Convert a slash-separated ``month/day/year`` string to ``year-month-day``.

    Month and day are left-padded with a single ``'0'`` when they are shorter
    than two characters; the year is used as given. Only the first three
    slash-separated fields are read.

    Raises:
        IndexError: if the input has fewer than three slash-separated fields.
    """
    def _two_digits(field):
        # Pad with one leading zero, exactly as a single-digit month/day needs.
        return field if len(field) >= 2 else '0' + field

    parts = str.split('/')
    month, day, year = parts[0], parts[1], parts[2]
    return '-'.join([year, _two_digits(month), _two_digits(day)])

In [22]:
# Pivot to a wide table: one row per county, one column per (metric, date).
# No `values`/`aggfunc` are given, so pandas applies its defaults.
county_pivot = county_history.pivot_table(index=['County'], columns=['date'])

In [23]:
# Keep only the confirmed-case block of the pivot (counties x dates)
county_cases = county_pivot['confirmed_cases']

In [24]:
# Standardize the date column labels to YYYY-MM-DD.
# Derive the result from `county_pivot` rather than from `county_cases` itself:
# re-running the cell on already-converted labels would make `standardDate`
# fail, because they no longer contain '/'.
county_cases = county_pivot['confirmed_cases'].rename(columns=standardDate)

In [25]:
# Add missing 03/23 data by carrying the 03/22 counts forward.
# Only fill when the column is really absent, so that genuine 03/23 figures
# are never overwritten should the feed start providing them.
if '2020-03-23' not in county_cases.columns:
    county_cases['2020-03-23'] = county_cases['2020-03-22']

In [26]:
# Peek at one row to check the date labels; the back-filled column is appended last.
county_cases.head(1)

date,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-24,2020-03-25,2020-03-26,2020-03-27,...,2020-04-09,2020-05-01,2020-05-02,2020-05-03,2020-05-04,2020-05-05,2020-05-06,2020-05-07,2020-05-08,2020-03-23
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adams,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,22.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,41.0,1.0


In [27]:
# Collect the date labels in chronological order (YYYY-MM-DD strings sort
# chronologically), then pick out the first, latest and previous-to-latest day.
date = sorted(county_cases.columns)
dt_first = date[0]
dt_today = date[len(date) - 1]
dt_yesterday = date[len(date) - 2]

In [51]:
from datetime import datetime
from datetime import timedelta

In [60]:
def find_missing_date(date):
    """Print a report of gaps and repeats in a sorted list of date strings.

    Args:
        date: list of ``YYYY-MM-DD`` strings, expected in ascending order.

    Returns:
        None. Findings are printed; nothing is printed when the dates form an
        unbroken daily sequence (or when the list is empty).

    Every consecutive pair is inspected, so a gap is reported even when a
    repeated date elsewhere keeps the overall length consistent with the span.
    For a gap of several days, each missing day is listed.
    """
    if not date:
        # Nothing to check; avoids indexing into an empty list.
        return

    parsed = [datetime.strptime(label, "%Y-%m-%d") for label in date]
    alerted = False
    for previous, current in zip(parsed, parsed[1:]):
        gap = (current - previous).days
        if gap == 1:
            continue
        if not alerted:
            # Print the headline once, before the first finding.
            print('Alert! Missing Date or Redundant date')
            alerted = True
        if gap > 1:
            # Report every day strictly between the two neighbours.
            for offset in range(1, gap):
                print('Missing Date:')
                print(previous + timedelta(days=offset))
        else:
            # gap <= 0: the same day twice, or the list is not sorted.
            print('Redundant or out-of-order Date:')
            print(current)

In [64]:
# Look for missing dates in the sorted list of date labels; findings are printed
find_missing_date(date)

In [None]:
# Reorder the columns chronologically.
# `.copy()` makes the result an independent frame: later cells add columns to
# it, which would otherwise write into a slice of the previous frame.
county_cases = county_cases[date].copy()

In [None]:
# Capture each county's series as a plain list (one inner list per row),
# taken while the frame still holds nothing but the date columns.
cases_ts = county_cases.to_numpy().tolist()

In [None]:
# Get first case date: the earliest date column with a positive count.
# Work on the date columns by name, so the result does not depend on column
# positions and the cell still runs after non-numeric helper columns were added.
daily_counts = county_cases[date]
county_cases['dt_first_case'] = (daily_counts > 0).idxmax(axis=1)

# `idxmax` returns the first column when no value is positive, so blank out
# counties whose latest count is still zero.
county_cases.loc[daily_counts[dt_today] <= 0, 'dt_first_case'] = np.nan

In [None]:
# Attach the per-county series captured earlier (one list per row).
county_cases['cases_ts'] = cases_ts

In [None]:
# Add the latest count and its change versus the previous day
latest_counts = county_cases[dt_today]
previous_counts = county_cases[dt_yesterday]
county_cases['today_case'] = latest_counts
county_cases['today_new_case'] = latest_counts - previous_counts

In [None]:
# Build the report table: keep the summary columns, turn the county index
# into a regular column and flatten each series into a comma-separated string.
report_columns = ['cases_ts', 'dt_first_case', 'today_case', 'today_new_case']
county_report = county_cases[report_columns].reset_index()
county_report.columns = ['County'] + report_columns
county_report['cases_ts'] = county_report['cases_ts'].apply(
    lambda series: ','.join(str(value) for value in series)
)

In [None]:
# List the report entries that have no matching `id` in the county geometry
np.setdiff1d(county_report['County'],county_gpd['id'])
# Illinois, Out of State and Suburban Cook are not in the geometry

In [None]:
# Remove the entries that have no polygon in the county geometry
non_county_rows = county_report['County'].isin(['Illinois', 'Out of State', 'Suburban Cook'])
county_report = county_report[~non_county_rows]



In [None]:
# Describe the time axis of `cases_ts`: first day, last day and step.
# `county_report` is a boolean-filtered slice at this point; `assign` builds a
# new frame instead of writing into that slice, so the cell is warning-free
# and can be re-run safely.
county_report = county_report.assign(
    dt_start=dt_first,
    dt_end=dt_today,
    dt_unit='day',
)

In [None]:
# Preview the first rows only instead of dumping the whole frame.
county_report.head(10)

In [None]:
# Left-join the report onto the county shapes so every polygon is kept.
county_final_gpd = county_gpd.merge(
    county_report,
    how="left",
    left_on="id",
    right_on="County",
)

In [None]:
# NOTE(review): every county gets a constant population of 1 — this looks like
# a placeholder; confirm whether real population figures should be joined here.
county_final_gpd['population'] = 1

In [None]:
# Write the county time-series layer as GeoJSON (path is relative to the working directory).
county_final_gpd.to_file('dph_county_data.geojson', driver='GeoJSON', encoding='utf-8')
# Simple completion marker for the export.
print('done')
