URL preparation

In [122]:
# Geometry URL
zipcode_geometry_url = "http://www.dph.illinois.gov/sites/default/files/COVID19/il_illinois_zip_codes.json?nocache=1"
county_geometry_url = "http://www.dph.illinois.gov/sites/default/files/Illinois_County_Geo_ch.json"

In [123]:
import pandas as pd
import json
import numpy as np
import geopandas as gpd
from urllib.request import urlopen
import shapely.wkt

Download data

In [124]:
with open("./idph_COVIDZip.json") as f:
    zipcode_dict = json.load(f)
    zipcode_data = pd.DataFrame(zipcode_dict['zip_values'])

In [125]:
with open("./idph_COVIDHistoricalTestResults.json") as f:
    history_dict = json.load(f)

In [126]:
zipcode_geometry = gpd.read_file(zipcode_geometry_url)
county_geometry = gpd.read_file(county_geometry_url)

In [127]:
with urlopen(zipcode_geometry_url) as response:
    tmp = json.load(response)
    zipcode_gpd = gpd.GeoDataFrame(tmp['features'])

In [128]:
zipcode_gpd['geometry'] = zipcode_geometry
zipcode_gpd = zipcode_gpd[['id','geometry']]
zipcode_gpd['id'] = zipcode_gpd['id'].astype(str)

In [129]:
county_geometry

Unnamed: 0,id,OBJECTID,County,Percent_1,Shape__Area,Shape__Length,geometry
0,McHenry,1,McHenry County,27.7,17027625788.537001,522324.305015,"POLYGON ((-88.70742 42.49352, -88.70741 42.493..."
1,Boone,2,Boone County,33.7,7862723082.688600,375171.144577,"POLYGON ((-88.70742 42.49352, -88.70750 42.493..."
2,Ogle,3,Ogle County,31.2,21284584781.190300,651729.421361,"POLYGON ((-89.68809 42.19950, -89.68807 42.184..."
3,Will,4,Will County,28.0,23669428994.175301,779085.214831,"POLYGON ((-88.26146 41.72439, -88.26103 41.708..."
4,LaSalle,5,LaSalle County,27.1,32008439057.989899,825360.163913,"POLYGON ((-88.93885 41.62837, -88.93891 41.628..."
...,...,...,...,...,...,...,...
98,Lawrence,99,Lawrence County,28.9,10426836159.952499,442943.679766,"POLYGON ((-87.90806 38.85013, -87.90819 38.835..."
99,Marion,100,Marion County,33.5,16056558800.880800,509529.795774,"POLYGON ((-89.13844 38.73633, -89.13847 38.721..."
100,Union,101,Union County,28.2,11769540678.443001,489617.647490,"POLYGON ((-89.04143 37.59650, -89.06017 37.597..."
101,Pope,102,Pope County,29.5,10432553057.307899,537878.380731,"POLYGON ((-88.70860 37.59926, -88.70876 37.584..."


In [130]:
target_row = 102
# Move target row to first element of list.
idx = [target_row] + [i for i in range(len(county_geometry)) if i != target_row]
county_geometry = county_geometry.iloc[idx]
county_geometry = county_geometry.reset_index(drop=True)
county_geometry

Unnamed: 0,id,OBJECTID,County,Percent_1,Shape__Area,Shape__Length,geometry
0,Chicago,103,Chicago,28.2,6450276623.31,845282.931362,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ..."
1,McHenry,1,McHenry County,27.7,17027625788.537001,522324.305015,"POLYGON ((-88.70742 42.49352, -88.70741 42.493..."
2,Boone,2,Boone County,33.7,7862723082.688600,375171.144577,"POLYGON ((-88.70742 42.49352, -88.70750 42.493..."
3,Ogle,3,Ogle County,31.2,21284584781.190300,651729.421361,"POLYGON ((-89.68809 42.19950, -89.68807 42.184..."
4,Will,4,Will County,28.0,23669428994.175301,779085.214831,"POLYGON ((-88.26146 41.72439, -88.26103 41.708..."
...,...,...,...,...,...,...,...
98,Jefferson,98,Jefferson County,30.2,16276278165.520500,510233.041489,"POLYGON ((-89.14445 38.47387, -89.14462 38.460..."
99,Lawrence,99,Lawrence County,28.9,10426836159.952499,442943.679766,"POLYGON ((-87.90806 38.85013, -87.90819 38.835..."
100,Marion,100,Marion County,33.5,16056558800.880800,509529.795774,"POLYGON ((-89.13844 38.73633, -89.13847 38.721..."
101,Union,101,Union County,28.2,11769540678.443001,489617.647490,"POLYGON ((-89.04143 37.59650, -89.06017 37.597..."


In [131]:
county_gpd = county_geometry[['id','geometry']]

In [132]:
polygon1 = shapely.wkt.loads('POLYGON ((-89.6536529004544 39.7828177522233, -89.6536529004544 39.7799814663649, -89.64987635016143 39.7799814663649, -89.64987635016143 39.7828177522233, -89.6536529004544 39.7828177522233))')
polygon2 = shapely.wkt.loads('POLYGON ((-89.64923261999786 39.78268583454302, -89.64923261999786 39.7799814663649, -89.64575647711456 39.7799814663649, -89.64575647711456 39.78268583454302, -89.64923261999786 39.78268583454302))')

In [133]:
# county_gpd_dynamic = county_gpd.append([{'id':"Out Of State", 'geometry':polygon1}], ignore_index=True)
# county_gpd_dynamic = county_gpd_dynamic.append([{'id':"Unassigned", 'geometry':polygon2}], ignore_index=True)
county_gpd_dynamic = county_gpd.append([{'id':"Illinois", 'geometry':polygon2}], ignore_index=True)

In [134]:
county_gpd_dynamic

Unnamed: 0,id,geometry
0,Chicago,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ..."
1,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493..."
2,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493..."
3,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184..."
4,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708..."
...,...,...
99,Lawrence,"POLYGON ((-87.90806 38.85013, -87.90819 38.835..."
100,Marion,"POLYGON ((-89.13844 38.73633, -89.13847 38.721..."
101,Union,"POLYGON ((-89.04143 37.59650, -89.06017 37.597..."
102,Pope,"POLYGON ((-88.70860 37.59926, -88.70876 37.584..."


Zipcode Data generation

In [135]:
zipcode_data

Unnamed: 0,zip,confirmed_cases,total_tested,demographics
0,60002,147,2385,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
1,60004,432,11006,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
2,60005,278,5756,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
3,60007,406,4916,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
4,60008,424,5284,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
...,...,...,...,...
730,62988,8,121,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
731,62992,59,298,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
732,62994,6,89,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."
733,62995,20,438,"{'age': [{'age_group': 'Unknown', 'count': 0, ..."


In [136]:
zipcode_gpd.dtypes

id            object
geometry    geometry
dtype: object

In [137]:
zipcode_final_gpd = pd.merge(zipcode_gpd,zipcode_data, how = 'left', left_on=['id'], right_on=['zip'])

In [138]:
zipcode_final_gpd['confirmed_cases'] = zipcode_final_gpd['confirmed_cases'].replace(np.nan,0)
zipcode_final_gpd['total_tested'] = zipcode_final_gpd['total_tested'].replace(np.nan,0)
zipcode_final_gpd = zipcode_final_gpd[['id','confirmed_cases','total_tested','geometry']]

In [139]:
zipcode_final_gpd.to_file('dph_zipcode_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done


County Data generation


In [140]:
last_update_value = history_dict['characteristics_by_county']['values']
last_update_date = str(history_dict['LastUpdateDate']['month'])+'/'+str(history_dict['LastUpdateDate']['day'])+'/'+str(history_dict['LastUpdateDate']['year'])
last_update_date

'8/2/2020'

In [141]:
last_update_dic = {
    "testDate":last_update_date,
    "values": last_update_value
}
last_update_dic
history_dict['historical_county']['values'].append(last_update_dic)

In [142]:
# transform to new york times format
county_history = pd.DataFrame(history_dict['historical_county']['values'])
l = []
for case in history_dict['historical_county']['values']:
    if 'testDate' in case:
        testDate = case['testDate']
    else:
        # Exception for 7/3/2020
        testDate = case['testdate']
    #print(testDate)
    values = case['values']
    for x in values:
        x['date'] = testDate
        l.append(x)
county_history = pd.DataFrame(l)
county_history

Unnamed: 0,County,confirmed_cases,total_tested,deaths,lat,lon,date,negative
0,Illinois,180476,2739377,7503,39.839888,-89.510168,8/1/2020,
1,Adams,410,17340,1,40.057800,-91.135300,8/1/2020,
2,Alexander,36,1147,0,37.167000,-89.360600,8/1/2020,
3,Bond,49,2669,2,38.903000,-89.474200,8/1/2020,
4,Boone,727,8543,23,42.348500,-88.826300,8/1/2020,
...,...,...,...,...,...,...,...,...
14683,Whiteside,308,7618,16,41.760600,-89.850500,8/2/2020,
14684,Will,8592,118136,339,41.505500,-88.090100,8/2/2020,
14685,Williamson,329,11563,5,37.718300,-88.864700,8/2/2020,
14686,Winnebago,3673,52212,124,42.312100,-89.170600,8/2/2020,


In [143]:
# eliminate unassigned data
county_history = county_history[county_history['County'] != 'Unassigned']

In [144]:
def standardDate(str):
    l = str.split('/')
    month = l[0]
    day = l[1]
    year = l[2]
    if len(month) < 2:
        month = '0' + month
    if len(day) < 2:
        day = '0' + day
    return year + '-' + month + '-' + day

In [145]:
# pivot table
county_pivot = pd.pivot_table(county_history, index=['County'],columns=['date'])
county_pivot = county_pivot.replace(np.nan, 0).astype(int)

In [146]:
county_pivot['deaths'].iloc[2]

date
3/17/2020    0
3/18/2020    0
3/19/2020    0
3/20/2020    0
3/21/2020    0
            ..
7/7/2020     1
7/8/2020     1
7/9/2020     1
8/1/2020     2
8/2/2020     2
Name: Bond, Length: 138, dtype: int32

In [147]:
# Standardized Date Format
county_cases = county_pivot['confirmed_cases']
county_deaths = county_pivot['deaths']
county_tested = county_pivot['total_tested']

In [148]:
county_cases = county_cases.rename(columns=standardDate)
county_deaths = county_deaths.rename(columns=standardDate)
county_tested = county_tested.rename(columns=standardDate)

In [149]:
# Add missing 03/23 data with 03/22
county_cases['2020-03-23'] = county_cases['2020-03-22']
county_deaths['2020-03-23'] = county_deaths['2020-03-22']
county_tested['2020-03-23'] = county_tested['2020-03-22']

In [150]:
county_cases.head(1)

date,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-24,2020-03-25,2020-03-26,2020-03-27,...,2020-07-31,2020-07-04,2020-07-05,2020-07-06,2020-07-07,2020-07-08,2020-07-09,2020-08-01,2020-08-02,2020-03-23
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adams,0,0,0,1,1,1,1,1,1,1,...,393,113,122,131,134,139,142,410,424,1


In [151]:
county_tested

date,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-24,2020-03-25,2020-03-26,2020-03-27,...,2020-07-31,2020-07-04,2020-07-05,2020-07-06,2020-07-07,2020-07-08,2020-07-09,2020-08-01,2020-08-02,2020-03-23
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adams,0,0,0,0,0,0,0,0,0,0,...,16558,8733,8825,8954,9259,9339,10243,17340,17668,0
Alexander,0,0,0,0,0,0,0,0,0,0,...,1091,528,529,563,614,643,649,1147,1153,0
Bond,0,0,0,0,0,0,0,0,0,0,...,2640,1019,1058,1089,1161,1189,1215,2669,2825,0
Boone,0,0,0,0,0,0,0,0,0,0,...,8502,5547,5692,5813,5942,6019,6133,8543,8650,0
Brown,0,0,0,0,0,0,0,0,0,0,...,932,528,528,528,529,530,535,938,942,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Whiteside,0,0,0,0,0,0,0,0,0,0,...,7358,4365,4486,4569,4672,4725,4800,7529,7618,0
Will,0,0,0,0,0,0,0,0,0,0,...,115280,80150,81099,81788,82746,84145,85726,116565,118136,0
Williamson,0,0,0,0,0,0,0,0,0,0,...,11160,6374,6500,6628,6703,6764,7013,11351,11563,0
Winnebago,0,0,0,0,0,0,0,0,0,0,...,51215,34659,35115,35584,36195,36603,37149,51644,52212,0


In [152]:
# Get date information
date = county_cases.columns.tolist()
date.sort()
dt_first = date[0]
dt_today = date[-1]
dt_yesterday = date[-2]

In [153]:
from datetime import datetime
from datetime import timedelta

In [154]:
def find_missing_date(date):
    dt_range = datetime.strptime(date[-1], "%Y-%m-%d") - datetime.strptime(date[0], "%Y-%m-%d")
    if len(date) != dt_range.days + 1:
        print('Alert! Missing Date or Redundant date')
        for x in range(len(date) - 1):
            if (datetime.strptime(date[x + 1], "%Y-%m-%d") - datetime.strptime(date[x], "%Y-%m-%d")).days != 1:
                missing_date = datetime.strptime(date[x], "%Y-%m-%d") + timedelta(days=1)
                print('Missing Date:')
                print(missing_date)

In [155]:
# Looking for missing dates
find_missing_date(date)

In [156]:
# reorder date
county_cases = county_cases[date]
county_deaths = county_deaths[date]
county_tested = county_tested[date]

In [157]:
# get case time series
cases_ts = county_cases.values.tolist()
deaths_ts = county_deaths.values.tolist()
tested_ts = county_tested.values.tolist()

In [158]:
# get first case date
county_cases['dt_first_case'] = (county_cases > 0).idxmax(axis=1)
county_cases.loc[county_cases.iloc[:, -2] <= 0, 'dt_first_case'] = np.nan
county_deaths['dt_first_death'] = (county_deaths > 0).idxmax(axis=1)
county_deaths.loc[county_deaths.iloc[:, -2] <= 0, 'dt_first_death'] = np.nan

In [159]:
county_cases['cases_ts'] = cases_ts
county_deaths['deaths_ts'] = deaths_ts

In [160]:
# Add today_case and today_new_case columns
county_cases['today_case'] = county_cases[dt_today]
county_cases['today_new_case'] = county_cases[dt_today] - county_cases[dt_yesterday]
county_deaths['today_death'] = county_deaths[dt_today]
county_deaths['today_new_death'] = county_deaths[dt_today] - county_deaths[dt_yesterday]
county_tested['today_tested'] = county_tested[dt_today]
county_tested['today_new_tested'] = county_tested[dt_today] - county_tested[dt_yesterday]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [161]:
case_report = county_cases[['cases_ts','dt_first_case','today_case','today_new_case']]

In [162]:
death_report = county_deaths[['deaths_ts','dt_first_death','today_death','today_new_death']]

In [163]:
tested_report = county_tested[['today_tested','today_new_tested']]

In [164]:
county_report = case_report.join(death_report, how="outer").join(tested_report, how="outer")

In [165]:
county_report = county_report.reset_index()
county_report.columns = ['NAME','cases_ts','dt_first_case','today_case','today_new_case','deaths_ts','dt_first_death','today_death','today_new_death','today_tested','today_new_tested']
county_report['cases_ts'] = county_report['cases_ts'].apply(lambda x: ','.join(map(str,x)))
county_report['deaths_ts'] = county_report['deaths_ts'].apply(lambda x: ','.join(map(str,x)))

In [166]:
np.setdiff1d(county_report['NAME'],county_gpd_dynamic['id'])
# Illinois, Out of State and Suburban Cook is not in geometry

array(['Out Of State', 'Suburban Cook'], dtype=object)

In [167]:
county_report = county_report[(county_report['NAME'] != 'Out Of State') & (county_report['NAME'] != 'Suburban Cook')]

In [168]:
county_report['start'] = dt_first
county_report['end'] = dt_today
county_report['dt_unit'] = 'day'

In [169]:
county_report

Unnamed: 0,NAME,cases_ts,dt_first_case,today_case,today_new_case,deaths_ts,dt_first_death,today_death,today_new_death,today_tested,today_new_tested,start,end,dt_unit
0,Adams,"0,0,0,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,3,3,8,9,11...",2020-03-20,424,14,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-01,2,1,17668,328,2020-03-17,2020-08-02,day
1,Alexander,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-16,36,0,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,1153,6,2020-03-17,2020-08-02,day
2,Bond,"0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,2,3,3,3,3,3,3,3,...",2020-03-29,54,5,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-16,2,0,2825,156,2020-03-17,2020-08-02,day
3,Boone,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,6,7,...",2020-04-05,728,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...",2020-04-08,23,0,8650,107,2020-03-17,2020-08-02,day
4,Brown,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-01,13,0,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,942,4,2020-03-17,2020-08-02,day
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,Whiteside,"1,1,1,1,1,1,1,1,1,1,2,2,3,4,5,5,6,9,15,18,18,2...",2020-03-17,308,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,...",2020-04-02,16,0,7618,89,2020-03-17,2020-08-02,day
102,Will,"2,3,9,10,12,21,21,28,40,67,104,127,182,223,228...",2020-03-17,8592,90,"0,0,0,1,1,1,1,1,2,3,3,4,4,5,8,9,8,10,11,12,19,...",2020-03-20,339,0,118136,1571,2020-03-17,2020-08-02,day
103,Williamson,"0,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,3,3,3,8,9,...",2020-03-19,329,14,"0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-03-19,5,0,11563,212,2020-03-17,2020-08-02,day
104,Winnebago,"1,1,1,2,4,5,5,5,7,8,8,9,12,13,15,18,24,28,36,4...",2020-03-17,3673,10,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,3,3,...",2020-04-01,124,2,52212,568,2020-03-17,2020-08-02,day


In [170]:
county_final_gpd = pd.merge(county_gpd_dynamic, county_report, how="left", left_on="id", right_on="NAME")

In [171]:
county_final_gpd

Unnamed: 0,id,geometry,NAME,cases_ts,dt_first_case,today_case,today_new_case,deaths_ts,dt_first_death,today_death,today_new_death,today_tested,today_new_tested,start,end,dt_unit
0,Chicago,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ...",Chicago,"0,0,0,0,0,519,519,782,915,1161,1364,1610,2026,...",2020-03-22,61601,396,"0,0,0,0,0,4,4,5,5,9,9,12,16,16,21,39,41,57,76,...",2020-03-22,2787,1,673545,9142,2020-03-17,2020-08-02,day
1,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,"2,4,6,8,11,12,12,14,19,27,45,47,52,52,63,69,81...",2020-03-17,2941,32,"0,0,0,0,0,0,0,0,0,1,1,2,2,2,2,2,3,3,3,3,3,4,5,...",2020-03-26,114,1,40880,676,2020-03-17,2020-08-02,day
2,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,6,7,...",2020-04-05,728,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...",2020-04-08,23,0,8650,107,2020-03-17,2020-08-02,day
3,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,4,5,7,10,1...",2020-03-31,371,0,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,5,0,7484,83,2020-03-17,2020-08-02,day
4,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,"2,3,9,10,12,21,21,28,40,67,104,127,182,223,228...",2020-03-17,8592,90,"0,0,0,1,1,1,1,1,2,3,3,4,4,5,8,9,8,10,11,12,19,...",2020-03-20,339,0,118136,1571,2020-03-17,2020-08-02,day
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Lawrence,"POLYGON ((-87.90806 38.85013, -87.90819 38.835...",Lawrence,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,...",2020-04-07,42,2,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,2160,104,2020-03-17,2020-08-02,day
100,Marion,"POLYGON ((-89.13844 38.73633, -89.13847 38.721...",Marion,"0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,3,3,4,...",2020-03-30,136,4,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,5100,56,2020-03-17,2020-08-02,day
101,Union,"POLYGON ((-89.04143 37.59650, -89.06017 37.597...",Union,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,303,7,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-05,21,0,3403,33,2020-03-17,2020-08-02,day
102,Pope,"POLYGON ((-88.70860 37.59926, -88.70876 37.584...",Pope,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-08,8,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,281,9,2020-03-17,2020-08-02,day


In [172]:
pop_df = pd.read_csv('illinois_county_population.txt', sep="\t")
pop_df["Value"]=pop_df["Value"].replace("\,","", regex=True)
pop_df

Unnamed: 0,County,Value
0,Adams,65691
1,Alexander,6060
2,Bond,16630
3,Boone,53577
4,Brown,6556
...,...,...
97,Whiteside,55626
98,Will,692310
99,Williamson,67056
100,Winnebago,284081


In [173]:
pop_df.loc[pop_df["County"]== "RockIsland","County"] = "Rock Island"
pop_df.loc[pop_df["County"]== "DeWitt","County"] = "De Witt"
pop_df.loc[pop_df["County"]== "JoDaviess","County"] = "Jo Daviess"
pop_df.loc[pop_df["County"]== "St.Clair","County"] = "St. Clair"
row1 = {'County':'Chicago', 'Value':2693976}
row2 = {'County':'Illinois', 'Value':12671821}
pop_df = pop_df.append(row1, ignore_index=True)
pop_df = pop_df.append(row2, ignore_index=True)
pop_df["Value"] = pop_df["Value"].astype(int)
pop_df

Unnamed: 0,County,Value
0,Adams,65691
1,Alexander,6060
2,Bond,16630
3,Boone,53577
4,Brown,6556
...,...,...
99,Williamson,67056
100,Winnebago,284081
101,Woodford,38463
102,Chicago,2693976


In [174]:
county_final_gpd = pd.merge(county_final_gpd, pop_df, how="left", left_on="id", right_on="County")
county_final_gpd = county_final_gpd.drop(['County'], axis=1)
county_final_gpd = county_final_gpd.rename(columns={"Value": "population"})
county_final_gpd

Unnamed: 0,id,geometry,NAME,cases_ts,dt_first_case,today_case,today_new_case,deaths_ts,dt_first_death,today_death,today_new_death,today_tested,today_new_tested,start,end,dt_unit,population
0,Chicago,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ...",Chicago,"0,0,0,0,0,519,519,782,915,1161,1364,1610,2026,...",2020-03-22,61601,396,"0,0,0,0,0,4,4,5,5,9,9,12,16,16,21,39,41,57,76,...",2020-03-22,2787,1,673545,9142,2020-03-17,2020-08-02,day,2693976
1,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,"2,4,6,8,11,12,12,14,19,27,45,47,52,52,63,69,81...",2020-03-17,2941,32,"0,0,0,0,0,0,0,0,0,1,1,2,2,2,2,2,3,3,3,3,3,4,5,...",2020-03-26,114,1,40880,676,2020-03-17,2020-08-02,day,308570
2,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,6,7,...",2020-04-05,728,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...",2020-04-08,23,0,8650,107,2020-03-17,2020-08-02,day,53577
3,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,4,5,7,10,1...",2020-03-31,371,0,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,5,0,7484,83,2020-03-17,2020-08-02,day,50923
4,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,"2,3,9,10,12,21,21,28,40,67,104,127,182,223,228...",2020-03-17,8592,90,"0,0,0,1,1,1,1,1,2,3,3,4,4,5,8,9,8,10,11,12,19,...",2020-03-20,339,0,118136,1571,2020-03-17,2020-08-02,day,692310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Lawrence,"POLYGON ((-87.90806 38.85013, -87.90819 38.835...",Lawrence,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,...",2020-04-07,42,2,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,2160,104,2020-03-17,2020-08-02,day,15765
100,Marion,"POLYGON ((-89.13844 38.73633, -89.13847 38.721...",Marion,"0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,3,3,4,...",2020-03-30,136,4,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,5100,56,2020-03-17,2020-08-02,day,37620
101,Union,"POLYGON ((-89.04143 37.59650, -89.06017 37.597...",Union,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,303,7,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-05,21,0,3403,33,2020-03-17,2020-08-02,day,16841
102,Pope,"POLYGON ((-88.70860 37.59926, -88.70876 37.584...",Pope,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-08,8,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,281,9,2020-03-17,2020-08-02,day,4212


In [175]:
county_final_gpd.loc[county_final_gpd["population"].isna()== True,"id"]

Series([], Name: id, dtype: object)

In [176]:
county_final_gpd['change_ts'] = ""
for x in range(0, len(county_final_gpd)):
    newCaseList = [0]
    changeRateList = []
    caseStrList = county_final_gpd.loc[x,'cases_ts'].split(",")
    for i in range(1, len(caseStrList)):
        dailyNewCase = float(caseStrList[i]) - float(caseStrList[i-1])
        newCaseList.append(dailyNewCase)
    for i in range(0, len(newCaseList)):
        if i < 13:
            changeRate = 0
        else:
            currentWeekSum = 0
            previousWeekSum = 0
            for j in range(0,7):
                currentWeekSum = currentWeekSum + newCaseList[i-j]
            for k in range(7,14):
                previousWeekSum = previousWeekSum + newCaseList[i-k]
            if previousWeekSum == 0 and currentWeekSum == 0:
                changeRate = 0
            elif previousWeekSum == 0 and currentWeekSum != 0:
                changeRate = currentWeekSum
            else:
                changeRate = round(currentWeekSum/previousWeekSum-1,2)
        changeRateList.append(changeRate)
        changeRateStrList = list(map(str, changeRateList))
#     print(','.join(changeRateStrList))
    county_final_gpd.loc[x,'change_ts'] = ','.join(changeRateStrList)

county_final_gpd

Unnamed: 0,id,geometry,NAME,cases_ts,dt_first_case,today_case,today_new_case,deaths_ts,dt_first_death,today_death,today_new_death,today_tested,today_new_tested,start,end,dt_unit,population,change_ts
0,Chicago,"MULTIPOLYGON (((-87.93514 42.00089, -87.93521 ...",Chicago,"0,0,0,0,0,519,519,782,915,1161,1364,1610,2026,...",2020-03-22,61601,396,"0,0,0,0,0,4,4,5,5,9,9,12,16,16,21,39,41,57,76,...",2020-03-22,2787,1,673545,9142,2020-03-17,2020-08-02,day,2693976,"0,0,0,0,0,0,0,0,0,0,0,0,0,2.24,1.44,1.37,0.82,..."
1,McHenry,"POLYGON ((-88.70742 42.49352, -88.70741 42.493...",McHenry,"2,4,6,8,11,12,12,14,19,27,45,47,52,52,63,69,81...",2020-03-17,2941,32,"0,0,0,0,0,0,0,0,0,1,1,2,2,2,2,2,3,3,3,3,3,4,5,...",2020-03-26,114,1,40880,676,2020-03-17,2020-08-02,day,308570,"0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,3.08,2.33,1.57,0..."
2,Boone,"POLYGON ((-88.70742 42.49352, -88.70750 42.493...",Boone,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,6,7,...",2020-04-05,728,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...",2020-04-08,23,0,8650,107,2020-03-17,2020-08-02,day,53577,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.0,4.0,..."
3,Ogle,"POLYGON ((-89.68809 42.19950, -89.68807 42.184...",Ogle,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,4,5,7,10,1...",2020-03-31,371,0,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,5,0,7484,83,2020-03-17,2020-08-02,day,50923,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,1.0,1.0,1.0,4...."
4,Will,"POLYGON ((-88.26146 41.72439, -88.26103 41.708...",Will,"2,3,9,10,12,21,21,28,40,67,104,127,182,223,228...",2020-03-17,8592,90,"0,0,0,1,1,1,1,1,2,3,3,4,4,5,8,9,8,10,11,12,19,...",2020-03-20,339,0,118136,1571,2020-03-17,2020-08-02,day,692310,"0,0,0,0,0,0,0,0,0,0,0,0,0,9.63,6.69,6.62,4.47,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Lawrence,"POLYGON ((-87.90806 38.85013, -87.90819 38.835...",Lawrence,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,...",2020-04-07,42,2,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,2160,104,2020-03-17,2020-08-02,day,15765,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,..."
100,Marion,"POLYGON ((-89.13844 38.73633, -89.13847 38.721...",Marion,"0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,3,3,4,...",2020-03-30,136,4,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,5100,56,2020-03-17,2020-08-02,day,37620,"0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,2.0,2.0,2.0,3.0,..."
101,Union,"POLYGON ((-89.04143 37.59650, -89.06017 37.597...",Union,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-04-15,303,7,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-05,21,0,3403,33,2020-03-17,2020-08-02,day,16841,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."
102,Pope,"POLYGON ((-88.70860 37.59926, -88.70876 37.584...",Pope,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2020-05-08,8,1,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",,0,0,281,9,2020-03-17,2020-08-02,day,4212,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,..."


In [177]:
county_final_gpd['geometry'] = county_final_gpd.apply(lambda x: shapely.wkt.loads(shapely.wkt.dumps(x.geometry, rounding_precision=8)).simplify(0), axis = 1)

In [178]:
county_final_gpd.to_file('dph_county_data.geojson', driver='GeoJSON', encoding='utf-8')
print('done')

done
