# Illinois Death Demographics

In [1]:
import utils
import pandas as pd
from datetime import date

%load_ext autoreload
%autoreload 2

### IDPH only publishes death demographics on and after 3/28; earlier entries (36 deaths) need manual input

In [22]:
# Load the manually entered early records and preview the first five rows
# to check them.
manual_entries = utils.IL_death_demographic_early()
manual_entries.head(n=5)

Unnamed: 0,Date,County,Count,Sex,Age_bracket,Reference
0,2020-03-17,Cook,1,female,60.0,http://www.dph.illinois.gov/news/public-health...
1,2020-03-19,Will,1,male,40.0,http://www.dph.illinois.gov/news/public-health...
2,2020-03-19,Cook,1,female,80.0,http://www.dph.illinois.gov/news/public-health...
3,2020-03-19,Sangamon,1,female,70.0,http://www.dph.illinois.gov/news/public-health...
4,2020-03-20,Cook,1,female,70.0,http://www.dph.illinois.gov/news/public-health...


## `parse_IL_death_demographic` combines the manual early entries with the later, automatically parsed entries

In [45]:
# Parse every day from 2020-02-01 through today. `to_date` is kept as its own
# variable because the export cell below also uses it in the output filename.
to_date = date.today()
query_days = pd.date_range(start='2020-02-01', end=to_date)
result = utils.parse_IL_death_demographic(query_days)

In [46]:
# Write the parsed table to ../outputs, with the query end date in the filename.
result.to_csv(f'../outputs/death_demographic_{to_date}.csv')

In [47]:
# Cross-tabulate the summed 'Count' column: one row per age bracket,
# one column per sex.
pivot = result.pivot_table(
    index='Age_bracket',
    columns='Sex',
    values='Count',
    aggfunc='sum',
)
pivot

Sex,female,male,unknown
Age_bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,,1.0
8,1.0,,
20,,2.0,
30,1.0,5.0,
40,3.0,8.0,
50,3.0,17.0,
60,24.0,30.0,3.0
70,24.0,40.0,
80,16.0,25.0,
90,15.0,9.0,


#### Prepare for Flourish.studio

In [94]:
# Build the table exported for Flourish: the summed counts per age bracket and
# sex ("<sex> count" columns) followed by each cell's share of all deaths in
# percent ("<sex>" columns).
#
# The counts are re-derived from `result` instead of renaming `pivot` in place.
# The old in-place rename made this cell non-idempotent: a second run renamed
# the percentage columns to "<sex> count" as well, producing duplicate column
# labels. Deriving the columns from the data also avoids a KeyError when a sex
# category (e.g. 'unknown') does not occur in `result`.
total = result['Count'].sum()
counts = result.pivot_table(index='Age_bracket', columns='Sex', values='Count', aggfunc='sum')
pivot = pd.concat([counts.add_suffix(' count'), counts / total * 100], axis=1)

In [87]:
pivot

Sex,female count,male count,unknown count,female,male,unknown
Age_bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,,,1.0,,,0.47619
20,,1.0,,,0.47619,
30,1.0,4.0,,0.47619,1.904762,
40.0,2.0,6.0,,0.952381,2.857143,
50.0,3.0,15.0,,1.428571,7.142857,
60.0,21.0,28.0,3.0,10.0,13.333333,1.428571
70.0,20.0,32.0,,9.52381,15.238095,
80.0,14.0,23.0,,6.666667,10.952381,
90.0,13.0,8.0,,6.190476,3.809524,
100,2.0,,,0.952381,,


In [95]:
# Stamp the Flourish export with `to_date`, matching the raw export written
# earlier in this notebook. The previous hard-coded '2020-04-03' mislabelled
# the file on every other day and overwrote the old snapshot.
pivot.to_csv('../outputs/death_demographic_' + str(to_date) + '_flourish.csv')

In [18]:
import requests
from bs4 import BeautifulSoup

# Fetch the Illinois coronavirus county-map page and parse its HTML.
link = 'https://coronavirus.illinois.gov/s/county-map'
#headers = requests.utils.default_headers()
#headers.update({ 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'})
# requests waits indefinitely unless a timeout is given, so bound the wait
# (in seconds) to keep a stalled server from hanging the kernel.
req = requests.get(link, timeout=30)
# Fail here on a 4xx/5xx response instead of silently parsing an error page.
req.raise_for_status()
soup = BeautifulSoup(req.content, 'html.parser')

In [19]:
# Length of the text inside each <script> tag of the parsed page.
[len(script.text) for script in soup.find_all('script')]

[345, 0, 0, 0, 0, 0, 0, 0]

In [34]:
# Print, one line per <script> tag, whether its text contains 'Cook'.
for elm in soup.find_all('script'):
    mentions_cook = 'Cook' in elm.text
    print(mentions_cook)

False
False
False
False
False
False
False
False


In [26]:
# NOTE(review): `ll` is not assigned in any cell visible in this notebook —
# presumably it is left over from a deleted or out-of-order cell. Unless it is
# defined elsewhere, this line raises NameError on a fresh kernel; confirm what
# `ll` held before relying on this check.
'Cook' in str(ll)

False

In [35]:
# Scratch check that `in` between two strings tests for a substring.
'ab' in 'abcd'

True