# Illinois Death Demographics

In [17]:
import utils
import pandas as pd
from datetime import date

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### IDPH only publishes death demographics on and after 3/28; earlier entries (36 deaths) need manual input

In [22]:
# Sanity-check the manually entered records: load them and preview the first five rows.
manual_entries = utils.IL_death_demographic_early()
manual_entries.head(n=5)

Unnamed: 0,Date,County,Count,Sex,Age_bracket,Reference
0,2020-03-17,Cook,1,female,60.0,http://www.dph.illinois.gov/news/public-health...
1,2020-03-19,Will,1,male,40.0,http://www.dph.illinois.gov/news/public-health...
2,2020-03-19,Cook,1,female,80.0,http://www.dph.illinois.gov/news/public-health...
3,2020-03-19,Sangamon,1,female,70.0,http://www.dph.illinois.gov/news/public-health...
4,2020-03-20,Cook,1,female,70.0,http://www.dph.illinois.gov/news/public-health...


## `parse_IL_death_demographic` combines the manual early entries with the later, automatically parsed data

In [9]:
# Debugging aid: turn on automatic pdb calling for the cells that follow.
# NOTE(review): consider switching this off before a non-interactive Restart & Run All.
%pdb on

Automatic pdb calling has been turned ON


In [11]:
# Parse every day from 2020-02-01 through today; `to_date` also stamps the output file names.
to_date = date.today()
report_days = pd.date_range(start='2020-02-01', end=to_date)
result = utils.parse_IL_death_demographic(report_days)

In [12]:
# Save the parsed records to a CSV whose name carries the run date.
result.to_csv(f'../outputs/death_demographic_{to_date}.csv')

In [13]:
# Sum of `Count` per age bracket (rows) and sex (columns).
pivot = result.pivot_table(values='Count', index='Age_bracket', columns='Sex', aggfunc='sum')
pivot

Sex,female,male,unknown
Age_bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,,,1.0
20,,2.0,
30,1.0,5.0,
40,3.0,10.0,
50,6.0,19.0,
60,25.0,34.0,3.0
70,29.0,43.0,
80,25.0,26.0,
90,16.0,10.0,
100,3.0,,


#### Prepare for Flourish.studio

In [14]:
# Build the table exported for Flourish: for every sex column, a "<sex> count"
# column holding the raw death count and a "<sex>" column holding that count as
# a percentage of all deaths.
#
# The counts are rebuilt from `result` instead of renaming `pivot` in place, so
# re-running this cell gives the same table rather than duplicated columns.
# The sex columns are read from the data instead of being hard-coded, so a
# category that is absent (e.g. no 'unknown' deaths) does not raise KeyError.
total = result['Count'].sum()
counts = pd.pivot_table(result, index='Age_bracket', columns='Sex', values='Count', aggfunc='sum')
pivot = counts.add_suffix(' count')
for sex in counts.columns:
    pivot[sex] = counts[sex] / total * 100

In [15]:
# Show the table with both the count columns and the percentage columns.
pivot

Sex,female count,male count,unknown count,female,male,unknown
Age_bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,,,1.0,,,0.364964
20,,2.0,,,0.729927,
30,1.0,5.0,,0.364964,1.824818,
40,3.0,10.0,,1.094891,3.649635,
50,6.0,19.0,,2.189781,6.934307,
60,25.0,34.0,3.0,9.124088,12.408759,1.094891
70,29.0,43.0,,10.583942,15.693431,
80,25.0,26.0,,9.124088,9.489051,
90,16.0,10.0,,5.839416,3.649635,
100,3.0,,,1.094891,,


In [16]:
# Save the Flourish-ready table next to the raw export, stamped with the run date.
pivot.to_csv(f'../outputs/death_demographic_{to_date}_flourish.csv')

In [51]:
import requests
from bs4 import BeautifulSoup

# Download the Illinois coronavirus county-map page and parse its HTML.
link = 'https://coronavirus.illinois.gov/s/county-map'
# The timeout keeps the cell from hanging forever on an unresponsive server;
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the real content.
req = requests.get(link, timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.content, 'html.parser')

In [65]:
# List every <script> element in the parsed page.
soup.find_all(name='script')

[<script>
     document.write('<style>');
 	document.write('a.header {font-size: 20px; margin-right: 32px;}');
     document.write('</style>');
 
     document.write('<div class="header-email-line" align="right">');
 
     document.write('<a class=header href="mailto:DPH.SICK@ILLINOIS.GOV" target="_top">DPH.SICK@ILLINOIS.GOV</a>');
 	document.write('</div>');
 </script>,
 <script data-src="/jslibrary/1585773144000/canvas/CanvasRendering.js"></script>,
 <script data-src="/jslibrary/1585773144000/ui-analytics-reporting/EclairNG.js"></script>,
 <script src="/s/sfsites/l/%7B%22mode%22%3A%22PROD%22%2C%22app%22%3A%22siteforce%3AcommunityApp%22%2C%22fwuid%22%3A%22kHqYrsGCjDhXliyGcYtIfA%22%2C%22loaded%22%3A%7B%22APPLICATION%40markup%3A%2F%2Fsiteforce%3AcommunityApp%22%3A%22jezy9eMvUqztJqL1PQNDdA%22%7D%2C%22apce%22%3A1%2C%22apck%22%3A%22XqIkx4nyJvDxX87Zu6rF7g%22%2C%22mlr%22%3A1%2C%22pathPrefix%22%3A%22%22%2C%22dns%22%3A%22c%22%2C%22ls%22%3A1%2C%22ct%22%3A1%7D/inline.js?aura.attributes=%7B%22sch

In [60]:
# First <script> element in the parsed page (None when there is none).
soup.find('script')

<script>
    document.write('<style>');
	document.write('a.header {font-size: 20px; margin-right: 32px;}');
    document.write('</style>');

    document.write('<div class="header-email-line" align="right">');

    document.write('<a class=header href="mailto:DPH.SICK@ILLINOIS.GOV" target="_top">DPH.SICK@ILLINOIS.GOV</a>');
	document.write('</div>');
</script>

In [19]:
# Length of the inline text carried by each <script> element.
[len(script_tag.text) for script_tag in soup.find_all('script')]

[345, 0, 0, 0, 0, 0, 0, 0]

In [34]:
# Report, one line per <script> element, whether its text mentions 'Cook'.
script_tags = soup.find_all('script')
for script_tag in script_tags:
    mentions_cook = 'Cook' in script_tag.text
    print(mentions_cook)

False
False
False
False
False
False
False
False


In [26]:
# Check whether 'Cook' appears anywhere in the downloaded page.
# This cell used to test `ll`, a name that is never defined in this notebook
# (NameError on a fresh kernel); it now searches the parsed document instead.
# NOTE(review): confirm that `soup` is the object `ll` was meant to be.
'Cook' in str(soup)

False

In [35]:
# Sanity check: `in` between two strings is a substring test.
'ab' in 'abcd'

True