# Scraping RIVM to get stats of COVID-19 cases in NL

Scraping the page:

In [1]:
import cfscrape
from lxml import etree

# RIVM page with the per-municipality case map; the values read below are
# taken from elements of its HTML.
url="https://www.rivm.nl/coronavirus-kaart-van-nederland-per-gemeente"

# Browser-like request headers sent with the page request.
header = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
          'Accept-Encoding': 'gzip, deflate, sdch',
          'Accept-Language' : 'nl-NL,nl;q=0.8,en-US;q=0.6,en;q=0.4',
          'Cache-Control' : 'max-age=0',
          'Connection': 'keep-alive',
          'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'}

scraper = cfscrape.create_scraper()
# Bound the request so a stalled connection cannot hang the notebook, and fail
# with a clear HTTP error instead of parsing an error page further down.
response = scraper.get(url, headers=header, timeout=30)
response.raise_for_status()
scraped_html = response.content
html = etree.HTML(scraped_html)

# Date: text of the 'mapTitles' div after 'tot en met ', up to the next double quote.
date = html.xpath("//div[@id='mapTitles']/text()")[0].split('tot en met ')[1].split('"')[0]
# Time: sixth whitespace-separated token of the first <p> text node.
time = html.xpath("//p/text()")[0].split()[5]
# Text node(s) of the 'csvData' div; the first one is parsed as CSV in the next cell.
data = html.xpath("//div[@id='csvData']/text()")

print("Last update from the RIVM page:",date,time)

Last update from the RIVM page: 20-3-2020 14.00


Loading the data in a dataframe:

In [2]:
import pandas as pd
import io
# Split the scraped text into lines, discard the first line, and parse the
# remainder as ';'-separated values.
csv_lines = str(data[0]).split('\n')
csv_body = '\n'.join(csv_lines[1:])
df = pd.read_csv(io.StringIO(csv_body), sep=';')

Workaround to get cases in 'unknown' municipalities:

In [3]:
import re
# The first row's 'Gemeente' cell carries a number inside its text
# (presumably the count of cases without a known municipality - confirm
# against the RIVM data). Extract that number and turn the row into a regular
# ('Unknown', count) record.
first_gemeente = df.loc[0, 'Gemeente']
# Guard keeps the cell idempotent: once the row has been rewritten to
# 'Unknown' it no longer contains digits, and re-running the extraction
# would raise an IndexError.
if first_gemeente != 'Unknown':
    aantal_unknown_gemeente = int(re.findall(r'\d+', first_gemeente)[0])
    df.loc[0, 'Gemeente'] = 'Unknown'
    df.loc[0, 'Aantal'] = aantal_unknown_gemeente

Getting summary (Total, Utrecht, and Enschede):

In [4]:
# Case counts for the two municipalities of interest: first row whose
# 'Gemeente' equals the given name.
is_utrecht = df['Gemeente'] == 'Utrecht'
is_enschede = df['Gemeente'] == 'Enschede'
aantal_utrecht = df.loc[is_utrecht, 'Aantal'].values[0]
aantal_enschede = df.loc[is_enschede, 'Aantal'].values[0]
# Sum of the whole 'Aantal' column.
aantal_total = df['Aantal'].sum()

# One line per fact, joined with newlines.
summary = "\n".join([
    f"Last update from RIVM ({date} at {time})",
    "https://www.rivm.nl/coronavirus-kaart-van-nederland-per-gemeente",
    f"Total cases in NL: {aantal_total}",
    f"Cases in Enschede: {aantal_enschede}",
    f"Cases in Utrecht: {aantal_utrecht}",
])
print(summary)

Last update from RIVM (20-3-2020 at 14.00)
https://www.rivm.nl/coronavirus-kaart-van-nederland-per-gemeente
Total cases in NL: 2994
Cases in Enschede: 8
Cases in Utrecht: 101


Exporting to CSV (for future usage of the data):

In [5]:
import pathlib
# The file name encodes the report date, the report time and the total case count.
file_name = pathlib.Path('data/covid19-nl-'+date+'-'+time+'-'+str(aantal_total)+'.csv')
# Write only when a file with this name has not been saved yet.
if not file_name.exists():
    # to_csv does not create missing directories, so make sure 'data/' exists
    # before writing (e.g. on a fresh checkout).
    file_name.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(file_name, index=False)

Sending a Telegram message to the group:

In [6]:
import requests
def telegram_bot_sendtext(bot_message,token,chatid):
    """Send a text message to a Telegram chat through the Bot API.

    The message, chat id and parse mode are passed as query parameters so
    that ``requests`` percent-encodes them. Concatenating the raw message
    into the URL corrupts the query whenever the text contains characters
    such as ``&``, ``#`` or ``+``.

    Args:
        bot_message: Text to send; sent with ``parse_mode=Markdown``.
        token: Bot token, inserted into the API URL path.
        chatid: Identifier of the target chat.

    Returns:
        "Message Sent!" when the JSON response has a truthy ``ok`` field,
        otherwise "Message failed to be sent!".
    """
    send_url = 'https://api.telegram.org/bot' + token + '/sendMessage'
    payload = {
        'chat_id': chatid,
        'parse_mode': 'Markdown',
        'text': bot_message,
    }
    # The timeout keeps the notebook from hanging if the API does not answer.
    response = requests.get(send_url, params=payload, timeout=10)
    if response.json().get('ok'):
        return "Message Sent!"
    return "Message failed to be sent!"

In [7]:
import os

# Read the credentials from the environment so they are never stored in the
# notebook; each falls back to an empty string when its variable is not set.
bot_token = os.environ.get('TELEGRAM_BOT_TOKEN', '')
bot_chatID = os.environ.get('TELEGRAM_CHAT_ID', '')

In [8]:
# Send the summary to the chat; the returned status string is the cell output.
telegram_bot_sendtext(bot_message=summary, token=bot_token, chatid=bot_chatID)

'Message Sent!'