# Import Data from source

Imports data from the openZH dataset (https://github.com/openZH/covid_19). The COVID-19 numbers are published in a separate file for each canton.

In [1]:
import requests
import io
import pandas as pd
import shutil
import glob
import os

# List of Cantons
# Abbreviations that are substituted into the per-canton CSV file name below.
cantons = ['AG', 'AI', 'AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE', 'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG', 'ZH']
baseUrl = 'https://raw.githubusercontent.com/openZH/covid_19/master/fallzahlen_kanton_total_csv_v2/'


# Load Data of all Cantons
# The per-canton frames are collected in a list and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0, and appending inside the
# loop re-copied the accumulated frame on every iteration.
cantonFrames = []
for canton in cantons:
    filename = 'COVID19_Fallzahlen_Kanton_{}_total.csv'.format(canton)
    url = baseUrl + filename
    # A timeout keeps one stalled download from hanging the whole import.
    response = requests.get(url, timeout=30)
    # Fail here on an HTTP error instead of parsing the error page as CSV.
    response.raise_for_status()
    dfCanton = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
    dfCanton['date'] = pd.to_datetime(dfCanton['date'])
    # Calculate Daily Numbers
    # Row-to-row difference of each cumulative column; the first row of every
    # canton has no predecessor and is therefore NaN.
    dfCanton['daily_tested'] = dfCanton['ncumul_tested'].diff()
    dfCanton['daily_conf'] = dfCanton['ncumul_conf'].diff()
    dfCanton['daily_released'] = dfCanton['ncumul_released'].diff()
    dfCanton['daily_deceased'] = dfCanton['ncumul_deceased'].diff()
    # Calculate Test / Positive Relation
    # NOTE(review): the denominator adds confirmed cases to the test count;
    # confirm that ncumul_tested excludes positive tests, otherwise this
    # understates the share of positive tests.
    dfCanton['testRelation'] = dfCanton['daily_conf'] / (dfCanton['daily_conf'] + dfCanton['daily_tested'])
    # Calculate 7 Day Means
    # Trailing window over 7 rows (not calendar days); rows without 7
    # non-missing values in the window yield NaN.
    dfCanton['seven_day_conf'] = dfCanton['daily_conf'].rolling(window=7).mean()
    dfCanton['seven_day_dec'] = dfCanton['daily_deceased'].rolling(window=7).mean()
    cantonFrames.append(dfCanton)

# Each canton keeps its own row index, exactly as repeated append() calls did.
df = pd.concat(cantonFrames)

# Clean
# Columns kept for the export, in output order.
exportColumns = ['date', 'abbreviation_canton_and_fl', 'daily_tested', 'ncumul_tested', 'daily_conf', 'ncumul_conf', 'seven_day_conf', 'testRelation', 'new_hosp', 'current_icu', 'current_vent', 'daily_released', 'ncumul_released', 'daily_deceased', 'ncumul_deceased', 'seven_day_dec', 'current_isolated', 'current_quarantined_total']

# Rename by name rather than assigning a positional list of labels: the
# previous positional list had 'ncumul_released' and 'daily_released' swapped
# relative to the selection order, so the two columns were exported under each
# other's label. Only the canton column actually needs a new name.
dfClean = df[exportColumns].rename(columns={'abbreviation_canton_and_fl': 'canton'})

# Export
# Write the cleaned table without the row index.
dfClean.to_csv('../Data/CovidData.csv', index=False)

# Report the date range covered by the import.
dateOldest = min(dfClean['date'])
dateNewest = max(dfClean['date'])
daysCovered = (dateNewest - dateOldest).days
print(f"Imported days: {daysCovered!s}")
print(f"First Date: {dateOldest!s}")
print(f"Last Date: {dateNewest!s}")

Imported days: 692
First Date: 2020-01-01 00:00:00
Last Date: 2021-11-23 00:00:00


# Save a copy to Archive Folder

In [2]:
# Save to CSV
# The archive file is named after the current date as YYYYMMDD.
today = pd.to_datetime("today")
archiveStamp = today.strftime('%Y%m%d')
dfClean.to_csv('../Data/Archive/' + archiveStamp + '.csv', index=False)