Input:
- Startzeitpunkt
- Endzeitpunkt
- Prediction Data


Output:
- Observations für den Zeitraum Start - Ende
- Serial Interval der Länge des Zeitraums (Start - Ende)
- Prediction Data für den Zeitraum Start - Ende

In [11]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Mount Google Drive at /content/drive (Colab only); the loaders below read
# their CSV files from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Initialization

## Initialize Observations

In [36]:
def initialize_observations(
    filename='observations_03022022.csv',
    start='2020-02-26',
    end='2022-01-31',
    observations=('Number_of_deaths', 'Confirmed_cases', 'Admissions_hospital'),
    data_dir=r'/content/drive/MyDrive/GitHub/master-thesis/data/',
):
  """Load the observation CSV and return selected columns for a date window.

  The file is read as a semicolon-separated CSV that uses '.' as thousands
  separator and ',' as decimal separator; its ``Date`` column is parsed as
  dates.

  Args:
    filename: Name of the CSV file inside ``data_dir``. An absolute path is
      used as is.
    start: First date to keep (inclusive).
    end: Date at which the window stops (exclusive).
    observations: Observation column name(s) to return after ``Date``. Any
      sequence of names is accepted; a single name may be passed as a string.
    data_dir: Directory that contains ``filename``.

  Returns:
    DataFrame with ``Date`` followed by the requested columns, restricted to
    ``start <= Date < end`` and re-indexed from 0.

  Raises:
    KeyError: If a requested column is missing from the file.
  """
  import os  # local import: the notebook's import cell does not provide ``os``

  # A bare string would otherwise be unpacked character by character.
  if isinstance(observations, str):
    observations = [observations]
  columns = ['Date', *observations]

  df_obs = pd.read_csv(
      os.path.join(data_dir, filename),
      sep=';',
      thousands='.',
      decimal=',',
      encoding='utf-8',
      parse_dates=['Date'],
  )

  in_period = (df_obs['Date'] >= start) & (df_obs['Date'] < end)
  # One .loc call selects rows and columns together (no chained indexing).
  return df_obs.loc[in_period, columns].reset_index(drop=True)

In [45]:
# Preview: confirmed cases only, using the default file and date window.
initialize_observations(observations=['Confirmed_cases'])

Unnamed: 0,Date,Confirmed_cases
0,2020-02-26,1
1,2020-02-27,1
2,2020-02-28,1
3,2020-03-01,1
4,2020-03-02,4
...,...,...
699,2022-01-26,41724
700,2022-01-27,39088
701,2022-01-28,35946
702,2022-01-29,28186


## Initialize Serial Interval & Delay Distribution
To do: change the serial interval (SI) and delay distribution (DD) so that periods longer than 407 days can be calculated.

In [32]:
def initialize_si_and_dd(
    start='2020-02-28',
    end='2022-01-31',
    filename='si_and_dd.csv',
    data_dir=r'/content/drive/MyDrive/GitHub/master-thesis/data/',
):
  """Load the serial-interval / delay-distribution table cut to a period length.

  The file is read as a semicolon-separated CSV with ',' as decimal separator;
  the ``serial_interval`` and ``delay_distr`` columns are read as float64.

  Args:
    start: Start date of the period, formatted ``YYYY-MM-DD``.
    end: End date of the period, formatted ``YYYY-MM-DD``.
    filename: Name of the CSV file inside ``data_dir``. An absolute path is
      used as is.
    data_dir: Directory that contains ``filename``.

  Returns:
    The first ``(end - start).days`` rows of the table. If the file holds
    fewer rows than that, all rows are returned; if ``end`` is not after
    ``start``, the result is empty.

  Raises:
    ValueError: If ``start`` or ``end`` does not match ``YYYY-MM-DD``.
  """
  import os  # local import: the notebook's import cell does not provide ``os``

  start_dt = datetime.strptime(start, '%Y-%m-%d')
  end_dt = datetime.strptime(end, '%Y-%m-%d')
  # Clamp at zero: a negative stop in a positional slice would count from the
  # end of the table instead of yielding an empty result.
  n_days = max((end_dt - start_dt).days, 0)

  df_si_dd = pd.read_csv(
      os.path.join(data_dir, filename),
      sep=';',
      decimal=',',
      encoding='utf-8',
      dtype={'serial_interval': np.float64, 'delay_distr': np.float64},
  )

  # Positional slice: exactly n_days rows. A label slice (.loc[:n_days]) also
  # includes the row labelled n_days and so returns one row too many.
  return df_si_dd.iloc[:n_days]

In [46]:
# Preview the serial-interval / delay-distribution rows for 2020-02-28 to 2021-06-28.
initialize_si_and_dd(start='2020-02-28', end='2021-06-28')

Unnamed: 0,serial_interval,delay_distr
0,0.046535,0.013006
1,0.087065,0.030046
2,0.112061,0.044674
3,0.119346,0.055473
4,0.114540,0.062422
...,...,...
402,0.000000,0.000000
403,0.000000,0.000000
404,0.000000,0.000000
405,0.000000,0.000000


## Initialize Prediction Data

# Initialize Model

In [34]:
def initialize_model(
    start='2020-02-28',
    end='2022-01-31',
    pred_data='',
    observations=('Number_of_deaths', 'Confirmed_cases', 'Admissions_hospital'),
):
  """Load the observation and SI/DD tables for one common date window.

  Args:
    start: Start date, forwarded to both loaders.
    end: End date, forwarded to both loaders.
    pred_data: Currently unused by this function.
      NOTE(review): presumably reserved for the prediction data described in
      the notebook header; confirm before relying on it.
    observations: Observation column name(s) to load. Any sequence of names
      is accepted; a single name may be passed as a string.

  Returns:
    Tuple ``(df_observations, df_si_and_dd)`` as returned by
    ``initialize_observations`` and ``initialize_si_and_dd``.
  """
  # Hand the loader its own list, so neither the immutable default nor a
  # caller's sequence is ever shared with it.
  if isinstance(observations, str):
    observation_columns = [observations]
  else:
    observation_columns = list(observations)

  df_observations = initialize_observations(start=start, end=end, observations=observation_columns)
  df_si_and_dd = initialize_si_and_dd(start=start, end=end)

  return df_observations, df_si_and_dd

In [47]:
# Build both model inputs with the default arguments and show the first
# observation rows.
df_init_obs, df_init_sidd = initialize_model()
df_init_obs.head()

Unnamed: 0,Date,Number_of_deaths,Confirmed_cases,Admissions_hospital
0,2020-02-28,0,1,0
1,2020-03-01,0,1,1
2,2020-03-02,0,4,0
3,2020-03-03,0,4,1
4,2020-03-04,0,3,0


In [48]:
# Show the first serial-interval / delay-distribution rows returned by initialize_model().
df_init_sidd.head()

Unnamed: 0,serial_interval,delay_distr
0,0.046535,0.013006
1,0.087065,0.030046
2,0.112061,0.044674
3,0.119346,0.055473
4,0.11454,0.062422
