# stochastic analysis of COVID19 time series in "Baden-Württemberg"
data as of 10.06.2020 by https://sozialministerium.baden-wuerttemberg.de/de/gesundheit-pflege/gesundheitsschutz/infektionsschutz-hygiene/informationen-zu-coronavirus/lage-in-baden-wuerttemberg/

algorithm from "Analysis of time series from stochastic processes" by J. Gradišek, S. Siegert, R. Friedrich, I. Grabec [1]

In [152]:
import pandas as pd
import numpy as np
from scipy import signal, stats, special
from scipy.optimize import curve_fit
from scipy.integrate import solve_ivp, quad
import matplotlib.pyplot as plt
import datetime

## 1. extract values from selected list
variable name suffixes: `*I`: infected; `*T`: deceased

In [2]:
# Workbook with the case numbers (see the data source named at the top).
filename = 'Tabelle_Coronavirus-Faelle-BW.xlsx'

# Read the infected sheet first, then the deceased sheet, with identical
# options. header=None keeps every spreadsheet row as data, so rows can be
# addressed by their fixed position later on.
dfi, dft = (
    pd.read_excel(filename, sheet_name=sheet, header=None)
    for sheet in ('Infizierte Coronavirus in BW', 'Todesfälle Coronavirus in BW')
)

In [3]:
# Turn both sheets into plain arrays that are addressed by position.
# Spreadsheet rows are counted from 1, array rows from 0:
#   row 7        -> index 6:  dates
#   rows 8 to 51 -> regions
#       row 18   -> index 17: Freiburg
#       row 38   -> Ravensburg
#   row 52       -> index 51: sum
ctyI, ctyT = (np.nan_to_num(np.asarray(sheet)) for sheet in (dfi, dft))

### 1.1 plot regional data

In [4]:
# row of the region to analyse (index 17 = spreadsheet row 18)
index = 17
region = ctyI[index][0]
title = 'Corona cases in %s' % region

# calculate x data: one value per date column.
# The day of the year is used instead of the former "day + 30 * month"
# approximation, which mapped e.g. 31 March and 1 April onto the same x value
# and left a gap after February. Cells that are not dates (such as the leading
# label column) become False, i.e. 0.
# NOTE: the day of the year restarts at 1 on 1 January, so this is only
# monotonic while the data stays within a single calendar year.
xI = [
    el.timetuple().tm_yday if isinstance(el, datetime.datetime) else False
    for el in ctyI[6]
]
xT = [
    el.timetuple().tm_yday if isinstance(el, datetime.datetime) else False
    for el in ctyT[6]
]

# calculate trajectory points from data of given index; every cell that is not
# a plain int (labels, missing values) is replaced by False, i.e. 0
yI = [el if type(el) is int else False for el in ctyI[index]]
yT = [el if type(el) is int else False for el in ctyT[index]]

# drop the first element (column of regions, only str in these) and reverse the
# column order (presumably the sheet lists the newest date first - verify
# against the workbook)
xI = np.flip(xI[1:])
xT = np.flip(xT[1:])
yI = np.flip(yI[1:])
yT = np.flip(yT[1:])

# Calculate new cases per day (prepend=0 keeps the length of the series)
dyIdt = np.diff(yI, prepend=0)

# Calculate new deaths per day
dyTdt = np.diff(yT, prepend=0)

# Checking for mistakes: all series must have the same length ...
print('I1:', len(xI) == len(yI))
print('T1:', len(xT) == len(yT))
print('dIdt:', len(xI) == len(dyIdt))
print('dTdt:', len(xT) == len(dyTdt))
# ... and the daily differences must add up to the last cumulative value
checksum = dyIdt.sum()
print("checksum I:", checksum == yI[-1])
checksum = dyTdt.sum()
print("checksum T:", checksum == yT[-1])

I1: True
T1: True
dIdt: True
dTdt: True
checksum I: True
checksum T: True


In [163]:
# Plot infected, deceased and new cases per day
# (uses title, xI, xT, yI, yT and dyIdt computed in the previous cell)
# IPython magic: select the interactive notebook backend
%matplotlib notebook
# reported counts as small scatter markers
plt.scatter(xI, yI, label='Infected', s=2)
plt.scatter(xT, yT, label='Deceased', s=2)
# day-to-day difference of the infected counts as a green line
plt.plot(xI, dyIdt, label='new cases per day', c='g')
plt.xlabel('Time [days]')
plt.ylabel('Cases')
plt.title(title)
plt.legend(loc='upper left')
plt.show()
# optional export of the figure, currently disabled
#plt.savefig('%sCases.png' % region)

<IPython.core.display.Javascript object>

### 1.2 plot total data

In [6]:
# x data (xI, xT) is taken from the cell above

# total sum over all regions: row with index 51, without the first element
# (column of regions, only str in these), in reversed column order
yI_total = np.flip(list(ctyI[51])[1:])
yT_total = np.flip(list(ctyT[51])[1:])

# new cases / new deaths per day; prepend=0 keeps the length of the series
dyI_totaldt = np.diff(yI_total, prepend=0)
dyT_totaldt = np.diff(yT_total, prepend=0)

# Checking for mistakes: lengths must agree with the x data ...
print('I2:', len(xI) == len(yI_total))
print('T2:', len(xT) == len(yT_total))
print('dI_totaldt:', len(xI) == len(dyI_totaldt))
print('dT_totaldt:', len(xT) == len(dyT_totaldt))
# ... and the daily differences must add up to the last cumulative value
checksum = sum(dyI_totaldt)
print("checksum I:", checksum == yI_total[-1])
checksum = sum(dyT_totaldt)
print("checksum T:", checksum == yT_total[-1])

I2: True
T2: True
dI_totaldt: True
dT_totaldt: True
checksum I: True
checksum T: True


In [164]:
# Plot infected, deceased and new cases per day
# (totals of row index 51; x data xI / xT comes from the regional cell)
# IPython magic: select the interactive notebook backend
%matplotlib notebook
# reported totals as small scatter markers
plt.scatter(xI, yI_total, label='Total infected', s=2)
plt.scatter(xT, yT_total, label='Total deceased', s=2)
# day-to-day difference of the infected totals as a green line
plt.plot(xI, dyI_totaldt, label='new cases per day', c='g')
plt.xlabel('Time [days]')
plt.ylabel('Cases')
plt.title('total Corona cases Baden-Württemberg')
plt.legend(loc='upper left')
plt.show()
# optional export of the figure, currently disabled
#plt.savefig('BWCases.png')

<IPython.core.display.Javascript object>

## 2. prerequisites for stochastic analysis

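### 2.1 noisy HO data
We will use the harmonic oscillator (HO) as example data. In the following, noisy data is created by adding Gaussian noise to the well-known analytical solution $x = \sin(t)$, $v = \cos(t)$ of the undamped, unforced HO. For reference, the equations of motion of the HO with damping $\gamma$ and stochastic force $\Gamma(t)$ are: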

$$
\begin{aligned}
\dot{x} &= v\\
\dot{v} &= -x + \Gamma(t) - \gamma v
\end{aligned}
$$

In [251]:
# scale factor of the standard-normal noise added to every sample
epsilon = 10e-3
# number of sample points
tau = 1000
# sample positions: tau evenly spaced values covering [0, 2*pi]
r = np.linspace(0, 2*np.pi, num=tau)
# noisy position and velocity; the noise for x is drawn first, then the noise
# for v, and both series are kept as plain lists
x = list(np.sin(r) + epsilon * np.random.randn(tau))
v = list(np.cos(r) + epsilon * np.random.randn(tau))

In [244]:
# Plot the noisy HO series x and v against their sample index
# IPython magic: select the interactive notebook backend
%matplotlib notebook
# start from an empty figure
plt.clf()
plt.plot(x, label='x')
plt.plot(v, label='v')
plt.xlabel('timestep t')
plt.ylabel('x / v')
plt.title('analytical solution of HO\nx=sin(t); v=cos(t)')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

### 2.2 evaluating p(x,t|x',t')
First we calculate the histogram of our HO example to get a brief overview of what we want.

Secondly we will define a function which calculates the probability of the time series having the value x at time t.

#### 2.2.1 showing example histogram of HO

In [246]:
# Histogram (raw counts) of the HO series x and v
# IPython magic: select the interactive notebook backend
%matplotlib notebook
# start from an empty figure
plt.clf()
# same 100 bins on [-2, 2] for both series, drawn as outlines so they can overlap
plt.hist(x, bins=100, range=(-2, 2), histtype='step', label='x')
plt.hist(v, bins=100, range=(-2, 2), histtype='step', label='v')
plt.title('histogram of x/v')
plt.xlabel('x-value of occurence')
plt.ylabel('occurences of x/v')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

#### 2.2.2 declaring functions

In [357]:
def probability_hist(s, bins=100):
    '''normalised histogram

    Bins the time series s with numpy.histogram and divides every bin count
    by the number of samples, so that counts become relative frequencies.

    Parameters:
        - (array like) s: time series
        - (int or sequence of scalars or str, optional) bins: passed on to
            numpy.histogram; if bins is an int, it defines the number of
            equal-width bins in the range of s

    Returns:
        - x, (list) y
            x holds the bin edges as returned by numpy.histogram (one entry
            more than y); y[k] is the relative frequency of bin k, i.e. its
            count divided by len(s)
    '''
    sample_count = len(s)
    counts, edges = np.histogram(s, bins=bins)

    # element-wise division, then back to a plain list of per-bin values
    return edges, list(counts / sample_count)

def p(s: list, x: float, t: int, bins: int=100) -> float:
    '''probability function

    Returns the relative frequency of the histogram bin of time series s that
    is associated with the value x.

    Parameters:
        - (array like) s: time series
        - (float) x: value the series should have
        - (int) t: time at which to evaluate; currently unused, the histogram
            is always built from the whole series
        - (int) bins: number of histogram bins, passed to probability_hist

    Returns:
        - the relative frequency of the bin whose left edge is closest to x,
            or 0 (after printing a warning) if x lies outside the range
            covered by the histogram
    '''
    # edges: bin edges (one more than bins), probabilities: value per bin
    edges, probabilities = probability_hist(s, bins=bins)

    # checking if x is in interval
    lower = np.min(edges)
    upper = np.max(edges)
    if x < lower or x > upper:
        # if x is out of interval the probability is zero
        # (the arguments must be passed as one tuple, otherwise the
        # formatting itself raises a TypeError)
        print('[!] Warning: x (%f) is out of range at min %f, max %f' % (x, lower, upper))
        return 0

    # find the left bin edge nearest to x; the last edge is dropped because it
    # only closes the final bin and has no probability entry of its own
    idx = (np.abs(edges[:-1] - x)).argmin()

    return probabilities[idx]

def p_cond(s: list, x: float, t: int, x_: float, t_: float, bins: int=100) -> float:
    '''conditional probability function (not implemented yet)
    as used in [1](6a): p(y, t + tau | x, t)

    Intended to return the probability of s having the value x at time t,
    given that the value x_ was present at time t_.

    At the moment only the histogram of s is evaluated; its result is
    discarded and the function returns None.
    '''
    probability_hist(s, bins=bins)
    return None

#### 2.2.3 testing functions

In [377]:
# probability_hist()
# test on HO Data
# IPython magic: select the interactive notebook backend
%matplotlib notebook

# hist[0]: bin edges, hist[1]: relative frequency per bin
hist = probability_hist(x, bins=50)
# the relative frequencies of all bins should add up to 1
checksum = np.sum(hist[1])

# start from an empty figure
plt.clf()
# there is one more edge than bins, so the last edge is dropped for plotting
plt.plot(hist[0][:-1], hist[1])
plt.xlim((-2, 2))
plt.title('Histogram of HO x data\nchecksum should be near 1: %f' % checksum)
plt.show()

<IPython.core.display.Javascript object>

In [385]:
# probability_hist()
# test on COVID Data
# IPython magic: select the interactive notebook backend
%matplotlib notebook
# histogram of the daily new infections (totals); hist[0]: bin edges,
# hist[1]: relative frequency per bin
hist = probability_hist(dyI_totaldt, bins=25)
# the relative frequencies of all bins should add up to 1
checksum = np.sum(hist[1])

# start from an empty figure
plt.clf()
# there is one more edge than bins, so the last edge is dropped for plotting
plt.plot(hist[0][:-1], hist[1])
plt.title('Histogram of CoViD data\nchecksum should be near 1: %f' % checksum)
plt.show()

<IPython.core.display.Javascript object>

In [390]:
# p()
# Evaluate p() at 100 evenly spaced values between -1 and 1 and add up the
# returned probabilities.
# Currently parameter t (here t=0) does NOTHING!
r = np.linspace(-100, 100, 100)
checksum = sum(p(x, i / 100, 0, bins=100) for i in r)

print('checksum should be near 1: %f' % checksum)

checksum should be near 1: 0.976000


### 2.3 solve HO numerically

In [253]:
# Define the function to deliver the r.h.s. of the system
def f(t, y):
    '''right-hand side of the HO written as a first-order system

    Parameters:
        - t: time; not used by this system
        - y: state, where y[0] is the position x and y[1] the velocity v

    Returns:
        - (tuple) the derivatives (dx/dt, dv/dt) = (v, -x)
    '''
    position = y[0]
    velocity = y[1]
    return velocity, -position

In [226]:
# Integrate dy/dt=f(t,y)
# initial state [x(0), v(0)]
r0 = [1., 0.]
# integration interval [t0, t1] and spacing of the requested output samples
t0 = 0.
t1 = 10.
dt = 0.001
# relative / absolute error tolerances handed to the solver
rtol = 1.e-12
atol = 1.e-14
# Output times from t0 to t1 (both included) in steps of dt.
# The number of steps is rounded rather than truncated: a quotient such as
# 0.3 / 0.1 evaluates to 2.9999999999999996, and truncating it would silently
# drop one sample and change the spacing.
teval = np.linspace(t0, t1, int(round((t1 - t0) / dt)) + 1)
sol = solve_ivp(f, [t0, t1], r0, method='DOP853', t_eval=teval, rtol=rtol, atol=atol)

In [250]:
# plot the numerical solutions curves (x,v)
# IPython magic: select the interactive notebook backend
%matplotlib notebook

# NOTE: this rebinds x and v, replacing the noisy series created earlier
# first row of the solution: position, second row: velocity
x = sol.y[0]
v = sol.y[1]
# both curves are drawn against their sample index, not against sol.t
plt.plot(x, label='x')
plt.plot(v, label='v')
plt.xlabel('timestep t')
plt.ylabel('x / v')
plt.title('numerical solution of HO\nusing DOP853')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

## literature

[1] "Analysis of time series from stochastic processes" by J. Gradišek, S. Siegert, R. Friedrich, I. Grabec

[2] "Simulation - Advanced Topics" by A.Greiner as of June 11, 2020