# Initial Stage of the Novel Coronavirus Disease 2019 Epidemics Outbreak Analytical Modeling

**Abstract** $-$ This report gives an overview of different analytical techniques for modeling the initial stage of the Coronavirus Disease 2019 (COVID-19) epidemic. Using the data on confirmed infections in the closed territory of the Republic of Croatia, both exponential and logistic fitting have been performed on different stages of the epidemic. The time series of active infections has been derived from the total daily number of newly confirmed infected individuals, the cumulative number of recovered individuals (expected recovery rate, $\gamma = 1/15$) and the cumulative number of individuals deceased due to the infection, and Heidler function fitting has been performed on it for the complete duration of the first epidemic *wave*. Results show that the logistic and Heidler functions achieve exceptional goodness-of-fit for the cumulative number of confirmed cases and the active number of confirmed cases, respectively, for the complete duration of the first epidemic *wave*. The exponential fit, however, successfully fits only a very short period of the initial outbreak, from Feb, $25^{th}$ to Mar, $18^{th}$, when the growth curve of the cumulative number of confirmed cases takes on a linear characteristic.

In [None]:
import os
import datetime
from io import StringIO
import requests

from coropy.growth_models import GrowthCOVIDModel
from matplotlib import dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import utils

In [None]:
# Seed NumPy's global random generator so that the random draws made later
# in the notebook are repeatable between runs.
np.random.seed(42)

# Apply the project's plot configuration with the grid switched off
# (original author's note: paper-quality plots).
utils.configure(grid=False)

# colors reused by every figure below
blue, purple = (utils.default_colors(name) for name in ('blue', 'purple'))

In [None]:
# data load: the CSV file is read from the `data` directory one level above
# the current working directory
DATA_DIR = os.path.join(os.pardir, 'data')
FILE_NAME = '1st_wave_data_cro.csv'
FILE_PATH = os.path.join(DATA_DIR, FILE_NAME)
df = pd.read_csv(FILE_PATH)

# derived data
# `confirmed_positives` and `confirmed_deceased` are accumulated into running
# totals, while `estimate_recovered` is taken as it is stored in the file
cum_positives = np.cumsum(df['confirmed_positives'].values)
cum_deceased = np.cumsum(df['confirmed_deceased'].values)
cum_recovered = df['estimate_recovered'].values

# active cases = everyone confirmed so far minus those recovered or deceased
active = cum_positives - cum_recovered - cum_deceased

## 1 Exponential function

$$
f(x) = a \cdot e^{b x} + c
$$

In [None]:
# The training window holds the first `dur_critical` samples, i.e. the number
# of days between Feb 25th and Mar 18th, 2020; the `n_days_future` samples
# that follow are held out to judge the extrapolation.
start_date = datetime.datetime(2020, 2, 25)
end_critical = datetime.datetime(2020, 3, 18)
dur_critical = (end_critical - start_date).days
n_days_future = 10
y_train = cum_positives[:dur_critical]
y_test = cum_positives[dur_critical:dur_critical + n_days_future]

# fit the critical initial period to the model and evaluate the extrapolation
# performance
model = GrowthCOVIDModel('exponential', normalize=True, calc_ci=True)
model.fit(y_train)
x_fit, y_fit = model.get_fitted
x_pred, y_pred = model.predict(n_days_future)

# printout
# Element 1 of `y_fit` / `y_pred` is the curve compared against the data;
# elements 0 and 2 are used as the limits of the shaded band in the plot.
sre_fit = np.sqrt(np.mean(np.square(y_train - y_fit[1])))
sre_predict = np.sqrt(np.mean(np.square(y_test - y_pred[1])))
a, b, c = model.get_params
report = (
    'Exponential function normalized parameters \n'
    '------------------------------------------ \n'
    'x ∈ [0, 1] and y ∈ [0, 1] where y=f(x) \n'
    '------------------------------------------ \n'
    f'a = {a}\n'
    f'b = {b}\n'
    f'c = {c}\n'
    '\n'
    'goodness-of-fit \n'
    '--------------- \n'
    f'[fitted] S = {sre_fit}\n'
    f'[extrapolated] S = {sre_predict}\n'
)
print(report)

In [None]:
# visualize the exponential fit on the training window and its extrapolation
# on the held-out days; element 1 of `y_fit` / `y_pred` is drawn as the curve,
# elements 0 and 2 bound the shaded band around it
fig = plt.figure()
ax = fig.subplots(nrows=1, ncols=1)

# Raw string: '\c' and '\e' are not valid Python escape sequences, so the
# LaTeX label must not be written as a regular string literal.
formula = r'$a \cdot \exp(b \cdot x) + c$'

ax.plot(x_fit, y_train, '.', color=blue, label='[train] confirmed positives')
ax.plot(x_pred, y_test, '.', color=purple, label='[test] confirmed positives')
ax.plot(x_fit, y_fit[1], '-', color=blue, label='[fitted] ' + formula)
ax.fill_between(x_fit, y_fit[0], y_fit[2], color=blue, alpha=0.2)
ax.plot(x_pred, y_pred[1], '-', color=purple, label='[extrapolated] ' + formula)
ax.fill_between(x_pred, y_pred[0], y_pred[2], color=purple, alpha=0.2)
ax.set_xlabel(r'$\Delta t$ days from Feb, $25^{th}$')
ax.set_ylabel(r'$N$')
ax.legend(loc='upper left')
plt.show()

## 2 Logistic function

$$
f(x) = \frac{a}{1 + e^{-c \cdot (x - d) + b}}
$$

In [None]:
# fit the logistic model to the complete cumulative series of confirmed cases
model = GrowthCOVIDModel('logistic', normalize=True, calc_ci=True)
model.fit(cum_positives)
x, fitted = model.get_fitted

# printout
# `fitted[1]` is the curve compared against the data when computing the
# root-mean-square residual `sre`
residuals = cum_positives - fitted[1]
sre = np.sqrt(np.mean(np.square(residuals)))
a, b, c, d = model.get_params
report = (
    'Logistic function normalized parameters \n'
    '--------------------------------------- \n'
    'x ∈ [0, 1] and y ∈ [0, 1] where y=f(x) \n'
    '--------------------------------------- \n'
    f'a = {a}\n'
    f'b = {b}\n'
    f'c = {c}\n'
    f'd = {d}\n'
    '\n'
    'goodness-of-fit \n'
    '--------------- \n'
    f'S = {sre}\n'
)
print(report)

In [None]:
# visualize the logistic fit: data points, fitted curve (`fitted[1]`) and the
# shaded band between `fitted[0]` and `fitted[2]`
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, cum_positives, '.', color=blue, label='confirmed positive cases')
ax.plot(
    x, fitted[1], '-', color=blue,
    label=r'$a / \big( 1 + \exp (-c \cdot (x - d) + b ) \big)$',
)
ax.fill_between(x, fitted[0], fitted[2], color=blue, alpha=0.2)
ax.set_xlabel(r'$\Delta t$ days from Feb, $25^{th}$')
ax.set_ylabel(r'$N$')
ax.legend()
plt.show()

## 3 Heidler function

$$
f(t) = I_0 \cdot \frac{\left( t / t_1 \right)^n}{1 + \left( t / t_1 \right)^n} \cdot e^{-\frac{t}{t_2}}
$$

In [None]:
# assumed test characteristics
# NOTE(review): the origin of these two values is not shown in this notebook
sensitivity = 0.85
specificity = 0.95

# daily test outcomes taken from the data set
daily_tests = df.total_tests.values
daily_positive = df.confirmed_positives.values
daily_negative = daily_tests - daily_positive

# share of the reported results that is taken to be correct
true_daily_positive = sensitivity * daily_positive
true_daily_negative = specificity * daily_negative
false_daily_negative = daily_negative - true_daily_negative

# per-day accuracy of the testing
accuracy = (true_daily_positive + true_daily_negative) / (daily_positive + daily_negative)

# symmetric band around the number of active cases, its half-width being the
# inaccurate fraction (1 - accuracy) of the active cases on that day
error_margin = (1 - accuracy) * active
lower_bound = active - error_margin
upper_bound = active + error_margin

In [None]:
# calculate the dispersion of the number of active cases for each day by
# drawing uniformly distributed samples from [lower bound, upper bound];
# every row of `spread` holds the draws for one day
n_draws = 1000
spread = np.array([
    np.random.uniform(low, high, n_draws)
    for low, high in zip(lower_bound, upper_bound)
])

# per-day sample mean and standard deviation of the draws
y_init = np.mean(spread, axis=1)
sigma = np.std(spread, axis=1)

In [None]:
def heidler(t, t1, t2, n, I0):
    """Evaluate the Heidler function.

    Computes ``I0 * r / (1 + r) * exp(-t / t2)`` where
    ``r = sign(t / t1) * |t / t1| ** n``. Taking the power of the absolute
    value and restoring the sign afterwards keeps the result real for a
    negative ``t / t1`` combined with a non-integer ``n``.

    Parameters
    ----------
    t : float or numpy.ndarray
        Point(s) at which the function is evaluated.
    t1 : float
        Scale applied to `t` in the rising term.
    t2 : float
        Scale applied to `t` in the exponential decay term.
    n : float
        Exponent of the rising term.
    I0 : float
        Multiplicative amplitude.

    Returns
    -------
    float or numpy.ndarray
        Function value(s), following NumPy broadcasting of the inputs.
    """
    ratio = t / t1
    powered = np.sign(ratio) * np.abs(ratio) ** n
    rise = powered / (1 + powered)
    decay = np.exp(-t / t2)
    return I0 * rise * decay

# fit the user-defined Heidler function to the per-day sample means `y_init`
# NOTE: `sigma` is not handed over to the model; the original call had the
# extra arguments `spread=sigma, absolute_sigma=False` disabled.
model = GrowthCOVIDModel(heidler, normalize=True, calc_ci=True)
model.fit(y_init)
x, fitted = model.get_fitted

# printout
# the residual is measured against the `active` series, with `fitted[1]`
# taken as the fitted curve
residuals = active - fitted[1]
sre = np.sqrt(np.mean(np.square(residuals)))
t1, t2, n, I0 = model.get_params
report = (
    'Heidler function normalized parameters \n'
    '--------------------------------------- \n'
    'x ∈ [0, 1] and y ∈ [0, 1] where y=f(x) \n'
    '--------------------------------------- \n'
    f't1 = {t1}\n'
    f't2 = {t2}\n'
    f'n = {n}\n'
    f'I0 = {I0}\n'
    '\n'
    'goodness-of-fit \n'
    '--------------- \n'
    f'S = {sre}\n'
)
print(report)

In [None]:
# dates: one calendar day per sample of `active`, starting on Feb 25th, 2020
t = np.arange(active.size)
start_dt = datetime.datetime(2020, 2, 25)
end_dt = start_dt + datetime.timedelta(int(t.max())+1)
dates = mdates.drange(start_dt, end_dt, datetime.timedelta(days=1))

# visualize the active cases together with the fitted curve (`fitted[1]`) and
# the band between `fitted[0]` and `fitted[2]`
fig = plt.figure()
ax = fig.subplots(nrows=1, ncols=1)
ax.plot(dates, active, 'o', markersize=4, color=purple, label='$I{[}t{]}$')
# Raw strings keep the LaTeX backslashes literal; '\%' is not a valid Python
# escape sequence and must not appear in a regular string literal.
ax.plot(dates, fitted[1], '-', color=blue, label=r'$\hat I(t)$')
ax.fill_between(dates, fitted[0], fitted[2], color=blue, alpha=0.2, label=r'$95$\% CI')
ax.set_ylabel(r'$N$')
ax.legend(frameon=True)
fig.tight_layout()

# date ticks on the 10th and the 25th of every month, slanted for legibility
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y/%m/%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(bymonthday=[10, 25]))
fig.autofmt_xdate()
ax.set_yticks([0, 500, 1000, 1500])
ax.set_ylim([-125, 1500])
plt.show()

# store the figure as JPEG and PDF in the `figures` directory one level up
fname = 'Heidler'
fig.savefig(fname=os.path.join(os.pardir, 'figures', fname + '.jpeg'),
            transparent=True, format='jpeg', bbox_inches='tight', dpi=2000)
fig.savefig(fname=os.path.join(os.pardir, 'figures', fname + '.pdf'),
            format='pdf', bbox_inches='tight')