In [7]:
import pandas as pd
import scipy as sp
from scipy.optimize import curve_fit
from datetime import *
import plotly.express as px
import plotly.graph_objects as go
import scipy.special as spec

from plotly.subplots import make_subplots




In [267]:
# approximant
def fit_fun(x,A,x0,k):
    #better use this to avoid overflows becasue of using exp
    return A*spec.expit(k*(x - x0))
    #return A / (1 + sp.exp(-k*(x-x0))) 

def dfit_fun(x,A,x0,k):
    return A* sp.exp(-k*(x-x0)) * k / (1 + sp.exp(-k*(x-x0)))**2

def date(string, format="%Y-%m-%d"):
    return datetime.strptime(string, format)

# normalization doesnt really work
"""
def normalize(df):
    df.loc[:, "new_cases_norm"] = (df.loc[:, "new_cases"]/df.loc[:, "new_tests"]).values
    df.loc[:, "total_cases_norm"] = (df.loc[:, "new_cases_norm"]).values
    for i in df.index[1:]:
        df.loc[i, "total_cases_norm"] = df.loc[i-1, "total_cases_norm"] + df.loc[i, "new_cases_norm"]
"""

# loads data for country specified by iso code (3 letter)
# returns a DataFrame
def load_data(code):
    data = pd.read_csv('https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv?raw=true')
    df = data.loc[data["iso_code"] == code, ["location","date", "total_cases", "new_cases"]]
    df.loc[:,"date" ] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    first_day_cases = df["total_cases"].iloc[0]
    first_day = 0
    while first_day_cases == 0 :
        first_day += 1
        first_day_cases = df["total_cases"].iloc[first_day]
    df = df.iloc[first_day-1:]
    return df.reset_index().drop("index", 1)


# approximation
# df is the dataframe from load_data
# high_cutoff_date sets the last date for fitting, if given 
# low_cutoff_date sets the first date for fitting, if given 
# format by default is e.g. "2020-03-27"
# retruns optimal parameters of fit_fun and covariance matrix
def fit(df, high_cutoff_date=None, low_cutoff_date=None):
    df_cut = df
    if high_cutoff_date is not None:
        df_cut = df_cut.loc[df_cut["date"] <= high_cutoff_date]
    if low_cutoff_date is not None:
        df_cut = df_cut.loc[df_cut["date"] >= low_cutoff_date]
    x = df_cut.index.values
    y = df_cut.loc[:, "total_cases"].values
    y_max = y[-1]
    #print(x, y_norm)
    popt, pcov = curve_fit(fit_fun, x, y, p0 = (y_max, 60, 1))
    return popt, pcov

# generates all posible fits described in events_dict and fit over all data
# events_dict must have keys "date" with the date of an event and "event" with short description
# return DataFrame with popt and pcov
def get_fit_of_cutoffs(df, cutoffs_dict, format="%Y-%m-%d"):
    dates = cutoffs_dict["date"]
    #print(dates)
    descrps = cutoffs_dict["description"]
    popt = []
    pcov = []
    for d in dates:
        po, pc = fit(df, d)
        popt.append(po)
        pcov.append(pc)
    po, pc = fit(df)
    popt.append(po)
    pcov.append(pc)
    dates = dates.append(pd.Series(df["date"].iloc[-1]))
    descrps = descrps.append(pd.Series("Last available data"))
    params=pd.DataFrame()
    params["date"] = dates
    params["description"] = descrps
    params["popt"] = popt
    params["pcov"] = pcov
    return params.reset_index()

# plots camulative cases and total over time and approximants, if params given
# df - data from load_data
# params - from get_fit_of_events 
def plotter(df, params=None, events = None,  N_pred = 10):
    # prepare go's
    fig_merged = make_subplots(specs=[[{"secondary_y": True}]])
    fig_camulative = go.Figure()
    fig_new = go.Figure()
    fig_camulative.update_layout(
        title=df.loc[0,"location"],
        xaxis_title="Date",
        yaxis_title="Camulative number of cases",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#7f7f7f"
        )
    )
    fig_new.update_layout(
        title=df.loc[0,"location"],
        xaxis_title="Date",
        yaxis_title="Number of new cases",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#7f7f7f"
        )
    )
    fig_merged.update_layout(
        title=df.loc[0,"location"],
        xaxis_title="Date",
        yaxis_title="Camulative number of cases",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#7f7f7f"
        )
    )
    fig_merged.update_yaxes(title_text="Number of new cases", secondary_y=True)
    figs = [fig_camulative, fig_new, fig_merged]
    # prepare x-axis data
    x = df["date"]
    #prepare y-axis data
    y_camulative = df["total_cases"].values
    y_new = df["new_cases"].values
    # plot actual data
    fig_camulative.add_trace(go.Scatter(x=x, y=y_camulative, mode='markers', name="Data"))
    fig_new.add_trace(go.Scatter(x=x, y=y_new, mode='markers', name="Data"))
    fig_merged.add_trace(go.Scatter(x=x, y=y_camulative, mode='markers', name="Data (camulative)"))
    fig_merged.add_trace(go.Scatter(x=x, y=y_new, mode='markers', name="Data (new cases)"), secondary_y=True)
    # plot approximants
    if not params is None:
        x_pred = pd.date_range(df["date"].iloc[0], df["date"].iloc[-1] + timedelta(days=N_pred))  
        x_pred_for_fun = sp.arange(df.index[0], df.index[-1]+N_pred)
        for i in range(len(params)):
            y = fit_fun(x_pred_for_fun, *params["popt"].iloc[i])
            dy = dfit_fun(x_pred_for_fun, *params["popt"].iloc[i])
            fig_camulative.add_trace(go.Scatter(x=x_pred, y=y, name=params["description"].iloc[i]))
            fig_new.add_trace(go.Scatter(x=x_pred, y=dy, name=params["description"].iloc[i]))
            fig_merged.add_trace(go.Scatter(x=x_pred, y=y, name=params["description"].iloc[i]))
            fig_merged.add_trace(go.Scatter(x=x_pred, y=dy, name=params["description"].iloc[i]), secondary_y=True)
            date = params["date"].iloc[i]
            for fig in figs:
                fig.add_shape(
                    dict(
                        type="line",
                        x0=date,
                        x1=date,
                        y0=0,
                        y1=1,
                        yref='paper',
                        line=dict(
                            color="Black",
                            width=1,
                            dash="dash"
                        ),
                        opacity=0.6
                    )
                )    
    return fig_camulative, fig_new, fig_merged
    

# unfolds popt and pcov  from get_fit_of_events
# returns dataframe with each parameter and its standart deviation
def unfold(params):
    sigmas = []
    for pc in params["pcov"]:
        sigmas.append(sp.sqrt(sp.diagonal(pc)))
    df = params.loc[:, ["date", "description"]]
    df["A"] = [params["popt"][k][0] for k in params.index]
    df["\sigma_A"] = sp.array(sigmas)[:,0]
    df["x_0"] = [params["popt"][k][1] for k in params.index]
    df["\sigma_x_0"] = sp.array(sigmas)[:,1]
    df["k"] = [params["popt"][k][2] for k in params.index]
    df["\sigma_k"] = sp.array(sigmas)[:,2]
    return df
    
# functions for post editing figures
# add a vertical line
def add_vline(fig, x, color="Black", width=1, dash="solid", opacity=1, layer = "above"):
    fig.add_shape(
        type = "line",
        xref="x",
        x0=x,
        x1=x,
        y0=0,
        y1=1,
        yref="paper",
        layer=layer,
        line=dict(
            color=color,
            width=width,
            dash=dash
        ),
        opacity = opacity        
    )
# add a filled region
def add_filling(fig, x0, x1, fill_color="Red", opacity=0.3, layer="above"):
    fig.add_shape(
        type="rect",
        xref="x",
        yref="paper",
        x0=x0,
        y0=0,
        x1=x1,
        y1=1,
        fillcolor=fill_color,
        opacity=opacity,
        layer=layer,
        line_width=0
    )

def add_caption(fig, x, text, pos_x=0, pos_y=-20, text_color="Black", bg_color="Pink", br_color="Grey", ar_color="Grey", br_width=1, ar_width=1, opacity=0.7, layer="above", wline=True):
    fig.add_annotation(
        x=x,
        y=1,
        xref="x",
        yref="paper",
        text=text,
        showarrow=True,
        font=dict(
            family="Courier New, monospace",
            size=12,
            color=text_color
            ),
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=ar_width,
        arrowcolor=ar_color,
        ax=pos_x,
        ay=pos_y,
        bordercolor=br_color,
        borderwidth=br_width,
        borderpad=4,
        bgcolor=bg_color,
        opacity=0.7
    )
    if wline:
        add_vline(fig, x, color=br_color, width=br_width, layer=layer)

## South Korea

In [90]:
df = load_data("KOR")
cutoffs = pd.DataFrame({
    "date": [date("2020-03-08")],
    "description": ["Before 08.03"]
})

In [269]:
params = get_fit_of_cutoffs(df, cutoffs)
figs = plotter(df, params, events)
display(unfold(params))
for f in figs:
    add_filling(f, x0=date("2020-02-18"), x1=date("2020-02-23"))
    add_caption(f, date("2020-02-18"), text="Patient 31", pos_x=-50)
    add_caption(f, date("2020-02-23"), "Patient 31 isolated", pos_x= 50)
    add_caption(f, date("2020-02-20-12", format="%Y-%m-%d-%H"), "Shincheonji Church incident", pos_x=- 10, pos_y = -50, wline=False)
    add_filling(f, date("2020-02-20"), date("2020-03-08"), fill_color="Green")
    add_caption(f, date("2020-02-29"), "Additional measures imposed<br>Tracking of church members", bg_color="LightGreen",pos_x=160, pos_y=-60, wline=False)
    f.show()

Unnamed: 0,date,description,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-08,Before 08.03,7727.700704,96.170589,42.636574,0.099428,0.355973,0.007317
1,2020-05-13,Last available data,10404.280956,71.092183,46.60598,0.305482,0.168231,0.007475


## Japan


In [275]:
df = load_data("JPN")
cutoffs = pd.DataFrame({
    "date": ["2020-03-15"],
    "description": ["Before 29.03"]
})

In [276]:
params = get_fit_of_cutoffs(df, cutoffs)
figs = plotter(df, params, events)
display(unfold(params))
for f in figs:
    #add_filling(f, x0=date("2020-02-18"), x1=date("2020-02-23"))
    #add_caption(f, date("2020-02-18"), text="Patient 31", pos_x=-50)
    #add_caption(f, date("2020-02-23"), "Patient 31 isolated", pos_x= 50)
    #add_caption(f, date("2020-02-20-12", format="%Y-%m-%d-%H"), "Shincheonji Church incident", pos_x=- 10, pos_y = -50, wline=False)
    #add_filling(f, date("2020-02-20"), date("2020-03-08"), fill_color="Green")
    #add_caption(f, date("2020-02-29"), "Additional measures imposed<br>Tracking of church members", bg_color="LightGreen",pos_x=160, pos_y=-60, wline=False)
    f.show()

Unnamed: 0,date,description,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-15,Before 29.03,3007.76052,583.102126,70.942374,2.724322,0.104354,0.003186
1,2020-05-13 00:00:00,Last available data,16473.78586,109.083155,92.57528,0.177492,0.127453,0.002083


## Italy


In [280]:
df = load_data("ITA")
cutoffs = pd.DataFrame({
    "date": ["2020-03-15"],
    "description": ["Before 29.03"]
})

In [281]:
params = get_fit_of_cutoffs(df, cutoffs)
figs = plotter(df, params, events)
display(unfold(params))
for f in figs:
    #add_filling(f, x0=date("2020-02-18"), x1=date("2020-02-23"))
    #add_caption(f, date("2020-02-18"), text="Patient 31", pos_x=-50)
    #add_caption(f, date("2020-02-23"), "Patient 31 isolated", pos_x= 50)
    #add_caption(f, date("2020-02-20-12", format="%Y-%m-%d-%H"), "Shincheonji Church incident", pos_x=- 10, pos_y = -50, wline=False)
    #add_filling(f, date("2020-02-20"), date("2020-03-08"), fill_color="Green")
    #add_caption(f, date("2020-02-29"), "Additional measures imposed<br>Tracking of church members", bg_color="LightGreen",pos_x=160, pos_y=-60, wline=False)
    f.show()

Unnamed: 0,date,description,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-15,Before 29.03,59867.45173,5752.194554,47.69974,0.673594,0.230424,0.005263
1,2020-05-13 00:00:00,Last available data,216300.025113,1700.577733,63.698633,0.288747,0.103236,0.002379


### Russia (Moscow)

### China (Hubei)