In [67]:
from datetime import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import scipy as sp
import scipy.special as spec
from scipy.optimize import curve_fit



In [116]:
def fit_fun(x, A, x0, k):
    """Logistic approximant ``A / (1 + exp(-k * (x - x0)))``.

    The sigmoid is evaluated with ``scipy.special.expit`` instead of an
    explicit ``exp`` call so that large arguments do not overflow.

    x  -- point(s) at which to evaluate the curve
    A  -- upper plateau of the curve
    x0 -- position of the midpoint
    k  -- steepness
    """
    z = (x - x0) * k
    sigmoid = spec.expit(z)
    return sigmoid * A

def dfit_fun(x, A, x0, k):
    """Derivative with respect to ``x`` of the logistic ``A * expit(k * (x - x0))``.

    Uses the identity ``d/dz expit(z) = expit(z) * expit(-z)``, which is
    algebraically equal to ``exp(-z) / (1 + exp(-z))**2`` but never forms a
    large exponential. The explicit form overflows to ``inf / inf = nan``
    when ``k * (x - x0)`` is strongly negative.

    x  -- point(s) at which to evaluate the derivative
    A  -- upper plateau of the logistic
    x0 -- position of the midpoint
    k  -- steepness
    """
    z = k * (x - x0)
    return A * k * spec.expit(z) * spec.expit(-z)

# Normalizing new cases by the number of tests does not really work, so the helper below is left disabled.
"""
def normalize(df):
    df.loc[:, "new_cases_norm"] = (df.loc[:, "new_cases"]/df.loc[:, "new_tests"]).values
    df.loc[:, "total_cases_norm"] = (df.loc[:, "new_cases_norm"]).values
    for i in df.index[1:]:
        df.loc[i, "total_cases_norm"] = df.loc[i-1, "total_cases_norm"] + df.loc[i, "new_cases_norm"]
"""

# Our World in Data COVID-19 table (CSV) used as the default data source.
_OWID_CSV_URL = 'https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv?raw=true'


def load_data(code, source=_OWID_CSV_URL):
    """Load the case history of one country.

    code   -- 3-letter ISO code matched against the ``iso_code`` column
    source -- anything ``pandas.read_csv`` accepts (URL, path, file object);
              defaults to the Our World in Data CSV

    Returns a DataFrame with columns ``location``, ``date`` (parsed as
    datetimes), ``total_cases`` and ``new_cases`` and a fresh 0..n-1 index.
    Leading days with zero total cases are dropped, except for the single
    day immediately before the first non-zero entry.

    Raises ValueError if ``code`` matches no rows or if ``total_cases`` is
    zero on every row.
    """
    data = pd.read_csv(source)
    country = data.loc[data["iso_code"] == code, ["location", "date", "total_cases", "new_cases"]]
    if country.empty:
        raise ValueError("no rows found for iso_code {!r}".format(code))
    # assign() builds a new frame, so no value is written into a slice of `data`.
    country = country.assign(date=pd.to_datetime(country["date"], format="%Y-%m-%d"))

    # A missing value compares unequal to 0, so it also ends the leading run of zeros.
    reported = country["total_cases"].ne(0).to_numpy()
    if not reported.any():
        raise ValueError("total_cases is zero on every row for iso_code {!r}".format(code))
    first_day = int(reported.argmax())

    # Keep one zero day before the first case. Clamp at 0: a start of -1
    # would make iloc keep only the last row when the data begins with cases.
    start = max(first_day - 1, 0)
    return country.iloc[start:].reset_index(drop=True)


def fit(df, high_cutoff_date=None, low_cutoff_date=None, format="%Y-%m-%d"):
    """Fit ``fit_fun`` to the cumulative cases in ``df``.

    df               -- DataFrame as returned by load_data
    high_cutoff_date -- if given, last date (inclusive) used for fitting
    low_cutoff_date  -- if given, first date (inclusive) used for fitting
    format           -- strptime format of both cutoff dates, e.g. "2020-03-27"
                        for the default

    Returns the optimal parameters of fit_fun and their covariance matrix,
    as produced by ``curve_fit``.
    """
    window = df
    if high_cutoff_date is not None:
        latest = datetime.strptime(high_cutoff_date, format)
        window = window.loc[window["date"] <= latest]
    if low_cutoff_date is not None:
        earliest = datetime.strptime(low_cutoff_date, format)
        window = window.loc[window["date"] >= earliest]

    # The row labels serve as the x coordinate of the fit.
    days = window.index.values
    cases = window["total_cases"].values
    # Start from the last observed total as plateau, midpoint 60, steepness 1.
    initial_guess = (cases[-1], 60, 1)
    return curve_fit(fit_fun, days, cases, p0=initial_guess)

def get_fit_of_events(df, events_dict, format="%Y-%m-%d"):
    """Fit the data up to each event date, plus once over all data.

    df          -- DataFrame as returned by load_data
    events_dict -- mapping with key "date" (list of event dates as strings)
                   and key "event" (list of short descriptions)
    format      -- strftime/strptime format of the dates

    Returns a DataFrame with columns "date", "event", "popt" and "pcov":
    one row per event, followed by a "Last available data" row holding the
    fit over the whole of ``df``. ``events_dict`` is not modified.
    """
    # Work on copies: appending the summary row to the caller's lists would
    # make a reused events_dict grow by one entry on every call.
    dates = list(events_dict["date"])
    events = list(events_dict["event"])

    fits = [fit(df, event_date, format=format) for event_date in dates]
    fits.append(fit(df))

    dates.append(df["date"].iloc[-1].strftime(format))
    events.append("Last available data")

    return pd.DataFrame({
        "date": dates,
        "event": events,
        "popt": [popt for popt, _ in fits],
        "pcov": [pcov for _, pcov in fits],
    })

def _plot_with_fits(df, column, trace_name, model, line_top, params, x_pred, dates_pred, format):
    """Show one figure: ``df[column]`` as markers plus one ``model`` curve and one vertical line per entry of ``params``."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df["date"], y=df[column], mode='markers', name=trace_name))
    for i in range(len(params["date"])):
        fig.add_trace(go.Scatter(x=dates_pred, y=model(x_pred, *params["popt"][i]), name=params["event"][i]))
        event_date = datetime.strptime(params["date"][i], format)
        # Vertical line at this entry's date, from 0 up to line_top.
        fig.add_shape(
            dict(
                type="line",
                x0=event_date,
                x1=event_date,
                y0=0,
                y1=line_top))
    fig.update_layout(
        # Positional access, so the title does not depend on a row labelled 0.
        title=df["location"].iloc[0],
        xaxis_title="Date",
        yaxis_title="Cases",
        font=dict(
            family="Courier New, monospace",
            size=12,
            color="#7f7f7f"
        ))
    fig.show()


def plotter(df, params={"date":[], "event":[]}, format="%Y-%m-%d", N_pred = 10):
    """Plot cumulative and daily new cases over time, with fitted curves if given.

    df     -- DataFrame as returned by load_data
    params -- result of get_fit_of_events; with the default, only the data
              points are drawn
    format -- strptime format of the dates in ``params["date"]``
    N_pred -- number of days past the last date in ``df`` over which the
              fitted curves are extended

    Two figures are shown: ``total_cases`` with ``fit_fun`` curves, and
    ``new_cases`` with ``dfit_fun`` curves. Nothing is returned.
    """
    x = df.index.values
    dates_pred = pd.date_range(df["date"].iloc[0], df["date"].iloc[-1] + pd.Timedelta(days=N_pred))
    # One model abscissa per predicted date. date_range includes both end
    # points, so a grid built as arange(x[0], x[-1] + N_pred) would be one
    # element short and the last predicted day would not be drawn.
    x_pred = x[0] + np.arange(len(dates_pred))

    _plot_with_fits(
        df, "total_cases", "Camulative cases", fit_fun,
        df["total_cases"].iloc[-1],
        params, x_pred, dates_pred, format)
    _plot_with_fits(
        df, "new_cases", "New cases", dfit_fun,
        # Series.max() skips missing values, unlike the builtin max().
        df["new_cases"].max(),
        params, x_pred, dates_pred, format)


def unfold(params):
    """Expand ``popt`` and ``pcov`` from get_fit_of_events into plain columns.

    params -- DataFrame with columns "date", "event", "popt" (three fitted
              parameters A, x0, k per row) and "pcov" (their covariance
              matrix per row)

    Returns a new DataFrame with "date", "event" and, for each parameter,
    its value followed by its standard deviation (square root of the
    matching diagonal element of ``pcov``): "A", "\\sigma_A", "x_0",
    "\\sigma_x_0", "k", "\\sigma_k". ``params`` is left unchanged.
    """
    # reshape(-1, 3) keeps the arrays two-dimensional even when params has no rows.
    values = np.array([np.asarray(p, dtype=float) for p in params["popt"]], dtype=float).reshape(-1, 3)
    sigmas = np.array([np.sqrt(np.diagonal(c)) for c in params["pcov"]], dtype=float).reshape(-1, 3)

    # Copy so that the new columns are not added to a slice of params.
    df = params.loc[:, ["date", "event"]].copy()
    for position, name in enumerate(("A", "x_0", "k")):
        df[name] = values[:, position]
        df["\\sigma_" + name] = sigmas[:, position]
    return df
    
    

## South Korea

In [117]:
# South Korea: fit up to the event date and over all data, then show the
# parameter table and both plots.
df = load_data("KOR")
params = get_fit_of_events(
    df,
    dict(date=["2020-03-08"], event=["some description"]),
)
display(unfold(params))
plotter(df, params)

Unnamed: 0,date,event,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-08,some description,7727.700704,96.170589,42.636574,0.099428,0.355973,0.007317
1,2020-05-07,Last available data,10321.838373,75.021662,46.422966,0.304445,0.172967,0.007863


## Japan


In [121]:
# Japan: fit up to the event date and over all data, then show the
# parameter table and both plots.
df = load_data("JPN")
params = get_fit_of_events(
    df,
    dict(date=["2020-03-29"], event=["some description"]),
)
display(unfold(params))
plotter(df, params)

Unnamed: 0,date,event,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-29,some description,2752.517604,258.578483,72.66829,1.965071,0.091282,0.003844
1,2020-05-07,Last available data,16533.696256,168.706126,92.649379,0.241577,0.126834,0.002448


## Italy


In [122]:
# Italy: fit up to the event date and over all data, then show the
# parameter table and both plots.
df = load_data("ITA")
params = get_fit_of_events(
    df,
    dict(date=["2020-03-29"], event=["some description"]),
)
display(unfold(params))
plotter(df, params)

Unnamed: 0,date,event,A,\sigma_A,x_0,\sigma_x_0,k,\sigma_k
0,2020-03-29,some description,124714.966389,1696.861077,53.693092,0.162853,0.188557,0.002067
1,2020-05-07,Last available data,210642.853208,1889.090055,62.958013,0.297058,0.107748,0.002573
