# COVID19 update
+ based on https://github.com/CSSEGISandData/COVID-19/tree/master/archived_data from Johns Hopkins University
+ Original source: https://www.who.int/emergencies/diseases/novel-coronavirus-2019/situation-reports
+ A very good article with a lot of insights is here
 https://medium.com/@tomaspueyo/coronavirus-act-today-or-people-will-die-f4d3d9cd99ca


In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import os

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from IPython.display import display, Markdown
import collections
import datetime

In [None]:
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

In [None]:
# Raw CSV locations of the confirmed / deaths / recovered time series.
# NOTE(review): confirm these `time_series_19-covid-*` files are still published upstream.
ccpw = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
cdpw = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
crpw = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'

# Download each table and discard columns that contain no data at all.
df_recovered_all, df_confirmed_all, df_deaths_all = (
    pd.read_csv(url).dropna(axis='columns', how='all')
    for url in (crpw, ccpw, cdpw)
)

In [None]:
# Leading columns that identify a location rather than hold a daily value.
LocationColumns = ['Province/State', 'Country/Region', 'Lat', 'Long']
# Every column after the location columns; the labels are parsed as dates below.
DataColumns = df_confirmed_all.columns[len(LocationColumns):].tolist()
DataColumnsDT = pd.to_datetime(DataColumns)
# Notebook echo: first two and last two parsed dates.
(DataColumnsDT[:2], DataColumnsDT[-2:])

## Some data info

In [None]:
# Distinct 'Country/Region' values of the confirmed table, in sorted order.
countries = np.sort(df_confirmed_all['Country/Region'].unique())
display(Markdown('*List of Countries*'))
print(countries)

In [None]:
# Distinct 'Province/State' names of the rows whose country is 'US'.
us_rows = df_confirmed_all[df_confirmed_all['Country/Region'] == 'US']
# Drop missing names first: a NaN mixed in with strings cannot be sorted
# and would also break the substring tests below.
us_county_states = us_rows['Province/State'].dropna().unique()
us_county_states.sort()
display(Markdown('*List of US States*'))
# Names without a comma are listed as states
# (presumably county rows are written as 'County, ST' -- verify against the data).
us_states = sorted(rs for rs in us_county_states if ',' not in rs)
print(us_states)
display(Markdown('*List of US Counties*'))
# Prints the full list of names (states and counties together).
print(us_county_states)
# Entries whose name contains 'Princess' are reported as cruise ships.
princesses = sorted(rs for rs in us_county_states if 'Princess' in rs)
display(Markdown('*List of Cruise Ships*'))
print(princesses)

## Some code

In [None]:
# NOTE(review): `last_update` and `m_series` are only assigned further down in this
# file, so this cell raises NameError when the cells are executed top to bottom.
last_update, m_series['count'].index[-1]

In [None]:
def compute_series(df_metric):
    """Aggregate a metric table into the time series used by the charts.

    Parameters
    ----------
    df_metric : pandas.DataFrame
        Rows to aggregate; must contain every column named in the
        module-level ``DataColumns``.

    Returns
    -------
    dict
        Maps a label to a ``pandas.Series`` indexed by ``DataColumnsDT``:
        ``'count'`` (column totals, restricted to entries > 0),
        ``'log(count)'``, ``'d(count)/dt'`` (day-over-day difference) and
        ``'d(log(count))/dt'``.
    """
    m_series = {}
    # Total over all selected rows, one value per date column.
    count = df_metric[DataColumns].sum(axis=0)
    count.index = DataColumnsDT
    # Keep strictly positive totals only, so the logarithm and the division
    # below are well defined.
    count = count[count > 0]
    m_series['count'] = count
    m_series['log(count)'] = np.log(count)
    m_series['d(count)/dt'] = count.diff()
    # Relative change (c[t] - c[t-1]) / c[t], used as the discrete
    # estimate of the derivative of log(count).
    m_series['d(log(count))/dt'] = count.diff() / count
    return m_series

def display_series(m_series, series_label, selection_location, save_fig_dir = '.'):
    """Plot every series of `m_series`, save the figure and report the latest count.

    Parameters
    ----------
    m_series : dict
        Label -> ``pandas.Series``; must contain the key ``'count'``.
        The series are drawn on a 2x2 grid in dictionary order.
    series_label : str
        Name of the metric, used in titles and in the summary line.
    selection_location : str
        Name of the location, used in titles and in the summary line.
    save_fig_dir : str
        Directory the PNG file is written to.

    The module-level ``last_update`` is read for the figure title, which is
    also used as the file name.
    """
    plt.figure(figsize=(16, 10))
    stitle = '{0} charts for {1} as of {2}'.format(series_label, selection_location, last_update)
    plt.suptitle(stitle, fontsize=16)
    for position, (key, series) in enumerate(m_series.items(), start=1):
        plt.subplot(2, 2, position)
        series.plot(grid=True, title='{0} {1}'.format(series_label, key))
    fpath = os.path.join(save_fig_dir, '{0}.png'.format(stitle))
    print(fpath)
    fig1 = plt.gcf()
    fig1.tight_layout(pad=1)
    # Leave room above the subplots for the figure-level title.
    fig1.subplots_adjust(top=0.9)
    fig1.savefig(fpath)
    count = m_series['count']
    # .iloc makes the positional intent explicit; plain [-1] on a
    # date-indexed Series is deprecated positional access.
    v, t = count.iloc[-1], count.index[-1]
    # Report the location that was passed in, not a module-level variable.
    s = '### Latest number of {0} in {1} is *{2}* as of {3}'.format(series_label, selection_location, v, t)
    display(Markdown(s))


In [None]:
def model_func(x, a):
    """Exponential growth model: return exp(a * x)."""
    exponent = a * x
    return np.exp(exponent)

def coeff_evaluate_mean(m_series, days_back):
    """Average the 'd(log(count))/dt' series over its last `days_back` entries.

    Returns a ``(mean, None)`` pair; the second slot is always ``None``.
    """
    recent_rates = m_series['d(log(count))/dt'][-days_back:]
    return recent_rates.mean(), None

def coeff_evaluate_bestfit(m_series, days_back):
    """Fit exp(a * day) to the last `days_back` counts, normalised to start at 1.

    Parameters
    ----------
    m_series : dict
        Must contain ``'count'`` and ``'d(log(count))/dt'`` series.
    days_back : int
        Number of trailing observations to fit. If the series holds fewer
        observations, all available ones are used.

    Returns
    -------
    (float, collections.OrderedDict)
        The fitted exponent and a dictionary with the fit inputs and outputs
        (``'days'``, ``'data'``, ``'fitted'``, ``'popt'``, ``'pcov'``,
        ``'approx'``).

    Raises
    ------
    ValueError
        If the count series is empty.
    """
    y = np.asarray(m_series['count'][-days_back:].values, dtype=float)
    if y.size == 0:
        raise ValueError('cannot fit a growth rate to an empty count series')
    # Size x from the data actually available: a series shorter than
    # `days_back` would otherwise give x and y different lengths.
    x = np.arange(y.size)
    # Normalise so the model exp(a * x) passes through 1 at x = 0.
    y = y / y[0]
    # The mean relative change seeds the optimiser.
    approx, _ = coeff_evaluate_mean(m_series, days_back)
    if not np.isfinite(approx):
        # No usable estimate (e.g. a single observation): start from zero growth.
        approx = 0.0
    popt, pcov = curve_fit(model_func, x, y, p0=(approx,))
    params = collections.OrderedDict()
    params['days'] = x
    params['data'] = y
    params['fitted'] = model_func(x, popt[0])
    params['popt'] = popt
    params['pcov'] = pcov
    params['approx'] = approx
    return popt[0], params

def multi_factors(m_series, coeff_evaluate, days_back = 7):
    """Turn an estimated growth exponent into daily/weekly factors and a projection.

    Parameters
    ----------
    m_series : dict
        Must contain the ``'count'`` series, plus whatever `coeff_evaluate` reads.
    coeff_evaluate : callable
        Called as ``coeff_evaluate(m_series, days_back)``; the first element
        of its return value is used as the growth exponent.
    days_back : int
        Window length forwarded to `coeff_evaluate`.

    Returns
    -------
    (day_factor, week_factor, projected_week)
        ``exp(alpha)``, ``exp(alpha) ** 7`` and the latest count multiplied
        by the weekly factor.
    """
    av_alpha, _ = coeff_evaluate(m_series, days_back)
    count = m_series['count']
    day_factor = np.exp(av_alpha)
    week_factor = day_factor ** 7
    # .iloc makes the positional intent explicit; plain [-1] on a
    # date-indexed Series is deprecated positional access.
    projected_week = count.iloc[-1] * week_factor
    return day_factor, week_factor, projected_week
    
def multi_factors1(m_series, days_back = 1):
    """Daily/weekly growth factors from the mean of the recent relative changes.

    Parameters
    ----------
    m_series : dict
        Must contain the ``'count'`` and ``'d(log(count))/dt'`` series.
    days_back : int
        Number of trailing entries of ``'d(log(count))/dt'`` to average.

    Returns
    -------
    (day_factor, week_factor, projected_week)
        ``exp(alpha)``, ``exp(alpha) ** 7`` and the latest count multiplied
        by the weekly factor.
    """
    av_alpha = m_series['d(log(count))/dt'][-days_back:].mean()
    count = m_series['count']
    day_factor = np.exp(av_alpha)
    week_factor = day_factor ** 7
    # .iloc makes the positional intent explicit; plain [-1] on a
    # date-indexed Series is deprecated positional access.
    projected_week = count.iloc[-1] * week_factor
    return day_factor, week_factor, projected_week

def display_factors(day_factor, week_factor, projected_week, series_label, location_label):
    """Print the weekly growth factor and show the one-week projection as Markdown.

    `day_factor` is accepted but not used by this function.
    """
    weekly_msg = 'Every week the number of {2} increases by a factor of {0} in {1}'.format(
        week_factor, location_label, series_label)
    print(weekly_msg)
    projection_msg = '### projected number of {0} one week from now is {1:.0f} in {2}'.format(
        series_label, projected_week, location_label)
    display(Markdown(projection_msg))

def show_prediction(m_series, series_label, location_label, days_back, days_forward):
    """Plot the recent counts against the best-fit exponential extrapolation.

    Parameters
    ----------
    m_series : dict
        Must contain ``'count'`` and ``'d(log(count))/dt'`` series.
    series_label, location_label : str
        Used in the plot title (which is also the saved file name).
    days_back : int
        Number of trailing observations used for the fit. If the series is
        shorter, all available observations are used.
    days_forward : int
        Number of days to extrapolate past the last observation window.

    Reads the module-level ``last_update`` (title) and ``fig_dir`` (output
    directory of the saved figure).
    """
    def compute_prediction(m_series, days_forward, days_back):
        # Returns (observed window, predicted series starting at the window start).
        count = m_series['count']
        # A series shorter than the requested window would make the
        # negative position below fall off the front of the series.
        days_back = min(days_back, len(count))
        total_days = days_forward + days_back
        coeff, _ = coeff_evaluate_bestfit(m_series, days_back)
        # .iloc makes the positional intent explicit; plain integer [] on a
        # date-indexed Series is deprecated positional access.
        t0, v0 = count.index[-days_back], count.iloc[-days_back]
        t_predict = pd.date_range(t0, periods=total_days)
        tmp = np.arange(0, total_days)
        # The fit is normalised to 1 at the window start, so scale by v0.
        val_predict = v0 * model_func(tmp, coeff)
        s_predict = pd.Series(val_predict, t_predict)
        return count.iloc[-days_back:], s_predict

    data, predicted = compute_prediction(m_series, days_forward, days_back)
    plt.figure(figsize=(12, 8))
    data.plot(label='data', marker='*')
    predicted.plot(label='predicted')
    plt.grid(True)
    title = 'best fit prediction {0} days forward for {1} in {2} as of {3}'.format(days_forward,
                                                                         series_label, location_label, last_update)
    plt.title(title)
    plt.legend()
    plt.savefig(os.path.join(fig_dir, title))


### Global settings

In [None]:
# Root directory under which the figure folders are created.
work_dir = r'c:\tmp'
# Number of trailing days used when estimating growth (both names are used below).
days_back = day_count = 7
# Number of days to extrapolate in the prediction plots.
days_forward = 14

In [None]:
# Date part (text before the first space) of the most recent data column's timestamp.
last_update = str(pd.Timestamp(DataColumnsDT[-1])).partition(' ')[0]

## Selection

### Country: pick one from the list of the countries above

In [None]:
# Example of a name containing a comma:
# country = 'Korea, South'
country = 'Italy'

location_label = country
# Commas and apostrophes are removed before the label is used as a directory name.
location_dir = location_label.translate(str.maketrans('', '', ",'"))
fig_dir = os.path.join(work_dir, 'covid19', last_update, location_dir)
os.makedirs(fig_dir, exist_ok=True)
print(fig_dir)

In [None]:
# Row mask of the confirmed table (kept under its original name).
selection = df_confirmed_all['Country/Region'] == country
df_confirmed = df_confirmed_all[selection]
# Each table is filtered on its own 'Country/Region' column: reusing the
# confirmed-table mask would pick the wrong rows whenever the three files
# do not list their rows in the same order.
df_recovered = df_recovered_all[df_recovered_all['Country/Region'] == country]
df_deaths = df_deaths_all[df_deaths_all['Country/Region'] == country]

In [None]:
# Confirmed cases: build the series and draw/save the overview charts.
series_label = 'confirmed cases'
m_series = compute_series(df_confirmed)
display_series(m_series, series_label, location_label, save_fig_dir=fig_dir)


The time derivative of the logarithm of an exponential function $confirmed = c\exp(\alpha t)$ is $\alpha$. The factor by which the number of confirmed cases grows each day is therefore estimated as $\exp(\bar \alpha)$, where $\bar \alpha$ is the average value of $\alpha$ over the most recent days.

In [None]:
# Pass the notebook-wide window explicitly so this estimate uses the same
# `days_back` as the fit and prediction cells instead of the function default.
day_factor, week_factor, projected_week = multi_factors(m_series, coeff_evaluate_bestfit, days_back)
display_factors(day_factor, week_factor, projected_week, series_label, location_label)

# Predictions

### Show fit. Use it to assess how reasonable the prediction is going to be

In [None]:
# Overlay the normalised observations and the fitted exponential.
coeff, params = coeff_evaluate_bestfit(m_series, days_back)
fit_check = pd.DataFrame({'raw data': params['data'], 'fitted': params['fitted']})
fit_check.plot(grid=True, title='best fit')

In [None]:
# Extrapolate the confirmed-case fit and save the plot.
show_prediction(m_series, series_label, location_label,
                days_back=days_back, days_forward=days_forward)

# Deaths

In [None]:
# Deaths: build the series and draw/save the overview charts.
series_label = 'deaths'
m_series = compute_series(df_deaths)
display_series(m_series, series_label, location_label, save_fig_dir=fig_dir)


In [None]:
# Pass the notebook-wide window explicitly so this estimate uses the same
# `days_back` as the prediction cell instead of the function default.
day_factor, week_factor, projected_week = multi_factors(m_series, coeff_evaluate_bestfit, days_back)
display_factors(day_factor, week_factor, projected_week, series_label, location_label)

In [None]:
# Extrapolate the deaths fit and save the plot.
show_prediction(m_series, series_label, location_label,
                days_back=days_back, days_forward=days_forward)

# Recovered

In [None]:
# Recovered: build the series and draw/save the overview charts.
series_label = 'recovered'
m_series = compute_series(df_recovered)
display_series(m_series, series_label, location_label, save_fig_dir=fig_dir)


In [None]:
# Pass the notebook-wide window explicitly so this estimate uses the same
# `days_back` as the prediction cell instead of the function default.
day_factor, week_factor, projected_week = multi_factors(m_series, coeff_evaluate_bestfit, days_back)
display_factors(day_factor, week_factor, projected_week, series_label, location_label)

In [None]:
# Extrapolate the recovered fit and save the plot.
show_prediction(m_series, series_label, location_label,
                days_back=days_back, days_forward=days_forward)

## Total World Wide

In [None]:
# Sum only the per-date columns. Summing the whole frame and slicing off the
# first entries depends on how pandas treats the text columns, and raises
# when a text column contains missing values.
total_confirmed = df_confirmed_all[DataColumns].sum()
total_deaths = df_deaths_all[DataColumns].sum()

In [None]:
# Three side-by-side panels: worldwide confirmed, deaths, and deaths as a
# percentage of confirmed.
plt.figure(figsize=(15, 6))
panels = [
    ('total confirmed', total_confirmed),
    ('total deaths', total_deaths),
    # These are worldwide figures, so the title carries no country name
    # (the former .format(country) call had no placeholder to fill).
    ('total death rate %', total_deaths / total_confirmed * 100),
]
for position, (title, series) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    series.plot()
    plt.title(title)
    plt.grid()