In [42]:
%matplotlib inline

import pandas as pd
import numpy as np
from datetime import datetime

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns

import plotly.graph_objs as go
import plotly
plotly.__version__

'4.5.0'

In [43]:
# Global plotting defaults for the notebook: seaborn "darkgrid" theme first,
# then the matplotlib default figure size (kept in the original order).
sns.set(style='darkgrid')
mpl.rcParams.update({'figure.figsize': (12, 15)})

In [44]:
# Flat table of confirmed cases (one column per country), relative to the notebook.
dataPath_Raw = "data/COVID_Flat_Table_Complete.csv"
#dataPath_Processed = ("../data/processed/")

# Let pandas display up to 500 rows before truncating a frame.
pd.options.display.max_rows = 500

In [45]:
# Load the flat table and preview the five most recent rows by date.
df_analyse = pd.read_csv(dataPath_Raw, delimiter=",")
df_analyse.sort_values(by='date').tail(5)

Unnamed: 0,date,Italy,US,Spain,Germany,India,"Korea, South",China,Brazil,Afghanistan,...,Burundi,Sierra Leone,Malawi,South Sudan,Western Sahara,Sao Tome and Principe,Yemen,Comoros,Tajikistan,Lesotho
145,2020-06-15,237290,2114026,244109,187682,343091,12155,84378,888271,25527,...,85,1176,555,1693,9,662,844,176,5097,4
146,2020-06-16,237500,2137731,244328,188252,354065,12198,84422,923189,26310,...,104,1225,564,1776,9,671,885,197,5160,4
147,2020-06-17,237828,2163290,244683,188604,366946,12257,84458,955377,26874,...,104,1249,572,1813,9,683,902,197,5221,4
148,2020-06-18,238159,2191052,245268,189817,380532,12306,84494,978142,27532,...,104,1272,592,1830,9,688,909,210,5279,4
149,2020-06-19,238011,2220961,245575,190299,395048,12373,84494,1032913,27878,...,104,1298,620,1864,9,693,919,210,5338,4


## Fitting

In [46]:
from scipy import optimize
from scipy import integrate

In [47]:
# Observed series used for the fit: the US column from row 90 onwards.
ydata = np.array(df_analyse['US'].iloc[90:])
# Integer day index (0, 1, 2, ...) matching ydata element for element.
time = np.arange(ydata.shape[0])

In [48]:
# Initial conditions of the SIR model.
I0 = ydata[0]        # infected at t=0: first value of the fitted series
S0 = 328000000       # susceptible at t=0 -- NOTE(review): presumably the US population; confirm
R0 = 0               # recovered at t=0 (this is NOT the reproduction number)

# Total population referenced by SIR_model_fit. It was never defined in any
# visible cell, so a fresh kernel raised NameError.
# NOTE(review): assumed to be S0 + I0 + R0 -- confirm against the intended value.
N0 = S0 + I0 + R0

# `beta` was previously read without ever being assigned in the notebook
# (hidden kernel state). 0.4 is the value shown in this cell's recorded output.
beta = 0.4
beta

0.4

In [49]:
# Show the initial infected count (first element of ydata) as a sanity check.
print(I0)

809213


In [50]:
def SIR_model_fit(SIR, time, beta, gamma):
    """Right-hand side of the SIR ordinary differential equations.

    The signature follows what ``scipy.integrate.odeint`` expects for
    ``func(y, t, *args)``.

    Parameters
    ----------
    SIR : sequence of three numbers
        Current state ``(S, I, R)``: susceptible, infected, recovered.
    time : float
        Current integration time. Unused by the equations, but required
        by the ``odeint`` calling convention.
    beta : float
        Infection rate.
    gamma : float
        Recovery rate.

    Returns
    -------
    list
        ``[dS, dI, dR]``, the time derivatives of the three compartments.
    """
    S, I, R = SIR

    # The total population is taken from the state itself rather than from a
    # module-level ``N0`` (which no cell defined). The three derivatives below
    # sum to zero, so S + I + R stays at its initial value during integration.
    N = S + I + R
    if N == 0:
        # Empty population: nothing can flow between compartments.
        return [0.0, 0.0, 0.0]

    new_infections = beta * S * I / N
    recoveries = gamma * I

    dS = -new_infections
    dI = new_infections - recoveries
    dR = recoveries

    return [dS, dI, dR]
    

In [51]:
def fit_odeint(x, beta, gamma):
    """Model function for ``optimize.curve_fit``: infected curve of the SIR model.

    Integrates ``SIR_model_fit`` from the module-level initial state
    ``(S0, I0, R0)`` over the time points in ``x``.

    Parameters
    ----------
    x : array-like
        Time points at which the solution is evaluated. ``odeint`` treats the
        first entry as the time of the initial condition. Previously this
        argument was ignored in favour of the global ``time`` array.
    beta : float
        Infection rate passed on to ``SIR_model_fit``.
    gamma : float
        Recovery rate passed on to ``SIR_model_fit``.

    Returns
    -------
    numpy.ndarray
        Column 1 of the integrated ``(S, I, R)`` trajectory, i.e. the
        infected compartment, one value per entry of ``x``.
    """
    trajectory = integrate.odeint(SIR_model_fit, (S0, I0, R0), x, args=(beta, gamma))
    return trajectory[:, 1]

# [:, 1] selects the infected (I) column of the odeint result

In [54]:
# Trial integration with hand-picked parameters, to check that the model runs.

popt = [0.4, 0.1]  # [beta, gamma]

fit_odeint(time, popt[0], popt[1]);

In [55]:
# Least-squares fit of (beta, gamma); no initial guess is supplied.
popt, pcov = optimize.curve_fit(f=fit_odeint, xdata=time, ydata=ydata)
# One-sigma parameter errors: square roots of the covariance diagonal.
perr = np.sqrt(pcov.diagonal())


In [56]:
# Report the parameter uncertainties next to the first observed value.
print('Standard deviation errors : ', perr, 'Infection Start : ', ydata[0])


Standard deviation errors :  [0.00146802 0.00548592] Infection Start :  809213


In [57]:
# Model curve evaluated at the fitted (beta, gamma).
fitted = fit_odeint(time, popt[0], popt[1])

In [61]:
"""plt.semilogy(time, ydata, 'o')
plt.semilogy(time, fitted)
plt.title('SIR model for Germany')
plt.ylabel('Number of infected people')
plt.xlabel('Days')
#plt.show()"""

# Keep the fitted parameters under readable names.
beta_fit, gamma_fit = popt[0], popt[1]

print('Optimal Parameters : beta = ', beta_fit, 'gamma = ', gamma_fit)
# Ratio beta/gamma, reported as the reproduction number.
print('Reproduction number, R0 : ', beta_fit / gamma_fit)

Optimal Parameters : beta =  0.07578264602066746 gamma =  0.27286813452937564
Reproduction number, R0 :  0.27772625833123465


In [63]:
# Preview the first five rows of the flat table.
df_analyse.head(n=5)

Unnamed: 0,date,Italy,US,Spain,Germany,India,"Korea, South",China,Brazil,Afghanistan,...,Burundi,Sierra Leone,Malawi,South Sudan,Western Sahara,Sao Tome and Principe,Yemen,Comoros,Tajikistan,Lesotho
0,2020-01-22,0,1,0,0,0,1,548,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,1,0,0,0,1,643,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,2,0,0,0,2,920,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,2,0,0,0,2,1406,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,5,0,0,0,3,2075,0,0,...,0,0,0,0,0,0,0,0,0,0


## Processing data

In [93]:



import pandas as pd
import numpy as np

from datetime import datetime


def store_relational_JH_data(data_path="data/time_series_covid19_confirmed_global.csv",
                             out_path="data/sir_relational.csv"):
    ''' Transforms the COVID data in a relational data set

    Reads the wide table at ``data_path`` (one row per state/country, one
    column per date), reshapes it to one row per (date, state, country) with
    a ``confirmed`` value, and writes the result to ``out_path`` as CSV.

    Parameters
    ----------
    data_path : str
        CSV file to read. It must contain the columns 'Country/Region',
        'Province/State', 'Lat' and 'Long'; every remaining column is
        treated as a date.
    out_path : str
        Destination CSV file (written without the index).

    Both parameters default to the paths that were previously hard-coded,
    so existing calls without arguments behave as before.
    '''

    pd_raw = pd.read_csv(data_path)

    pd_data_base = pd_raw.rename(columns={'Country/Region': 'country',
                                          'Province/State': 'state'})

    # Rows without a province get the placeholder 'no' so that the
    # (state, country) pair can serve as an index below.
    pd_data_base['state'] = pd_data_base['state'].fillna('no')

    pd_data_base = pd_data_base.drop(['Lat', 'Long'], axis=1)

    # Transpose so dates become the rows, then stack both column levels to
    # obtain one record per (date, state, country).
    pd_relational_model = pd_data_base.set_index(['state', 'country']) \
                                .T                              \
                                .stack(level=[0, 1])            \
                                .reset_index()                  \
                                .rename(columns={'level_0': 'date',
                                                 0: 'confirmed'},
                                        )

    # Explicit date parsing instead of relying on astype() to parse strings.
    pd_relational_model['date'] = pd.to_datetime(pd_relational_model['date'])

    pd_relational_model.to_csv(out_path, sep=',', index=False)
    print(' Number of rows stored: ' + str(pd_relational_model.shape[0]))

# Run the transformation when this code is executed directly (the guard comes
# from the script version of this cell); it writes data/sir_relational.csv.
if __name__ == '__main__':

    store_relational_JH_data()


 Number of rows stored: 57190


In [97]:
# Load the relational table written by store_relational_JH_data().
# `error_bad_lines=False` was dropped: the keyword was removed in pandas 2.0,
# and silently skipping malformed rows would hide a corrupt file.
df_input_large = pd.read_csv('data/sir_relational.csv', sep=',')

In [98]:
# Preview the first five rows of the relational table.
df_input_large.head(n=5)

Unnamed: 0,date,state,country,confirmed
0,2020-01-22,Alberta,Canada,0.0
1,2020-01-22,Anguilla,United Kingdom,0.0
2,2020-01-22,Anhui,China,1.0
3,2020-01-22,Aruba,Netherlands,0.0
4,2020-01-22,Australian Capital Territory,Australia,0.0


In [None]:
# %load ../src/visualization/visualize.py
import pandas as pd
import numpy as np

import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import plotly.graph_objects as go

import os
# Print the working directory; the relative 'data/...' paths below resolve against it.
print(os.getcwd())

# Relational table (date, state, country, confirmed) that feeds the dashboard.
# `error_bad_lines=False` was dropped: the keyword was removed in pandas 2.0,
# and silently skipping malformed rows would hide a corrupt file.
df_input_large = pd.read_csv('data/sir_relational.csv', sep=',')


# Empty placeholder figure shown until the callback delivers real traces.
fig = go.Figure()

app = dash.Dash()

# Page structure, top to bottom: intro text, country multi-select,
# series selector, and the graph that the callback updates.
app.layout = html.Div(children=[

    dcc.Markdown('''
    #  Applied Data Science on COVID-19 data

    Goal of the project is to teach data science by applying a cross industry standard process,
    it covers the full walkthrough of: automated data gathering, data transformations,
    filtering and machine learning to approximating the doubling time, and
    (static) deployment of responsive dashboard.

    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),

    # One option per distinct country in the loaded table.
    dcc.Dropdown(
        id='country_drop_down',
        options=[dict(label=country_name, value=country_name)
                 for country_name in df_input_large['country'].unique()],
        value=['US', 'Germany','Italy'],  # pre-selected countries
        multi=True
    ),

    dcc.Markdown('''
        ## Select Timeline of confirmed COVID-19 cases or the approximated doubling time
        '''),

    # Selects which column of the table is plotted; only 'confirmed' is offered.
    dcc.Dropdown(
        id='doubling_time',
        options=[dict(label='Timeline Confirmed ', value='confirmed')],
        value='confirmed',
        multi=False
    ),

    dcc.Graph(id='main_window_slope', figure=fig)
])



@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
     Input('doubling_time', 'value')])
def update_figure(country_list, show_doubling):
    """Build the figure for the selected countries and series.

    Both dropdowns are registered as inputs. Previously only the country
    dropdown was, so Dash called this two-argument function with a single
    value.

    Parameters
    ----------
    country_list : list of str or None
        Values of the 'country_drop_down' multi-select. ``None`` or an empty
        list yields a figure without traces.
    show_doubling : str or None
        Value of the 'doubling_time' dropdown: the name of the column of
        ``df_input_large`` to plot. Falls back to 'confirmed' when cleared.

    Returns
    -------
    dict
        Figure description with the keys 'data' (one trace per country) and
        'layout'.
    """
    # A cleared dropdown delivers None; fall back to safe defaults.
    country_list = country_list or []
    show_doubling = show_doubling or 'confirmed'

    is_doubling_rate = '_DR' in show_doubling
    if is_doubling_rate:
        my_yaxis = {'type': "log",
                    'title': 'Approximated doubling rate over 3 days (larger numbers are better #stayathome)'
                    }
    else:
        my_yaxis = {'type': "log",
                    'title': 'Confirmed infected people (source johns hopkins csse, log-scale)'
                    }

    # Case counts of a country's states add up to the country total;
    # a rate has to be averaged instead.
    aggregation = 'mean' if is_doubling_rate else 'sum'

    traces = []
    for each in country_list:

        df_country = df_input_large[df_input_large['country'] == each]

        # Aggregate only the plotted column. This avoids applying a numeric
        # aggregation to the string 'state' column and does not require
        # columns that are missing from the loaded table.
        df_plot = df_country.groupby(['country', 'date'])[show_doubling] \
                            .agg(aggregation) \
                            .reset_index()

        traces.append(dict(x=df_plot.date,
                           y=df_plot[show_doubling],
                           mode='markers+lines',
                           opacity=0.9,
                           name=each
                           )
                      )

    return {
            'data': traces,
            'layout': dict(
                width=1280,
                height=720,

                xaxis={'title': 'Timeline',
                       'tickangle': -45,
                       'nticks': 20,
                       'tickfont': dict(size=14, color="#7f7f7f"),
                       },

                yaxis=my_yaxis
        )
    }

# Start the Dash server in debug mode with the reloader switched off.
# The call is expected to block until the server is stopped.
if __name__ == '__main__':

    app.run_server(debug=True, use_reloader=False)
