## SIR modelling and Visualising in DASH Framework of COVID 19 Data

In [1]:
# add necessary imports
import pandas as pd
import numpy as np

from datetime import datetime
import pandas as pd 

from scipy import optimize
from scipy import integrate

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
sns.set(style="darkgrid")

mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 1000)


## Dataset for analysis


In [3]:
#Read the data
df_analyse=pd.read_csv('../data/processed/COVID_small_flat_table.csv',sep=';')  
df_analyse.sort_values('date',ascending=True).tail()
df_analyse = df_analyse.drop(columns={'Unnamed: 0'}, axis=1)

# Fitting the parameters of SIR model

In [130]:
# reference: https://scipython.com/book/chapter-8-scipy/additional-examples/the-sir-epidemic-model/
# beta/gamma is denoted as basic reproduction number.

N= 1000000 # max susceptible population = It is also the country's entire population
I0=df_analyse.Germany[35] # No. of days required to get a total infections of around 100.
R0=0
S0=N-(I0+R0)
# condition I0+S0+R0=N0


beta=0.4   # infection spread dynamics = It represents how many people can be infected from a single person. (here 0.4 means 4 people can get infected from 1 person)
gamma=0.1  # recovery rate

ydata = np.array(df_analyse.Germany[35:])
t=np.arange(len(ydata)) #time period


In [131]:
# SIR stands for S- susceptible, I - infected, R- recovered. The model suits perfectly for COVID scenario.
# NOTE: SIR model works on the assumption that recovered people will not get the infection again.
def SIR_model_t(SIR,t,N,beta,gamma):
    ''' Simple SIR model
        S: susceptible population
        t: time step, mandatory for integral.odeint
        I: infected people
        R: recovered people
        beta: infected rate
        gamma: recovery rate
        
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)
    
    '''
    
    S,I,R=SIR
    dS_dt=-beta*S*I/N    
    dI_dt=beta*S*I/N-gamma*I
    dR_dt=gamma*I
    return dS_dt,dI_dt,dR_dt

In [132]:
#Integrate the SIR equations over the time grid, t.
def fit_odeint(t, N, beta, gamma):
    return integrate.odeint(SIR_model_t, (S0, I0, R0), t, args=(N, beta, gamma))[:,1] # we only would like to get dI

In [133]:
# the resulting curve has to be fitted
# free parameters are here beta and gamma
fit_odeint(t, N, beta, gamma)

array([ 2.10000000e+01,  2.83467442e+01,  3.82635349e+01,  5.16492851e+01,
        6.97171980e+01,  9.41045567e+01,  1.27020786e+02,  1.71447062e+02,
        2.31405299e+02,  3.12320402e+02,  4.21507742e+02,  5.68828389e+02,
        7.67568562e+02,  1.03561758e+03,  1.39704132e+03,  1.88417601e+03,
        2.54040014e+03,  3.42377774e+03,  4.61179988e+03,  6.20746901e+03,
        8.34695108e+03,  1.12089170e+04,  1.50254352e+04,  2.00937383e+04,
        2.67872046e+04,  3.55622587e+04,  4.69554712e+04,  6.15620761e+04,
        7.99843269e+04,  1.02737867e+05,  1.30110563e+05,  1.61985487e+05,
        1.97668411e+05,  2.35790176e+05,  2.74361695e+05,  3.11019967e+05,
        3.43420416e+05,  3.69650711e+05,  3.88523547e+05,  3.99666850e+05,
        4.03424001e+05,  4.00642226e+05,  3.92438005e+05,  3.80000472e+05,
        3.64456984e+05,  3.46798540e+05,  3.27850544e+05,  3.08272539e+05,
        2.88573617e+05,  2.69134372e+05,  2.50230013e+05,  2.32051744e+05,
        2.14725295e+05,  

In [144]:
popt, pcov = optimize.curve_fit(fit_odeint, t, ydata)
perr = np.sqrt(np.diag(pcov))
    
print('standard deviation errors : ',str(perr), ' start infect:',ydata[0])
print("Optimal parameters: beta =", popt[0], " and gamma = ", popt[1])



standard deviation errors :  [inf inf inf]  start infect: 21
Optimal parameters: beta = -11.247286896113062  and gamma =  1.5505039336713


In [145]:
# get the final fitted curve
fitted=fit_odeint(t, N, popt[0], popt[1])
fitted

array([ 2.10000000e+01,  5.81062169e-05, -4.09917949e-10, -6.08620350e-11,
       -2.91656541e-11, -2.31015730e-12, -1.25593214e-12, -2.01706981e-13,
       -7.91700679e-14, -7.55555050e-14, -7.19409421e-14, -6.83263792e-14,
       -6.47118162e-14, -6.10972533e-14, -5.74826904e-14, -5.38681275e-14,
       -5.02535645e-14, -4.66390016e-14, -4.30244387e-14, -3.94098758e-14,
       -3.57953129e-14, -3.21807499e-14, -2.85661870e-14, -2.49516241e-14,
       -2.13370612e-14, -1.77224982e-14, -1.41079353e-14, -1.04933724e-14,
       -6.87880948e-15, -3.26424656e-15, -2.80275846e-16, -2.67882392e-16,
       -2.55488938e-16, -2.43095484e-16, -2.30702030e-16, -2.18308576e-16,
       -2.05915122e-16, -1.93521668e-16, -1.81128214e-16, -1.68734760e-16,
       -1.56341307e-16, -1.43947853e-16, -1.31554399e-16, -1.19160945e-16,
       -1.06767491e-16, -9.43740370e-17, -8.19805831e-17, -6.95871292e-17,
       -5.71936753e-17, -4.48002214e-17, -3.24067675e-17, -2.00133136e-17,
       -7.61985973e-18, -

In [None]:
plt.semilogy(t, ydata, 'o')
plt.semilogy(t, fitted)
plt.title("Fit of SIR model for Germany cases")
plt.ylabel("Population infected")
plt.xlabel("Days")
plt.show()
print("Optimal parameters: beta =", popt[0], " and gamma = ", popt[1])
print("Basic Reproduction Number R0 " , popt[0]/ popt[1])
print("This ratio is derived as the expected number of new infections (these new infections are sometimes called secondary infections from a single infection in a population where all subjects are susceptible. @wiki")

### Store the fitted data back into the csv file.

In [None]:
SIR_Germany = np.concatenate([np.zeros(35,dtype=float), fitted])
df_analyse['SIR_Germany'] = SIR_Germany

In [None]:
data_path='../data/processed/small_table_updated.csv'
df_analyse.to_csv(data_path, sep=';', index=False)

## Visualise the fitted SIR model using Dash Framework

In [10]:
df_analyse=pd.read_csv('../data/processed/small_table_updated.csv',sep=';', index_col=False)  
df_analyse.sort_values('date',ascending=True).tail()

Unnamed: 0,date,US,India,Germany,Italy,Australia,SIR_Italy,SIR_Germany,SIR_Australia,SIR_US,SIR_India
875,2022-06-15,85941290,43257730,27096571,17736696,7724035,18174100.0,29551720.0,8208324.0,119662200.0,53343430.0
876,2022-06-16,86057735,43270577,27124689,17773764,7753800,18226590.0,29712450.0,8282708.0,120370700.0,53481520.0
877,2022-06-17,86216418,43283793,27204953,17809934,7778643,18279150.0,29873980.0,8357766.0,121083500.0,53619970.0
878,2022-06-18,86230982,43296692,27204955,17844905,7801241,18331770.0,30036300.0,8433504.0,121800500.0,53758780.0
879,2022-06-19,86246101,43309473,27211896,17879160,7825659,18384450.0,30199430.0,8509929.0,122521700.0,53897940.0


In [11]:
import pandas as pd
import numpy as np

import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import plotly.graph_objects as go

import os
print(os.getcwd())
df_input_large=df_analyse  # pd.read_csv('data/processed/COVID_final_set.csv',sep=';')
countries_list=['Germany','Italy', 'US','India','Australia']

fig = go.Figure()

app = dash.Dash()
app.layout = html.Div([

    dcc.Markdown('''
    #  Applied Data Science on COVID-19 data

    Goal of the project is to teach data science by applying a cross industry standard process,
    it covers the full walkthrough of: automated data gathering, data transformations,
    filtering and machine learning to approximating the doubling time, and
    (static) deployment of responsive dashboard.

    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),


    dcc.Dropdown(
        id='country_drop_down',
        options=[ {'label': each,'value':each} for each in countries_list],
        value='Germany', # which are pre-selected
        multi=False
    ),

    dcc.Graph(figure=fig, id='main_window_slope')
])



@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value')])
def update_figure(country):

    my_yaxis={'type':"log",
                  'title':'Population infected (source johns hopkins csse, log-scale)'
              }


    traces = []    
    traces.append(dict(x=df_analyse.date, #t,
                            y=df_analyse[country],
                            mode='markers+lines',
                            opacity=0.9,
                            name='actual covid infections'
                    )
            ),
    traces.append(dict(x=df_analyse.date, #t, 
                        y=df_analyse['SIR_'+country],
                        mode='markers+lines',
                        opacity=0.6,
                        name='SIR model predictions for covid infections'
                )
        )
    

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis=my_yaxis
        )
    }

if __name__ == '__main__':

    app.run_server(debug=True, use_reloader=False)

D:\documents\TU K\TU K 3rd sem\enterprise data science\Enterprise-Data-Science-Submissions\notebooks
Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html
