In [39]:
from tqdm.notebook import tqdm
import pandas as pd
from datetime import datetime, timedelta


import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)


In [40]:
def get_Rt(DFSeries):
    '''
    This function will calculate the value of Rt for a series.
    In order to calculate R0 we can adapt this 
    https://en.wikipedia.org/wiki/Basic_reproduction_number#Estimation_methods

    Reminder: Rt = R0 when no intervention has been applied and there is mixing
    
    usage get_Rt(pandas_series)
    
    Input:
        Take in a pandas series with time and value
    Output:
        Return the value of R0
    
    '''
    import numpy as np
    from scipy.optimize import minimize,curve_fit
    
    def fit_expo(x,y):   
        logParams   = np.polyfit(x, np.log(y), 1,w=np.sqrt(y))
        initParams  = (np.exp(logParams[1]), logParams[0])
        params      = curve_fit(lambda t,a,b: a*np.exp(b*t),  x,  y, p0=initParams)    
        return params[0],params[1],initParams

    x = np.arange(len(DFSeries))
    y = list(DFSeries.values)
    LogisticFit = fit_expo(x,y)

    a = LogisticFit[0][0]
    b = LogisticFit[0][1] # This is also knonw as K from descriptio in Wikipedia
    #
    # The dooubling rate is given by T_d
    #
    T_d = np.log(2)/b 
    #
    # We will use the imperial value for tau
    #
    tau = 5.1 
    
    Rt = np.mean(np.exp(b*tau))

    
    return(Rt)

In [41]:
#
# Using the function from this github
# https://github.com/ifornax/Covid19/blob/master/FunctionProvincialData.ipynb
# create a dataframe say DF1
#

In [42]:
DF1 = pd.read_csv("SAProvinceConfirmed.csv")

In [43]:
DF1.head()

Unnamed: 0,date,EC,FS,GP,KZN,LP,MP,NC,NW,WC,UNKNOWN,Confirmed
0,2020-03-05,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,2020-03-07,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,2020-03-08,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3
3,2020-03-09,0.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,7
4,2020-03-11,0.0,0.0,5.0,7.0,0.0,0.0,0.0,0.0,1.0,0.0,13


In [44]:
DF1["date"] = pd.to_datetime(DF1.date, format='%Y-%m-%d')
DF1.index = DF1["date"]

In [45]:
DF1.dtypes

date         datetime64[ns]
EC                  float64
FS                  float64
GP                  float64
KZN                 float64
LP                  float64
MP                  float64
NC                  float64
NW                  float64
WC                  float64
UNKNOWN             float64
Confirmed             int64
dtype: object

In [46]:
#
# To calculate R_0 we need to take data before the lockdown, where we have a mixing
#
ToFit = DF1[DF1.index < datetime.strptime('Mar 27 2020', '%b %d %Y')]['Confirmed']
get_Rt(ToFit)


3.9886984387357933

In [47]:
Rt_WC = []
Rt_GP = []
Rt_KZN = []
Rt_Confirmed = []
for Week in DF1.index.week.unique()[1:]:
    #DF1[DF1.index.week == Week]
    Rt_WC.append(get_Rt(DF1[DF1.index.week == Week]['WC'].dropna()))
    Rt_GP.append(get_Rt(DF1[DF1.index.week == Week]['GP'].dropna()))
    Rt_KZN.append(get_Rt(DF1[DF1.index.week == Week]['KZN'].dropna()))
    Rt_Confirmed.append(get_Rt(DF1[DF1.index.week == Week]['Confirmed'].dropna()))


divide by zero encountered in log


invalid value encountered in multiply


Covariance of the parameters could not be estimated



In [48]:
Rt_DF = pd.DataFrame(list(zip(Rt_WC, Rt_GP, Rt_KZN, Rt_Confirmed)),
              columns=['Rt_WC','RT_GP', 'Rt_KZN','Confirmed'])

In [49]:
Rt_DF.index = DF1.index.week.unique()[1:]

In [53]:
#
# Because the week[1] has few values, I will no trust the fit
#
Rt_DF.iloc[1:].iplot(theme="white",xTitle = "Week No",yTitle="Rt",width=3,symbol='circle-dot',size=25,
            title=f"Rt calculated per week number")