<a href="https://colab.research.google.com/github/bobby-mclaughlinjr/covid/blob/master/Rt*.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
import datetime as d

In [0]:
URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
COLUMNS = {'Province/State': 'area', 'Country/Region': 'region', 'Lat': 'latitude', 'Long': 'longitude'}

In [0]:
ASYMPTOMATIC = 0.75
R0 = 2.2
INCUBATION_DURATION = 5.2
INFECTION_DURATION = 2.9

In [0]:
class Rt(object):

    """
        The clinical dynamics in this model are an elaboration on SEIR that simulates the disease's progression at a
        higher resolution, subdividing I,RI,R into mild (patients who recover without the need for hospitalization),
        moderate (patients who require hospitalization but survive) and fatal (patients who require hospitalization
        and do not survive). Each of these variables follows its own trajectory to the final outcome, and the sum of
        these compartments add up to the values predicted by SEIR.

        Please refer to the source code for details. Note that we assume, for simplicity, that all fatalities come
        from hospitals, and that all fatal cases are admitted to hospitals immediately after the infectious period.
    """

    def __init__(self
                 , R0=R0
                 , incubation_duration=INCUBATION_DURATION
                 , infection_duration=INFECTION_DURATION
                 , asymptomatic=ASYMPTOMATIC
                 , data=None
                 ):

        self.R0 = R0
        self.incubation_duration = incubation_duration
        self.infection_duration = infection_duration
        self.asymptomatic = asymptomatic

        self.data = data

    @staticmethod
    def smoothing(X, window=7):
        return X.rolling(window=window).mean()

    def get_data(self, source='CSSE'):
        df = pd.read_csv(URL).rename(columns=COLUMNS)
        data = pd.melt(df, id_vars=COLUMNS.values(), var_name='date', value_name='cases')
        data['date'] = [d.datetime.strptime(str(date), '%m/%d/%y') for date in data['date']]

        self.data = data.sort_index().groupby(['region', 'date'])['cases'].sum().loc[['US', 'Spain', 'Italy'], :]

        return self

    def assumed(self, infectious, Rt, shift=7):
        return (Rt / self.infection_duration) * infectious.shift(shift) * (1 - self.asymptomatic)

    def calculate(self, smoothing=7):
        return self.data.copy(deep=True).groupby(level=0).apply(self._calculate, smoothing=smoothing)

    def _calculate(self, data, smoothing=7):
        data = data.to_frame('cases').reset_index(level=0, drop=True)

        data['total_cases'] = data['cases'] / (1 - self.asymptomatic)
        data['asymptomatic_cases'] = data['total_cases'] - data['cases']

        data['new_cases_actual'] = data['cases'].diff()
        data['new_cases'] = data['total_cases'].diff()
        data['new_cases_shift'] = data['new_cases'].shift(13)

        # Rt
        data['infectious'] = (data['new_cases'] - data['new_cases'].shift(3)).expanding().apply(lambda x: np.nansum(x))

        data['Rt^'] = self.smoothing(data['new_cases'].rolling(window=3).mean() * self.infection_duration / data['infectious'].rolling(window=3).mean().shift(8), window=smoothing)
        data['Rt*'] = self.smoothing(-(data['total_cases'] - data['new_cases_shift']) / ((data['total_cases'] - data['new_cases_shift']).shift(1)) * np.log(1 / self.incubation_duration) - 1, window=smoothing)
        data['Rt'] = data[['Rt^', 'Rt*']].mean(axis=1)

        data['ex_ante'] = ((data['Rt'] / self.infection_duration) * data['infectious'].shift(6) * (1 - self.asymptomatic)).shift()
        data['ex_post'] = data['new_cases_actual'].shift(-1)

        data['Rt_7'] = self.assumed(data['infectious'], Rt=0.7)
        data['Rt_8'] = self.assumed(data['infectious'], Rt=0.8)
        data['Rt_9'] = self.assumed(data['infectious'], Rt=0.9)

        # results
        current = data.iloc[-1]
        last = data.iloc[-2]

        return {'current_Rt': current['Rt']
                , 'last_Rt': last['Rt']
                , 'delta_Rt': current['Rt'] - last['Rt']
                , 'ex_ante': current['ex_ante']
                , 'ex_post': current['ex_post']
                , 'error': current['new_cases_actual'] - current['ex_ante']
                , 'error_pct': (current['ex_ante'] - current['ex_post']) / current['ex_post']
                , 'Rt_7': current['Rt_7']
                , 'Rt_8': current['Rt_8']
                , 'Rt_9': current['Rt_9']
                }

In [0]:
Rt().get_data().calculate()

region            
Italy   current_Rt        0.726579
        last_Rt           0.734940
        delta_Rt         -0.008361
        ex_ante        2589.271147
        ex_post                NaN
        error           139.728853
        error_pct              NaN
        Rt_7           2466.172414
        Rt_8           2818.482759
        Rt_9           3170.793103
Spain   current_Rt        0.754320
        last_Rt           0.723389
        delta_Rt          0.030932
        ex_ante        2373.213768
        ex_post                NaN
        error          1594.786232
        error_pct              NaN
        Rt_7           2296.482759
        Rt_8           2624.551724
        Rt_9           2952.620690
US      current_Rt        0.821928
        last_Rt           0.812764
        delta_Rt          0.009164
        ex_ante       22777.847907
        ex_post                NaN
        error         16682.152093
        error_pct              NaN
        Rt_7          19617.620690
 