# The Peak Model
A standardised model for identifying single peaks in cases, deaths, mobility drops, etc. The four-stage model assumes a period of expansion, a peak stage, a period of contraction, and a stabilisation stage.

In [1]:
import datetime 

from matplotlib.pylab import plt
from matplotlib.ticker import FuncFormatter
import matplotlib.colors as mcolors
import matplotlib.dates as mdates

import pandas as pd
import numpy as np

import seaborn as sns

from loguru import logger



%matplotlib inline

# Parameters

In [2]:
# Default input dataset (OWID). Alternative source: '../data/processed/minimal_ctp_covid.csv'.
# NOTE(review): the next cell reassigns `minimal_dataset`, so on a top-to-bottom run this default is overridden.

minimal_dataset = '../data/processed/minimal_owid_covid.csv'


In [3]:
# Parameters
# Overrides the default `minimal_dataset` assigned in the previous cell.
# NOTE(review): looks like a tool-injected parameters cell (e.g. papermill) — confirm before editing by hand.
minimal_dataset = "../data/processed/minimal_ctp_covid.csv"


In [4]:
# Output path derived from the input path. str.replace swaps EVERY occurrence of
# 'minimal' in the string (directory names included), not just the file-name prefix.
peak_model_dataset = minimal_dataset.replace('minimal', 'peak_model')

# Display the derived path.
peak_model_dataset

'../data/processed/peak_model_ctp_covid.csv'

# The Peak Model

In [5]:
# Default fractions of the peak value that delimit the stages.
lower_threshold, upper_threshold = .15, .85

def get_peak_model_dates(s, upper=upper_threshold, lower=lower_threshold):
    """Locate the four stage-transition dates of a single-peak series.

    Parameters
    ----------
    s : pd.Series
        Values indexed by date (or any ordered label). The caller is expected
        to have removed missing values.
    upper, lower : float
        Fractions of the peak value. A value counts as "in the peak" when it
        exceeds ``upper * peak`` and as "active" when it exceeds
        ``lower * peak``. The defaults are bound to the module-level
        thresholds at definition time.

    Returns
    -------
    pd.Series
        Indexed by ``expansion_start_date``, ``peak_start_date``,
        ``contraction_start_date`` and ``stable_start_date``. A stage that
        cannot be identified is left as NaN; once one stage is missing every
        later stage is missing too.
    """

    # The model parameters
    params = ['expansion_start_date', 'peak_start_date', 'contraction_start_date', 'stable_start_date']
    # Explicit object dtype: the slots hold index labels (timestamps) or NaN.
    peak_model = pd.Series(index=params, dtype=object)

    if not len(s):
        return peak_model

    # The peak and peak date
    peak_value, peak_date = s.max(), s.idxmax()

    # Use the thresholds passed by the caller, not the module-level globals.
    lower_cutoff, upper_cutoff = lower * peak_value, upper * peak_value

    # pre and post peak (both include the peak itself)
    pre, post = s.loc[:peak_date], s.loc[peak_date:]

    # The start of the expansion is the first date on which pre exceeds the lower cutoff.
    # If it never exceeds the lower threshold then there is no expansion start.
    expansion = pre[pre > lower_cutoff]
    if not len(expansion):
        return peak_model
    peak_model.loc['expansion_start_date'] = expansion.index[0]

    # peak start: first date in the expansion that exceeds the upper cutoff.
    # Guarded so that e.g. upper >= 1 leaves the slot empty instead of raising.
    above_upper = expansion[expansion > upper_cutoff]
    if len(above_upper):
        peak_model.loc['peak_start_date'] = above_upper.index[0]

    # peak end: walking backwards from the end, the first value above the upper
    # cutoff is the last such date after the peak.
    rev_post = post[::-1]
    peak = rev_post[rev_post > upper_cutoff]
    if not len(peak):
        return peak_model

    # .loc keeps the slice label-based for every index type.
    contraction = post.loc[peak.index[0]:]
    if len(contraction) <= 1:
        # The series ends while still in the peak stage.
        return peak_model
    # The contraction starts on the observation after the peak ends.
    peak_model.loc['contraction_start_date'] = contraction.index[1]

    # The stable start: the observation after the last value above the lower cutoff.
    contraction_end = rev_post[rev_post > lower_cutoff]
    if len(contraction_end):
        stable = post.loc[contraction_end.index[0]:]
        if len(stable) > 1:
            peak_model.loc['stable_start_date'] = stable.index[1]

    return peak_model
    

    
def get_growth_model_stages(s, labels=['E', 'P', 'C', 'S']):
    """Label every observation of ``s`` with the stage it falls in.

    The stage start dates come from ``get_peak_model_dates``; each start date
    receives the label at the same position in ``labels`` and the label is
    then carried forward until the next start date.

    Parameters
    ----------
    s : pd.Series
        Values indexed by date, without missing values.
    labels : list of str
        One label per stage, in stage order. Only as many labels as there are
        identified stages are used.

    Returns
    -------
    pd.Series or None
        Stage labels on the index of ``s``; observations before the first
        stage (or all of them when no stage is found) are NaN. ``None`` when
        ``s`` is empty.
    """
    if len(s) == 0:
        # Kept from the original behaviour: an empty series yields None.
        return None

    # Object dtype from the start: writing string labels into a float series
    # relies on an implicit upcast that newer pandas versions deprecate.
    stages = pd.Series(np.nan, index=s.index, dtype=object)

    stage_start_dates = get_peak_model_dates(s).dropna()
    if not len(stage_start_dates):
        # No stage identified: nothing to label or fill forward.
        return stages

    stages.loc[stage_start_dates] = labels[:len(stage_start_dates)]

    return stages.ffill()

# Load the Dataset

In [6]:
logger.info('Loading %s' % minimal_dataset)

# Parse the `date` column as timestamps on load. low_memory=False turns off
# pandas' chunked parsing, which can otherwise infer mixed dtypes per column.
df = pd.read_csv(minimal_dataset, parse_dates=['date'], low_memory=False)
# Quick check of what was loaded: frame shape and the most recent date.
df.shape, df.date.max()

2020-07-28 14:26:21.616 | INFO     | __main__:<module>:1 - Loading ../data/processed/minimal_ctp_covid.csv


((7599, 14), Timestamp('2020-07-27 00:00:00'))

In [7]:
# Clamp negative cases/deaths to 0 (negative values are treated as bad reporting).
# Rows are kept; only the values are changed. The regex selects every column
# whose name contains '_cases' or '_deaths'.
check_cols = df.filter(regex='_cases|_deaths').columns

df[check_cols] = df[check_cols].clip(lower=0)

# Sanity check: per-column minimums, frame shape, and number of distinct aggregations.
df[check_cols].min(), df.shape, df['aggregation'].nunique()

(new_cases                      0.0
 new_deaths                     0.0
 new_cases_rolling_mean         0.0
 new_deaths_rolling_mean        0.0
 new_cases_rolling_mean_rel     0.0
 new_deaths_rolling_mean_rel    0.0
 dtype: float64,
 (7599, 14),
 52)

# Build the Models
Next we apply the model to a number of different columns, including case/death counts and mobility/stringency columns. We use different labels for each to distinguish the virus models from the mobility/lockdown models, but the transitions/stages are all calculated in the same way, using the same upper and lower thresholds.

In [8]:
# Log marker: the stage-model cells follow.
logger.info('Building peak models.')

2020-07-28 14:26:22.137 | INFO     | __main__:<module>:1 - Building peak models.


## The Cases/Deaths Models

In [9]:
# Columns to model, one stage column is added per entry.
peak_cols = [
    'new_cases_rolling_mean',
    'new_deaths_rolling_mean'
]

# One label per stage, in stage order (expansion, peak, contraction, stable).
labels = ['E', 'P', 'C', 'S']

for peak_col in peak_cols:
    stage_col = peak_col + '_stage'

    # Calculate the stages per aggregation; the result is keyed by (aggregation, date).
    stages = pd.DataFrame(
        df.groupby('aggregation').apply(
            lambda g: get_growth_model_stages(g.set_index('date')[peak_col].dropna(), labels=labels)
        ), columns=[stage_col]
    )

    # Drop a stage column left over from an earlier run of this cell; without
    # this the join raises on the overlapping column name when the cell is re-run.
    df = (
        df.drop(columns=[stage_col], errors='ignore')
        .set_index(['aggregation', 'date'])
        .join(stages)
        .reset_index()
        .sort_values(by='date')
    )

df.shape

  


(7599, 16)

## Mobility/Stringency Models

In [10]:
# Column(s) for the mobility model. Note: this reuses (and overwrites) the
# `peak_cols` / `labels` names set in the cases/deaths cell.
peak_cols = ['google_mobility_level_rolling_mean']

# One label per stage, in stage order; distinct letters keep mobility stages
# apart from the case/death stages.
# NOTE(review): the meaning of each letter is not stated in this notebook — confirm.
labels = ['D', 'H', 'B', 'N']

def lockdown_period(mobility):
    """Flip a mobility series and trim it to start at its pre-peak low point.

    The series is transformed to ``100 - mobility``. The result is cut so that
    it begins at the first occurrence of the smallest transformed value found
    at or before the largest transformed value, and runs to the end.

    An empty series is returned unchanged (and untransformed).
    """
    if not len(mobility):
        return mobility

    flipped = 100 - mobility

    # Largest flipped value, then the first minimum at or before it.
    peak_at = flipped.idxmax()
    start_at = flipped.loc[:peak_at].idxmin()

    return flipped.loc[start_at:]

for peak_col in peak_cols:
    stage_col = peak_col + '_stage'

    # Calculate the stages per aggregation on the trimmed, flipped mobility series.
    stages = pd.DataFrame(
        df.groupby('aggregation').apply(
            lambda g: get_growth_model_stages(lockdown_period(g.set_index('date')[peak_col].dropna()), labels=labels)
        ), columns=[stage_col]
    )

    # Drop a stage column left over from an earlier run of this cell; without
    # this the join raises on the overlapping column name when the cell is re-run.
    df = (
        df.drop(columns=[stage_col], errors='ignore')
        .set_index(['aggregation', 'date'])
        .join(stages)
        .reset_index()
        .sort_values(by='date')
    )

df.shape

  


(7599, 17)

## Add Stage Days

In [11]:
def _add_stage_day(frame, stage_col):
    """Return ``frame`` with a ``<stage_col>_day`` column added.

    The new column is the 0-based position of each row within its
    (aggregation, stage) group, in the frame's current row order. Rows whose
    stage is NaN belong to no group and get NaN. The returned frame has
    ``aggregation``, ``stage_col`` and ``date`` moved to the front and a fresh
    integer index.
    """
    day_col = stage_col + '_day'

    # Drop a day column left over from an earlier run so the join cannot
    # fail on an overlapping column name.
    frame = frame.drop(columns=[day_col], errors='ignore')

    # The second reset_index() yields a 0..n-1 counter ('level_0') per group,
    # which is then keyed by date.
    stage_days = pd.DataFrame(frame.groupby(['aggregation', stage_col]).apply(
        lambda g: g.reset_index().reset_index().set_index('date')['level_0'])).rename(columns={'level_0': day_col})

    return frame.set_index(['aggregation', stage_col, 'date']).join(stage_days).reset_index()


# Add the stage days for the cases, deaths and mobility models, in that order
# (the order determines the final column order of df).
for stage_col in [
    'new_cases_rolling_mean_stage',
    'new_deaths_rolling_mean_stage',
    'google_mobility_level_rolling_mean_stage',
]:
    df = _add_stage_day(df, stage_col)

df.shape

(7599, 20)

## Add Cumulative Max Values

In [12]:
# Add the per-aggregation running maximum of new_cases_rolling_mean to df as
# 'new_cases_rolling_mean_cummax'.

# Drop the column if it is left over from an earlier run of this cell; without
# this the join raises on the overlapping column name when the cell is re-run.
df = df.drop(columns=['new_cases_rolling_mean_cummax'], errors='ignore')

# The running maximum is computed in df's current row order within each
# aggregation, keyed by (aggregation, date), and joined back on those keys.
df = df.set_index(['aggregation', 'date']).join(
    pd.DataFrame(df.groupby(['aggregation']).apply(
        lambda g: g.set_index('date')['new_cases_rolling_mean'].cummax())).add_suffix('_cummax')).reset_index()
df.head()

Unnamed: 0,aggregation,date,google_mobility_level_rolling_mean_stage,new_deaths_rolling_mean_stage,new_cases_rolling_mean_stage,state,new_cases,new_deaths,population,google_mobility_level,...,new_cases_rolling_mean,new_deaths_rolling_mean,google_mobility_level_rolling_mean,new_cases_rolling_mean_rel,new_deaths_rolling_mean_rel,google_mobility_level_rolling_mean_rel,new_cases_rolling_mean_stage_day,new_deaths_rolling_mean_stage_day,google_mobility_level_rolling_mean_stage_day,new_cases_rolling_mean_cummax
0,Washington,2020-01-22,,,,Washington,2.0,,7614893.0,,...,,,,,,,,,,
1,us,2020-01-22,,,,,2.0,0.0,7614893.0,,...,,,,,,,,,,
2,us,2020-01-23,,,,,0.0,0.0,7614893.0,,...,,,,,,,,,,
3,Washington,2020-01-23,,,,Washington,0.0,,7614893.0,,...,,,,,,,,,,
4,Washington,2020-01-24,,,,Washington,0.0,,7614893.0,,...,,,,,,,,,,


# Save Countries Dataset

In [13]:
logger.info('Saving peak model dataset -> %s' % peak_model_dataset)

# index=False: the row index is not written to the CSV.
df.to_csv(peak_model_dataset, index=False)

# Closing check: final shape, output path and the most recent date.
df.shape, peak_model_dataset, df.date.max()

2020-07-28 14:26:25.812 | INFO     | __main__:<module>:1 - Saving peak model dataset -> ../data/processed/peak_model_ctp_covid.csv


((7599, 21),
 '../data/processed/peak_model_ctp_covid.csv',
 Timestamp('2020-07-27 00:00:00'))