**In Progress.**  This notebook is focused on continous-time COVID-19 trends calculations.  The calculations feed interactive data products that states, counties, or federal entities can ...

<br>

## Preliminaries

In [1]:
!rm -rf *.log
!rm -rf *.pdf
!rm -rf algorithms*
!rm -rf warehouse*

<br>

### Libraries

In [2]:
import pandas as pd
import numpy as np
import logging
import os

import zipfile
import requests
import io

<br>

### Logging

In [3]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

<br>

### Custom Classes

From GitHub

In [4]:
url = 'https://github.com/briefings/sars/raw/develop/fundamentals/algorithms/algorithms.zip'

try:
    req = requests.get(url=url)
    req.raise_for_status()
except requests.exceptions.RequestException as err:
    raise err

In [5]:
obj = zipfile.ZipFile(io.BytesIO(req.content))
obj.extractall()

<br>

Import

In [6]:
import algorithms.base.delta
import algorithms.base.difference
import algorithms.base.quantiles
import algorithms.misc.doublet

<br>

### Directories

In [7]:
warehouse = 'warehouse'
if not os.path.exists(warehouse):
    os.makedirs(warehouse)

<br>
<br>

## Analysis

### Data

<br>

**Get**

In [8]:
dataurl = 'https://raw.githubusercontent.com/briefings/sars/develop/fundamentals/atlantic/warehouse/baselines.csv'

parse_dates = ['datetimeobject']
baselines = pd.read_csv(filepath_or_buffer=dataurl, header=0, encoding='utf-8', parse_dates=parse_dates)

In [9]:
logger.info('\n{}'.format(baselines.info()))

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12012 entries, 0 to 12011
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   datetimeobject            12012 non-null  datetime64[ns]
 1   STUSPS                    12012 non-null  object        
 2   deathIncrease             12012 non-null  float64       
 3   deathCumulative           12012 non-null  float64       
 4   positiveIncrease          12012 non-null  float64       
 5   positiveCumulative        12012 non-null  float64       
 6   icuIncrease               12012 non-null  float64       
 7   icuCumulative             12012 non-null  float64       
 8   hospitalizedIncrease      12012 non-null  float64       
 9   hospitalizedCumulative    12012 non-null  float64       
 10  testIncrease              12012 non-null  float64       
 11  testCumulative            12012 non-null  float64       
 12  deathRate         

<br>

**Periods, Places**

In [10]:
periods = np.arange(6, 22, 1)
periods

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [11]:
placestype = 'STUSPS'
placestype

'STUSPS'

In [12]:
places = baselines[placestype].unique()
places

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR'], dtype=object)

<br>

### Positive Test Rate

Positive test rates for varying periods

$\qquad \qquad \rho_{\tau, \Delta} = 100 * \Large{ \frac{C_{\tau} - C_{\tau - \Delta}}{T_{\tau} - T_{\tau - \Delta}} }$

* $\tau$: date
* $\Delta$: days
* $\rho_{\tau, \Delta}$: The positive test rate on date $\tau$ w.r.t. a delay of $\Delta$ days, i.e., relative to $\Delta$ days previously.

* $C_{\tau}$: the cumulative number of positive cases by date $\tau$.
* $C_{\tau - \Delta}$: the cumulative number of positive cases by $\Delta$ days previously, relative to date $\tau$.

* $T_{\tau}$: the cumulative number of tests by date $\tau$.
* $T_{\tau - \Delta}$: the cumulative number of tests by $\Delta$ days previously, relative to date $\tau$.


In [13]:
numerator = 'positiveCumulative'
denominator = 'testCumulative'

doublet = algorithms.misc.doublet.Doublet(blob=baselines, periods=periods, places=places, placestype=placestype)
ptr = doublet.exc(numerator=numerator, denominator=denominator)
ptr.rename(columns={'rates': 'positiveTestRate'}, inplace=True)

<br>

### Tests/Case

Tests per case for varying periods, i.e.,

$\qquad \qquad \large{{tpc}_{_{\tau, \Delta}}} = \Large{\frac{100}{\rho_{\tau, \Delta}}}$


In [14]:
ptr.loc[:, 'testsPerCase'] = np.where(ptr['positiveTestRate'] > 0, ptr['positiveTestRate'].rdiv(100), 0 )
logger.info('\n{}'.format(ptr.tail()))

INFO:__main__:
       datetimeobject STUSPS   period  positiveTestRate  testsPerCase
181787     2020-09-04     PR  21 days             100.0           1.0
181788     2020-09-05     PR  21 days             100.0           1.0
181789     2020-09-06     PR  21 days             100.0           1.0
181790     2020-09-07     PR  21 days             100.0           1.0
181791     2020-09-08     PR  21 days             100.0           1.0


<br>

### Rolling Quantiles

Medians

In [15]:
medians: pd.DataFrame = pd.DataFrame()

for event in ['positiveIncreaseRate', 'testIncreaseRate', 'deathIncreaseRate', 'icuIncreaseRate', 'hospitalizedIncreaseRate']:

    # Focus on
    base = baselines[['datetimeobject', 'STUSPS', event]].copy()

    # Pivot -> such that each field is a place, and each instance of a field is a date in time
    segment = base.pivot(index='datetimeobject', columns='STUSPS', values=event)

    # Quantiles
    quantiles = algorithms.base.quantiles.Quantiles(data=segment, places=places, placestype=placestype)
    values = quantiles.exc(periods=periods, quantile=0.5, quantiletype=event + 'Median')

    # Structuring
    if medians.empty:
        medians = values
    else:
        medians = medians.merge(values, how='inner', on=['datetimeobject', 'STUSPS', 'period'])


In [16]:
names = {i: i.replace('IncreaseRateMedian', 'IRM') for i in 
         ['positiveIncreaseRateMedian', 'testIncreaseRateMedian', 'deathIncreaseRateMedian', 
          'icuIncreaseRateMedian', 'hospitalizedIncreaseRateMedian']}

medians.rename(columns=names, inplace=True)

In [17]:
logger.info('\n{}'.format(medians.info()))

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
Int64Index: 181792 entries, 0 to 181791
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   datetimeobject   181792 non-null  datetime64[ns]
 1   STUSPS           181792 non-null  object        
 2   positiveIRM      181792 non-null  float64       
 3   period           181792 non-null  object        
 4   testIRM          181792 non-null  float64       
 5   deathIRM         181792 non-null  float64       
 6   icuIRM           181792 non-null  float64       
 7   hospitalizedIRM  181792 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(2)
memory usage: 12.5+ MB


<br>

### Merge

In [18]:
calculations = medians.merge(ptr, how='inner', on=['datetimeobject', 'STUSPS', 'period'])

In [19]:
logger.info('\n{}'.format(calculations.info()))

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
Int64Index: 181792 entries, 0 to 181791
Data columns (total 10 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   datetimeobject    181792 non-null  datetime64[ns]
 1   STUSPS            181792 non-null  object        
 2   positiveIRM       181792 non-null  float64       
 3   period            181792 non-null  object        
 4   testIRM           181792 non-null  float64       
 5   deathIRM          181792 non-null  float64       
 6   icuIRM            181792 non-null  float64       
 7   hospitalizedIRM   181792 non-null  float64       
 8   positiveTestRate  181792 non-null  float64       
 9   testsPerCase      181792 non-null  float64       
dtypes: datetime64[ns](1), float64(7), object(2)
memory usage: 15.3+ MB


<br>

## Write

In [20]:
select = ['datetimeobject', 'STUSPS', 'positiveTestRate', 'testsPerCase', 'positiveIRM', 
          'testIRM', 'deathIRM', 'icuIRM', 'hospitalizedIRM', 'period']

In [21]:
calculations[select].to_csv(path_or_buf=os.path.join(warehouse, 'trends.csv'), header=True, index=False, encoding='utf-8')

<br>

## Clean-up

In [22]:
!rm -rf *.log
!rm -rf *.pdf
!rm -rf algorithms*