**In Progress.**  This notebook is focused on continuous-time COVID-19 trends calculations.  The calculations feed interactive data products that states, counties, or federal entities can ...

<br>

## Preliminaries

In [1]:
# Start from a clean slate: delete log files, PDF files, and anything in the
# working directory whose name starts with `algorithms` or `warehouse`.
!rm -rf *.log
!rm -rf *.pdf
!rm -rf algorithms*
!rm -rf warehouse*

<br>

### Libraries

In [2]:
import pandas as pd
import numpy as np
import logging
import os
import pathlib

import zipfile
import requests
import io

<br>

### Logging

In [3]:
# A logger named after this module, plus root logging configured at INFO level
logger = logging.getLogger(name=__name__)
logging.basicConfig(level=logging.INFO)

<br>

### Custom Classes

From GitHub

In [4]:
# Location of the zipped `algorithms` package
url = 'https://github.com/briefings/sars/raw/develop/fundamentals/algorithms/algorithms.zip'

# Download the archive.
#   * A timeout is set because `requests` otherwise waits indefinitely for an
#     unresponsive server; on expiry a requests.exceptions.Timeout is raised.
#   * raise_for_status() converts HTTP 4xx/5xx responses into exceptions.
# Any requests.exceptions.RequestException simply propagates; a handler that
# only re-raises the same exception adds nothing.
req = requests.get(url=url, timeout=60)
req.raise_for_status()

In [5]:
# Unpack the downloaded archive into the current working directory.  The
# context manager closes the archive handle even if extraction fails.
with zipfile.ZipFile(io.BytesIO(req.content)) as obj:
    obj.extractall()

<br>

Import

In [6]:
import algorithms.base.delta
import algorithms.base.differences
import algorithms.base.quantiles
import algorithms.misc.doublet

<br>

### Directories

In [7]:
# Directory that receives this notebook's CSV outputs.  exist_ok=True makes the
# call safe to repeat and avoids the gap between an existence check and the
# creation of the directory.
warehouse = 'warehouse'
os.makedirs(warehouse, exist_ok=True)

<br>
<br>

## Measures & Metrics

In [8]:
# The directory one level above the current working directory, as a string
parent = str(pathlib.Path.cwd().parent)
parent

'J:\\library\\projects\\sars\\fundamentals\\atlantic'

### The Data

In [9]:
# Fields to be parsed as dates
parse_dates = ['datetimeobject']

# The baselines file sits in the `warehouse` directory beneath `parent`
datauri = os.path.join(parent, 'warehouse', 'baselines.csv')
baselines = pd.read_csv(datauri, header=0, encoding='utf-8', parse_dates=parse_dates)

<br>

**Daily Positive Test Rate**

In [10]:
# Ratio of the daily positive increase to the daily test increase.  Entries
# that are not finite (NaN, or +/- infinity from a zero denominator) become 0.
series = baselines.positiveIncrease.div(baselines.testIncrease).to_numpy()
series = np.where(np.isfinite(series), series, 0)
series

array([0., 0., 0., ..., 1., 1., 1.])

In [11]:
# Store the daily ratio as a percentage in a `dailyPositiveTestRate` field
baselines.loc[:, 'dailyPositiveTestRate'] = 100 * series

<br>

**Preview**

In [12]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and
# returns None, so formatting its return value logs the text "None".  Capture
# the summary in a string buffer and log the captured text instead.
summary = io.StringIO()
baselines.info(buf=summary)
logger.info('\n%s', summary.getvalue())

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12688 entries, 0 to 12687
Data columns (total 24 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   datetimeobject            12688 non-null  datetime64[ns]
 1   STUSPS                    12688 non-null  object        
 2   deathIncrease             12688 non-null  float64       
 3   deathCumulative           12688 non-null  float64       
 4   positiveIncrease          12688 non-null  float64       
 5   positiveCumulative        12688 non-null  float64       
 6   icuIncrease               12688 non-null  float64       
 7   icuCumulative             12688 non-null  float64       
 8   hospitalizedIncrease      12688 non-null  float64       
 9   hospitalizedCumulative    12688 non-null  float64       
 10  testIncrease              12688 non-null  float64       
 11  testCumulative            12688 non-null  float64       
 12  deathRate         

<br>

**Periods, Places**

In [13]:
# The window lengths under consideration: the integers 6 through 21
periods = np.arange(start=6, stop=22, step=1)
periods

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [14]:
# Name of the `baselines` field whose distinct values define the places
placestype = 'STUSPS'
placestype

'STUSPS'

In [15]:
# The distinct place codes present in the data
places = baselines[placestype].unique()
places

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
       'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR'], dtype=object)

<br>
<br>

### Positive Test Rate: Periodic

<br>

Foremost, the **Positive Test Rates** for varying periods are evaluated via

$\qquad \qquad \rho_{\tau, \Delta} = 100 * \Large{ \frac{P_{\tau} - P_{\tau - \Delta + 1}}{T_{\tau} - T_{\tau - \Delta + 1}} }$

wherein

<table style="width:45%; text-align: left; border: 0px solid black; float:left; margin-left: 60px">
    <tr>
        <th style="width:20%">Variable</th><th>Description</th> 
    </tr>
    <tr>
        <td>$\tau$</td><td>date</td>
    </tr>
    <tr>
        <td>$\Delta$</td><td>days</td>
    </tr>
    <tr>
        <td>$\rho_{\tau, \Delta}$</td>
        <td>The positive test rate on date $\tau$ w.r.t. starting date $\tau$ - ($\Delta$ - 1)</td>
    </tr>
    <tr>
      <td>$P_{\tau}$</td><td>The cumulative number of positive cases by date $\tau$.</td>
    </tr>
    <tr>
      <td>$P_{\tau - \Delta + 1}$</td>
      <td>The cumulative number of positive cases by starting date $\tau$ - ($\Delta$ - 1)</td>
    </tr>
    <tr>
      <td>$T_{\tau}$</td><td>The cumulative number of tests by date $\tau$.</td>
    </tr>
    <tr>
      <td>$T_{\tau - \Delta + 1}$</td><td>The cumulative number of tests by starting date $\tau$ - ($\Delta$ - 1).</td>
    </tr>
</table>


In [16]:
# The cumulative fields behind the periodic positive test rate
numerator = 'positiveCumulative'
denominator = 'testCumulative'

doublet = algorithms.misc.doublet.Doublet(
    blob=baselines, periods=periods, places=places, placestype=placestype)

# The `rates` field of the result is given its analysis-specific name
ptr = doublet.exc(numerator=numerator, denominator=denominator).rename(
    columns={'rates': 'positiveTestRate'})

<br>
<br>

The **Tests/Case** for varying periods is evaluated via

$\qquad \qquad \text{tpc}_{_{\tau, \Delta}} = \Large{\frac{100}{\rho_{\tau, \Delta}}}$

and, similar to previous definitions,

<table style="width:45%; text-align: left; border: 0px solid black; float:left; margin-left: 60px">
    <tr>
        <th style="width:20%">Variable</th><th>Description</th> 
    </tr>
    <tr>
        <td>$\tau$</td><td>date</td>
    </tr>
    <tr>
        <td>$\Delta$</td><td>days</td>
    </tr>
    <tr>
        <td>$\rho_{\tau, \Delta}$</td>
        <td>The positive test rate on date $\tau$ w.r.t. starting date $\tau$ - ($\Delta$ - 1)</td>
    </tr>
    <tr>
      <td>$\text{tpc}_{_{\tau, \Delta}}$</td><td>The tests per case value on date $\tau$ w.r.t. starting date $\tau$ - ($\Delta$ - 1).</td>
    </tr>
</table>




In [17]:
# Tests per case = 100 / (positive test rate); 0 wherever the rate is not positive
ptr.loc[:, 'testsPerCase'] = np.where(
    ptr['positiveTestRate'].gt(0), 100 / ptr['positiveTestRate'], 0)

<br>
<br>

**Write**

In [18]:
# Save the periodic positive test rates, and tests per case, to the warehouse
ptr.to_csv(
    path_or_buf=os.path.join(warehouse, 'ptrPeriodic.csv'),
    header=True,
    index=False,
    encoding='utf-8',
)

In [19]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and
# returns None, so formatting its return value logs the text "None".  Capture
# the summary in a string buffer and log the captured text instead.
summary = io.StringIO()
ptr.info(buf=summary)
logger.info('\n%s', summary.getvalue())

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
Int64Index: 192608 entries, 0 to 192607
Data columns (total 5 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   datetimeobject    192608 non-null  datetime64[ns]
 1   STUSPS            192608 non-null  object        
 2   period            192608 non-null  object        
 3   positiveTestRate  192608 non-null  float64       
 4   testsPerCase      192608 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 8.8+ MB


<br>
<br>

### Positive Test Rate: Running Medians Across Varying Days

In [20]:
event = 'dailyPositiveTestRate'

# The fields of interest
base = baselines.loc[:, ['datetimeobject', 'STUSPS', event]].copy()

# Reshape: one column per place, one row per date
segment = base.pivot(index='datetimeobject', columns='STUSPS', values=event)

# The 0.5 quantile, i.e., the median, for a period of 1 plus each defined period
quantiles = algorithms.base.quantiles.Quantiles(
    data=segment, places=places, placestype=placestype)
matrix = quantiles.exc(
    periods=np.concatenate(([1], periods)), quantile=0.5, fieldname=f'{event}Median')



In [21]:
# Shorten the name of the median field
matrix = matrix.rename(columns={'dailyPositiveTestRateMedian': 'dailyPTRM'})

In [22]:
# Tests per case from the median rate: 100 / dailyPTRM; 0 wherever dailyPTRM is not positive
matrix.loc[:, 'dailyTPCM'] = np.where(
    matrix['dailyPTRM'].gt(0), 100 / matrix['dailyPTRM'], 0)

<br>

**Write**

In [23]:
# Save the selected fields, in this order, to the warehouse
matrix.loc[:, ['datetimeobject', 'STUSPS', 'period', 'dailyPTRM', 'dailyTPCM']].to_csv(
    path_or_buf=os.path.join(warehouse, 'ptrDaily.csv'),
    header=True,
    index=False,
    encoding='utf-8',
)

In [24]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and
# returns None, so formatting its return value logs the text "None".  Capture
# the summary in a string buffer and log the captured text instead.
summary = io.StringIO()
matrix.info(buf=summary)
logger.info('\n%s', summary.getvalue())

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205296 entries, 0 to 205295
Data columns (total 5 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   datetimeobject  205296 non-null  datetime64[ns]
 1   STUSPS          205296 non-null  object        
 2   dailyPTRM       205296 non-null  float64       
 3   period          205296 non-null  object        
 4   dailyTPCM       205296 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 7.8+ MB


<br>
<br>

### $inc./100K$: Running Medians Across Varying Days

For each `increase/100K` type

$\qquad \qquad \hat{\mu}_{\tau, \Delta} = median(S_{\tau - \Delta + 1}, \; \ldots, \; S_{\tau -1}, \;  S_{\tau})$

wherein

<table style="width:45%; text-align: left; border: 0px solid black; float:left; margin-left: 60px">
    <tr>
        <th style="width:20%">Variable</th><th>Description</th> 
    </tr>
    <tr>
        <td>$\tau$</td><td>date</td>
    </tr>
    <tr>
        <td>$\Delta$</td><td>days</td>
    </tr>
    <tr>
        <td>$\hat{\mu}_{\tau, \Delta}$</td>
        <td>The median on date $\tau$ w.r.t. the $\Delta$ days ending on date $\tau$</td>
    </tr>
    <tr>
      <td>$S$</td><td>A series, e.g., daily $deaths/100K$</td>
    </tr>
    <tr>
      <td>$S_{\tau}$</td>
      <td>The series data value on date $\tau$</td>
    </tr>
</table>

<br>


In [25]:
medians: pd.DataFrame = pd.DataFrame()

for event in ['positiveIncreaseRate', 'testIncreaseRate', 'deathIncreaseRate', 'icuIncreaseRate', 'hospitalizedIncreaseRate']:

    # The fields of interest
    base = baselines.loc[:, ['datetimeobject', 'STUSPS', event]].copy()

    # Reshape: one column per place, one row per date
    segment = base.pivot(index='datetimeobject', columns='STUSPS', values=event)

    # The 0.5 quantile, i.e., the median, of the event for each defined period
    quantiles = algorithms.base.quantiles.Quantiles(
        data=segment, places=places, placestype=placestype)
    values = quantiles.exc(periods=periods, quantile=0.5, fieldname=f'{event}Median')

    # The first result seeds the table; later results are joined on date, place, and period
    medians = values if medians.empty else medians.merge(
        values, how='inner', on=['datetimeobject', 'STUSPS', 'period'])



In [26]:
# Map each `...IncreaseRateMedian` field name to its short `...IRM` form
names = {f'{stem}IncreaseRateMedian': f'{stem}IRM'
         for stem in ['positive', 'test', 'death', 'icu', 'hospitalized']}

# Rename, then fix the order of the fields
medians = medians.rename(columns=names)
medians = medians[['datetimeobject', 'STUSPS', 'period',
                   'positiveIRM', 'testIRM', 'deathIRM', 'icuIRM', 'hospitalizedIRM']]

<br>
<br>

**Write**

In [27]:
# Save the running medians to the warehouse
medians.to_csv(
    path_or_buf=os.path.join(warehouse, 'medians.csv'),
    header=True,
    index=False,
    encoding='utf-8',
)

In [28]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and
# returns None, so formatting its return value logs the text "None".  Capture
# the summary in a string buffer and log the captured text instead.
summary = io.StringIO()
medians.info(buf=summary)
logger.info('\n%s', summary.getvalue())

INFO:__main__:
None


<class 'pandas.core.frame.DataFrame'>
Int64Index: 192608 entries, 0 to 192607
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   datetimeobject   192608 non-null  datetime64[ns]
 1   STUSPS           192608 non-null  object        
 2   period           192608 non-null  object        
 3   positiveIRM      192608 non-null  float64       
 4   testIRM          192608 non-null  float64       
 5   deathIRM         192608 non-null  float64       
 6   icuIRM           192608 non-null  float64       
 7   hospitalizedIRM  192608 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(2)
memory usage: 13.2+ MB


<br>
<br>

### $cum./100K$: Percentage Change Across Varying Days

The **running percentage change** w.r.t. defined **periods** for each `cumulative value per 100K` type


$\qquad \qquad \text{pc}_{\tau, \Delta} = 100 * \Large{ \frac{C_{\tau} \; - \; C_{\tau - \Delta + 1}}{C_{\tau - \Delta + 1}} }$

wherein

<table style="width:45%; text-align: left; border: 0px solid black; float:left; margin-left: 60px">
    <tr>
        <th style="width:20%">Variable</th><th>Description</th> 
    </tr>
    <tr>
        <td>$\tau$</td><td>date</td>
    </tr>
    <tr>
        <td>$\Delta$</td><td>days</td>
    </tr>
    <tr>
        <td>$\text{pc}_{\tau, \Delta}$</td>
        <td>The percentage change on date $\tau$ w.r.t. initial date $\tau$ - ($\Delta$ - 1)</td>
    </tr>
    <tr>
      <td>$C_{\tau}$</td><td>The cumulative value on date $\tau$.</td>
    </tr>
    <tr>
      <td>$C_{\tau - \Delta + 1}$</td>
      <td>The cumulative value on initial date $\tau$ - ($\Delta$ - 1).</td>
    </tr>
</table>

<br>


In [29]:
percentages = pd.DataFrame()

for event in ['deathRate', 'positiveRate', 'testRate', 'icuRate', 'hospitalizedRate']:

    # The fields of interest
    base = baselines.loc[:, ['datetimeobject', 'STUSPS', event]]

    # Reshape: one column per place, one row per date
    segment = base.pivot(index='datetimeobject', columns='STUSPS', values=event)

    # The delta calculations of the event for each defined period
    delta = algorithms.base.delta.Delta(data=segment, places=places, placestype=placestype)
    dataset = delta.exc(periods=periods, fieldname=f'{event}Delta')

    # Attach the variable that the delta calculations are based on
    dataset = dataset.merge(base, how='left', on=['datetimeobject', 'STUSPS'])

    # The first result seeds the table; later results are joined on date, place, and period
    percentages = dataset if percentages.empty else percentages.merge(
        dataset, how='inner', on=['datetimeobject', 'STUSPS', 'period'])



In [30]:
# Field order: the keys, then each `...Delta` field followed by the variable it is based on
percentages = percentages[
    ['datetimeobject', 'STUSPS', 'period'] +
    [field
     for name in ['deathRate', 'positiveRate', 'testRate', 'icuRate', 'hospitalizedRate']
     for field in (name + 'Delta', name)]]

<br>
<br>

**Write**

In [31]:
# Save the percentage changes to the warehouse
percentages.to_csv(
    path_or_buf=os.path.join(warehouse, 'percentages.csv'),
    header=True,
    index=False,
    encoding='utf-8',
)

In [32]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and
# returns None, so formatting its return value logs the text "None".  Capture
# the summary in a string buffer and log the captured text instead.
summary = io.StringIO()
percentages.info(buf=summary)
logger.info('\n%s\n', summary.getvalue())

INFO:__main__:
None



<class 'pandas.core.frame.DataFrame'>
Int64Index: 192608 entries, 0 to 192607
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   datetimeobject         192608 non-null  datetime64[ns]
 1   STUSPS                 192608 non-null  object        
 2   period                 192608 non-null  object        
 3   deathRateDelta         192608 non-null  float64       
 4   deathRate              192608 non-null  float64       
 5   positiveRateDelta      192608 non-null  float64       
 6   positiveRate           192608 non-null  float64       
 7   testRateDelta          192608 non-null  float64       
 8   testRate               192608 non-null  float64       
 9   icuRateDelta           192608 non-null  float64       
 10  icuRate                192608 non-null  float64       
 11  hospitalizedRateDelta  192608 non-null  float64       
 12  hospitalizedRate       192608 non-null  floa

<br>

## Clean-up

In [33]:
# Tidy up: delete log files, PDF files, and anything in the working directory
# whose name starts with `algorithms`.  Nothing here targets `warehouse`.
!rm -rf *.log
!rm -rf *.pdf
!rm -rf algorithms*