Notes in progress ...

<dl>
    <dt><b>Aim</b></dt>
  <dd>Determine the degree of dispersion, per day, amongst a state's delta curves.</dd>
  <dt><b>Why?</b></dt>
    <dd>In the case of <i>positive rate delta curves</i>, as the dispersion increases from zero, an impending outbreak becomes more likely [mathematical proof].  In the case of <i>hospitalization rate delta curves</i>, as the dispersion increases from zero, it is quite probable that hospitalizations will increase rapidly [contingency planning alert?] </dd>
</dl>

<br>

## Preliminaries

### Libraries

In [1]:
import io
import logging
import os
import pathlib
import sys

import numpy as np
import pandas as pd


<br>

### Paths

In [2]:
# The current working directory, and its parent directory as a string
child = os.getcwd()
parent = str(pathlib.Path(child).parent)

In [3]:
# The 'warehouse' directory sits beneath the current working directory; the
# results of this notebook are written to its 'dispersions' sub-directory
root = os.path.join(child, 'warehouse')
warehouse = os.path.join(root, 'dispersions')

<br>

Appending Paths

In [4]:
# Add the parent directory to the module search path
# NOTE(review): presumably so that the `atlantic` package imported later can be found - confirm
sys.path.append(parent)

<br>

### Logging

In [5]:
# Emit messages of level INFO and above; the logger is named after the current module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [6]:
# Record the resolved warehouse root path
logger.info(root)

INFO:__main__:J:\library\projects\sars\fundamentals\atlantic\notebooks\warehouse


In [7]:
# Record the resolved path of the 'dispersions' output directory
logger.info(warehouse)

INFO:__main__:J:\library\projects\sars\fundamentals\atlantic\notebooks\warehouse\dispersions


<br>

### Custom

In [8]:
import atlantic.base.directories

<br>

Set-up directories

In [9]:
# NOTE(review): Directories is a project helper whose implementation is not shown here;
# presumably cleanup() removes and create() re-creates the listed directories, such that
# each run starts with an empty 'dispersions' directory - confirm
directories = atlantic.base.directories.Directories()
directories.cleanup(listof=[warehouse])
directories.create(listof=[warehouse])

<br>
<br>

## Data

In [10]:
# The percentages data set is read from the 'trends' directory of the warehouse
datauri = os.path.join(root, 'trends', 'percentages.csv')

# The 'datetimeobject' field is parsed as a date-time field whilst reading
parse_dates = ['datetimeobject']
percentages = pd.read_csv(filepath_or_buffer=datauri, header=0, encoding='utf-8', parse_dates=parse_dates)

In [11]:
# DataFrame.info() writes its summary to a buffer (stdout by default) and returns None;
# logging its return value would only record "None".  Hence, the summary is captured
# in a text buffer and the captured text is logged.
summary = io.StringIO()
percentages.info(buf=summary)
logger.info('\n%s', summary.getvalue())

INFO:__main__:None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241384 entries, 0 to 241383
Data columns (total 15 columns):
 #   Column                          Non-Null Count   Dtype         
---  ------                          --------------   -----         
 0   datetimeobject                  241384 non-null  datetime64[ns]
 1   STUSPS                          241384 non-null  object        
 2   period                          241384 non-null  object        
 3   deathRateDelta                  241384 non-null  float64       
 4   deathRate                       241384 non-null  float64       
 5   positiveRateDelta               241384 non-null  float64       
 6   positiveRate                    241384 non-null  float64       
 7   testRateDelta                   241384 non-null  float64       
 8   testRate                        241384 non-null  float64       
 9   icuRateDelta                    241384 non-null  float64       
 10  icuRate                         241384 non-null  float64

<br>
<br>

## Calculations

In [12]:
# The measures whose dispersions are calculated; each name is used as a column of `percentages`
sections = ['positiveRateDelta', 'deathRateDelta', 'hospitalizedRateDelta']

<br>

### Functions

<br>

Baseline table

In [13]:
def baseline(data: pd.DataFrame, section: str) -> pd.DataFrame:
    """
    Summarises, per date & state, the spread of a measure across its periods.

    The data is pivoted such that each ('datetimeobject', 'STUSPS') pair is a row and
    each distinct 'period' is a column of `section` values; pivot_table's default
    aggregation (the mean) applies if a date/state/period combination occurs more than
    once.  Rows that lack a value for any period are dropped.

    The summary is assembled directly from typed Series, rather than via an
    intermediate `.values` matrix, which would mix dates, strings, and numbers in a
    single object array and thereby discard the numeric dtypes of the results.

    :param data: A frame that includes the fields 'datetimeobject', 'STUSPS', 'period', and `section`
    :param section: The name of the measure, i.e., column, to summarise
    :return: A frame with the columns ['datetimeobject', 'STUSPS', 'range', 'midpoint', 'median'],
             wherein range = max - min across periods, midpoint = min + range/2, and median
             is the median across periods.
    """

    # One row per date & state, one column per period
    pivot = data.pivot_table(index=['datetimeobject', 'STUSPS'], columns='period', values=section)
    pivot = pivot.dropna(axis=0, how='any')

    # The row-wise extremes are calculated once, and re-used
    lowest = pivot.min(axis=1)
    spread = pivot.max(axis=1) - lowest

    summary = pd.DataFrame(data={'range': spread,
                                 'midpoint': 0.5 * spread + lowest,
                                 'median': pivot.median(axis=1)})

    # Restore 'datetimeobject' & 'STUSPS' as ordinary columns
    return summary.reset_index(drop=False)


<br>

Scores

In [14]:
def scores(data: pd.DataFrame) -> pd.DataFrame:
    """
    Appends a dispersion 'score' column to a frame of ranges & midpoints.

    The score is  midpoint * ln(range)  if range > 0, otherwise 0.  The calculation
    is vectorised; a row-wise `apply` is slow, and for an empty frame it returns a
    frame rather than a series, which breaks the subsequent renaming step.

    :param data: A frame that includes the numeric fields 'range' and 'midpoint'
    :return: A new frame consisting of the columns of `data` and a 'score' column; `data` is not altered.
    """

    spread = data['range'].astype(float)
    midpoint = data['midpoint'].astype(float)

    # Non-positive (and missing) ranges are masked prior to taking logarithms, hence
    # np.log never encounters an invalid value; their scores are set to zero afterwards
    positive = spread > 0
    values = (midpoint * np.log(spread.where(positive))).where(positive, 0.0)

    return pd.concat([data, values.rename('score')], axis=1)


<br>

Latest

In [15]:
def latest(data: pd.DataFrame):
    """
    Extracts the records of the most recent date, ordered by rank.

    :param data: A frame that includes the fields 'datetimeobject' and 'rank'
    :return: The rows of `data` whose 'datetimeobject' equals the maximum 'datetimeobject'
             value, sorted by ascending 'rank'; the original index labels are retained.
    """

    newest = data['datetimeobject'].max()
    recent = data.loc[data['datetimeobject'] == newest]

    return recent.sort_values(by='rank')
    

<br>

### Divergence

In [16]:
for section in sections:

    # The data set w.r.t. a single measure
    example = percentages.loc[:, ['datetimeobject', 'STUSPS', 'period', section]]
    logger.info('\n{}\n'.format(section))

    # Per date & state: ['datetimeobject', 'STUSPS', 'range', 'midpoint', 'median', 'score']
    data = scores(data=baseline(data=example, section=section))

    # Rank the scores within each date: the highest score is ranked 1, and tied scores
    # share the lowest rank of their group.  Appends the field 'rank'.
    ranks = data.groupby(by='datetimeobject')['score'].rank(method='min', ascending=False)
    data = data.assign(rank=ranks)

    # The complete series of dispersion scores & ranks of the measure
    data.to_csv(path_or_buf=os.path.join(warehouse, section + 'Dispersion.csv'),
                header=True, encoding='utf-8', index=False)
    logger.info('\n{}\n'.format(data.head()))

    # The records of the latest date only, ordered by rank
    inbrief = latest(data=data)
    inbrief.to_csv(path_or_buf=os.path.join(warehouse, section + 'DispersionLatest.csv'),
                   header=True, encoding='utf-8', index=False)
    logger.info('\n{}\n'.format(inbrief.head()))
    

INFO:__main__:
positiveRateDelta

INFO:__main__:
  datetimeobject STUSPS range midpoint median  score  rank
0     2020-02-13     AK     0        0      0    0.0   2.0
1     2020-02-13     AL     0        0      0    0.0   2.0
2     2020-02-13     AR     0        0      0    0.0   2.0
3     2020-02-13     AZ     0        0      0    0.0   2.0
4     2020-02-13     CA     0        0      0    0.0   2.0

INFO:__main__:
      datetimeobject STUSPS    range midpoint   median       score  rank
16618     2020-12-28     NH  68.2209  34.1105  19.3353  144.039945   1.0
16609     2020-12-28     ME  67.2086  33.6043  23.3664  141.400138   2.0
16592     2020-12-28     CA  60.6899  30.3449   22.638  124.589533   3.0
16638     2020-12-28     WV  48.0735  24.0368  17.1958   93.087945   4.0
16626     2020-12-28     PA  46.1042  23.0521  14.3191   88.310269   5.0

INFO:__main__:
deathRateDelta

INFO:__main__:
  datetimeobject STUSPS range midpoint median  score  rank
0     2020-02-13     AK     0        

<br>
<br>

## End

In [17]:
%%bash

# Print the current date & time, i.e., a time stamp for this run of the notebook
date +"%Y-%m-%d %T"

2020-12-29 21:38:49
