In [None]:
# default_exp core

# Core

> Access to external data, constants, and important dates.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
%matplotlib inline

In [None]:
#export
import os
import pandas as pd
import requests
from fastcore.all import *
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np

In [None]:
#export
# Global plotting theme shared by every figure produced with this module.
sns.set_style("whitegrid")
sns.set_context(
    "notebook",
    font_scale=1.5,
    rc={"lines.linewidth": 2.5},
)

In [None]:
#export
# NOTE(review): presumably a rate of one event per 7.5 days (e.g. a recovery rate
# for a 7.5-day infectious period) -- confirm the meaning and cite the source of 7.5.
GAMMA = 1 / 7.5

## NYS Data

In [None]:
#export
class NYSAPI:
    '''Reader for the New York State testing dataset at health.data.ny.gov (resource `xdss-u53e`).

    The dataset is fetched as CSV pages through the `$limit`/`$offset` query parameters.

    usecols: raw column names to keep, in the order they should appear in the result.
    pretty_cols: display name for each raw column -- the last `_`-separated word,
        capitalised (`total_number_of_tests` -> `Tests`).
    '''
    def __init__(self, usecols=['test_date', 'total_number_of_tests', 'new_positives']): 
        self.url_base = "https://health.data.ny.gov/resource/xdss-u53e.csv/"
        # Copy, so neither the shared default list nor the caller's list is aliased.
        self.usecols = list(usecols)
        self.pretty_cols = [x.split('_')[-1].capitalize() for x in self.usecols]

    def get_data(self, offset=0, limit=5000):
        '''Fetch one page of at most `limit` rows, starting at row `offset`.

        Returns a DataFrame holding only `self.usecols`, in that order.
        '''
        # Without an explicit ordering the service does not guarantee that consecutive
        # pages are consistent; `$order=:id` pins the row order across requests.
        url = self.url_base + f'?$limit={limit}&$offset={offset}&$order=:id'
        return pd.read_csv(url, usecols=self.usecols)[self.usecols]
    
    def iter_data(self, offset=0, limit=5000):
        '''Yield successive pages (DataFrames) until an empty page is returned.'''
        while True:
            page = self.get_data(offset=offset, limit=limit)
            if len(page)==0: return
            offset += limit
            yield page
    
    def get_all_data(self):
        '''Download every page and return them as one DataFrame with a fresh 0..n-1 index.

        Returns an empty DataFrame with the `usecols` columns when there is no data.
        '''
        # One concat over all pages: `DataFrame.append` no longer exists in pandas 2.x,
        # and appending inside a loop re-copied the accumulated frame on every page.
        pages = list(self.iter_data())
        if not pages: return pd.DataFrame(columns=self.usecols)
        return pd.concat(pages, ignore_index=True)

    def get_all_data_nice(self):
        '''Like `get_all_data`, with columns renamed to `pretty_cols` and `Date` parsed as datetime.'''
        df = self.get_all_data()
        df = df.rename(columns=dict(zip(self.usecols, self.pretty_cols)))
        if 'Date' in df.columns: df['Date'] = pd.to_datetime(df['Date'])
        return df

    def get_all_data_statewide(self, min_date='2020-03-15'):
        '''Gets statewide aggregated data.

        Sums every numeric column per day, then adds `Date` (copy of the index) and
        `Odds = Positives / (Tests - Positives)`. Expects the `Tests` and `Positives`
        columns produced by the default `usecols`.

        min_date: earliest day to keep (anything `pd.Timestamp` accepts); `None` keeps all days.
        Returns a DataFrame indexed by `date`.
        Raises AssertionError if the data have no `Date` column.
        '''
        df = self.get_all_data_nice()
        assert 'Date' in df.columns, 'data do not have Date column'
        df['date'] = df['Date']
        # numeric_only=True: the datetime `Date` column cannot be summed, and current
        # pandas raises instead of silently dropping it.
        daily = df.groupby('date').sum(numeric_only=True)
        daily['Date'] = pd.to_datetime(daily.index)
        # A day on which every test is positive yields an infinite value here.
        daily['Odds'] = daily.Positives / (daily.Tests - daily.Positives)
        if min_date is None: return daily
        return daily[daily.Date >= pd.Timestamp(min_date)]

In [None]:
# Render the nbdev documentation entry for the statewide aggregation method.
show_doc(NYSAPI.get_all_data_statewide)

<h4 id="NYSAPI.get_all_data_statewide" class="doc_header"><code>NYSAPI.get_all_data_statewide</code><a href="__main__.py#L31" class="source_link" style="float:right">[source]</a></h4>

> <code>NYSAPI.get_all_data_statewide</code>(**`min_date`**=*`'2020-03-15'`*)

Gets statewide aggregated data.

In [None]:
# Fetch the full dataset over the network and aggregate it to one row per day.
# `df` is reused by the following cell.
ny = NYSAPI()
df = ny.get_all_data_statewide()
df.head()

Unnamed: 0_level_0,Tests,Positives,Date,Odds
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-15,1936,294,2020-03-15,0.17905
2020-03-16,2907,432,2020-03-16,0.174545
2020-03-17,4553,1009,2020-03-17,0.284707
2020-03-18,7698,1769,2020-03-18,0.298364
2020-03-19,10124,2950,2020-03-19,0.411207


In [None]:
#hide
# Row of the day on which the positivity odds peaked.
max_day = df.iloc[df.Odds.argmax()]
print(f'On {max_day.Date:%B %d, %Y} the maximum Positivity Odds was registered {max_day.Odds:3.4}')

On March 31, 2020 the maximum Positivity Odd was registerd 1.017


## NYS Important Dates

- 03/18/2020 [school closure](http://www.nysed.gov/news/2020/state-education-department-issues-updated-guidance-schools-regarding-novel-coronavirus)
- 03/20/2020 00:00 [50% of the workforce](https://www.governor.ny.gov/news/amid-ongoing-covid-19-pandemic-governor-cuomo-announces-deployment-1000-bed-hospital-ship-usns)
- 03/22/2020 20:00 ny_pause 
- 04/03/2020 [CDC masks](https://www.npr.org/sections/goatsandsoda/2020/04/10/829890635/why-there-so-many-different-guidelines-for-face-masks-for-the-public)
- 04/12/2020 mask_employers
- 04/17/2020 mask_public


In [None]:
#export
# Timestamps of the New York State interventions, parsed month-first.
# NOTE(review): these do not match the "NYS Important Dates" list in this notebook --
# that list has no 03/16 entry, gives 03/20 at 00:00 (20:00 here) and 03/22 at 20:00
# (00:00 here). Confirm which source is authoritative before changing either.
NEW_YORK_EVENTS = L(
    pd.to_datetime(stamp)
    for stamp in (
        '03-16-2020 20:00',
        '03-18-2020 20:00',
        '03-20-2020 20:00',
        '03-22-2020 00:00',
        '04-03-2020 00:00',
        '04-12-2020 00:00',
        '04-17-2020 00:00',
    )
)

## Plot

In [None]:
#export
# Default colours for the plots below (seaborn's colour-blind-safe palette).
palette = sns.color_palette('colorblind')

In [None]:
#export
@delegates(plt.subplots)
def plot_data_and_fit(df, x, y, y_hat, yl, yu, logy=True, palette=palette, ax=None, **kwargs):
    '''Draw observations, a fitted curve and its interval band on a single axes.

    df: DataFrame holding every column named below.
    x: column used for the horizontal axis (`None` falls back to the index for the band).
    y: column scattered as observed data; falsy to skip.
    y_hat: column drawn as the fitted line; falsy to skip.
    yl, yu: lower/upper bound columns of the shaded band; it is drawn only when both are given.
    logy: use a logarithmic y axis for the scatter and the line.
    palette: colour sequence; entry 0 colours the data, entry 1 the fit and the band.
    ax: axes to draw on; a new figure is created with `plt.subplots(**kwargs)` when `None`.

    Returns the axes. The band (labelled '95%CI') is left out of the legend.
    '''
    # Explicit None test: do not depend on the truthiness of an Axes object.
    if ax is None: _, ax = plt.subplots(**kwargs)
    # `c` is passed as a 1x3 array so that it is read as one RGB colour, not as per-point values.
    if y: df.plot.scatter(x=x, y=y, logy=logy, ax=ax, c=np.array(palette[0])[None,:], label=y)
    if y_hat: df.plot(x=x, y=y_hat, logy=logy, ax=ax, c=palette[1], label=y_hat)
    if yl and yu:
        # Draw on `ax` (not on pyplot's current axes) and against the same x values
        # as the scatter and the line.
        xs = df.index if x is None else df[x]
        ax.fill_between(xs, df[yl], df[yu], alpha=0.2, color=palette[1], label='95%CI')
    handles, labels = ax.get_legend_handles_labels()
    kept = [(h, l) for h, l in zip(handles, labels) if not l.startswith('95')]
    ax.legend([h for h, _ in kept], [l for _, l in kept])
    return ax

In [None]:
# Export the `#export` cells of the project's notebooks to the library modules.
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted 01_lasso.ipynb.
Converted 02_counterfactual.ipynb.
Converted index.ipynb.
