In [None]:
from libs import notebook_helpers
# One-time setup call from the project's notebook helpers, made in the first cell before any data is loaded.
# NOTE(review): presumably points the libs at the covid-data-public checkout — confirm in notebook_helpers.
notebook_helpers.set_covid_data_public()

In [None]:
from IPython.display import Javascript, display
from ipywidgets import widgets

def run_all(ev):
    """Button click handler: ask the notebook frontend to execute all cells below.

    Args:
        ev: The click event/widget passed by ``Button.on_click``; unused.
    """
    # The work happens in the browser: the snippet is shipped to the frontend via display().
    script = Javascript('IPython.notebook.execute_cells_below()')
    display(script)

# Render a button whose click handler (run_all) executes every cell below this one.
button = widgets.Button(description="Execute all cells")
button.on_click(run_all)
display(button)

In [None]:
%load_ext autoreload
%autoreload 2

import pathlib

from datetime import timedelta
from datetime import datetime
from libs.datasets.sources.can_pyseir_location_output import CANPyseirLocationOutput
from libs.qa import dataset_summary
from IPython.display import display, clear_output
from ipywidgets import interact
import ipywidgets as widgets
from libs.datasets.dataset_utils import AggregationLevel
from covidactnow.datapublic.common_fields import CommonFields
import pandas as pd
from libs.datasets import dataset_utils
from libs.datasets import combined_datasets
from libs.datasets.sources import covid_county_data
from libs.datasets.sources import cds_dataset
from libs.datasets.sources import covid_tracking_source
from libs.datasets.sources import nytimes_dataset
from libs.datasets.sources import jhu_dataset
from libs.datasets.sources import nha_hospitalization
from libs.datasets.sources import texas_hospitalizations
from libs.datasets.timeseries import TimeseriesDataset
# Raise pandas' display limits so the long/wide tables shown by the widgets below are not truncated.
pd.options.display.max_rows = 3000
pd.options.display.max_columns = 3000
from IPython.display import HTML

In [None]:
# Emit a script that hides all code-input areas once the page is ready, plus a
# link that toggles them on/off afterwards.
# NOTE(review): relies on jQuery (`$`) and the `div.input` class being present in the
# notebook frontend — confirm it works in the UI you use.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

# Data Source Comparison notebook
## Charts

#### Combined datasets timeseries
Display data from the combined dataset, with a selectable set of columns.

#### Data sources for a specific field
For a given field, show the values for all different data sources

#### Columns for a specific data source
Show data for a data source.

### Tips
 * Use either a state or a fips code. Choosing a state filters to state-level metrics.
   If you set a fips code while a state is also selected, the view may not show any data.
 * Checking the diff box will calculate diffs between days for each column

TODO: Add support for loading files/checking out specific commit of covid-data-model build + associated covid-data-public commit.

In [None]:

def load_combined_timeseries(sources, timeseries):
    """Stack the combined dataset and every individual source into one dataset.

    Args:
        sources: Mapping whose values each expose a ``.data`` DataFrame.
        timeseries: Combined dataset exposing a ``.data`` DataFrame.

    Returns:
        A ``TimeseriesDataset`` wrapping the concatenation of the combined
        data (labelled ``"Combined Data"`` in its ``source`` column) followed
        by each source's data, in ``sources`` iteration order.
    """
    # Work on a copy so the caller's combined dataset does not gain a "source" column.
    combined_frame = timeseries.data.copy()
    combined_frame["source"] = "Combined Data"

    frames = [combined_frame]
    frames.extend(source.data for source in sources.values())
    return TimeseriesDataset(pd.concat(frames))


def filter_field_data(combined_series, field, state=None, fips=None, after=None, diff=False, before=None):
    """Return one field for a location as a table with one column per data source.

    Args:
        combined_series: Dataset exposing ``get_subset`` and ``get_data``; its
            data must contain ``fips``, ``date``, ``source`` and ``field`` columns.
        field: Name of the value column to show.
        state: Optional state filter. When given without ``fips``, the dataset
            is first restricted to the state aggregation level.
        fips: Optional fips filter, forwarded to ``get_data``.
        after: Forwarded to ``get_data`` as ``after``.
        diff: If true, return row-to-row differences instead of raw values.
        before: Forwarded to ``get_data`` as ``before``.

    Returns:
        DataFrame indexed by (fips, date) with one column per ``source`` value.
    """
    columns = [CommonFields.FIPS, CommonFields.DATE, field, "source"]
    if state and not fips:
        combined_series = combined_series.get_subset(aggregation_level=AggregationLevel.STATE)

    data = combined_series.get_data(state=state, fips=fips, after=after, columns_slice=columns, before=before)
    data = data.set_index(["fips", "date", "source"])
    series = data[field]
    if diff:
        # Rows from all sources are stacked in one series, so a plain diff() would
        # subtract the last value of one source from the first value of the next.
        # Diff inside each (fips, source) group so every source starts with NaN.
        series = series.groupby(level=["fips", "source"]).diff()
    return series.unstack(level=-1)
    
def filter_timeseries(timeseries, state=None, fips=None, columns=None, after=None, before=None, diff=False):
    """Return rows of ``timeseries`` for a location, indexed by (fips, date).

    Args:
        timeseries: Dataset exposing ``get_subset`` and ``get_data``.
        state: Optional state filter. When given without ``fips``, the dataset
            is first restricted to the state aggregation level.
        fips: Optional fips filter, forwarded to ``get_data``.
        columns: Optional list of value columns to keep; ``fips`` and ``date``
            are always added. A falsy value is forwarded unchanged.
        after: Forwarded to ``get_data`` as ``after``.
        before: Forwarded to ``get_data`` as ``before``.
        diff: If true, return row-to-row differences instead of raw values.

    Returns:
        DataFrame indexed by (fips, date).
    """
    if columns:
        # fips and date must be in the slice because they become the index below.
        columns = [CommonFields.FIPS, CommonFields.DATE] + list(columns)
    us_timeseries = timeseries
    if state and not fips:
        us_timeseries = us_timeseries.get_subset(aggregation_level=AggregationLevel.STATE)

    # `before` must be forwarded too; otherwise the upper date bound chosen by the
    # caller is silently ignored.
    data = us_timeseries.get_data(
        state=state, fips=fips, columns_slice=columns, after=after, before=before
    ).set_index(['fips', 'date'])
    if not diff:
        return data

    return data.diff()


In [None]:
print("Loading data sources...")
# Individual sources, the combined dataset, and both stacked together with a "source" label.
sources = notebook_helpers.load_data_sources_by_name()
timeseries = combined_datasets.load_us_timeseries_dataset()
combined_timeseries = load_combined_timeseries(sources, timeseries)

# Default "after" value for the widgets: the UTC date two weeks ago, as YYYY-MM-DD.
start_date = (datetime.utcnow() - timedelta(days=14)).date().isoformat()
# The leading " " entry is the "no state selected" choice in the state dropdowns.
states = [" "] + timeseries.states
# Location/key columns are not offered as selectable value columns.
not_included_columns = ['fips', 'date', 'state', 'county', 'country', 'aggregate_level']
columns = [name for name in timeseries.data.columns if name not in not_included_columns]
clear_output()
print("Successfully loaded data sources")

## Combined Datasets Timeseries

In [None]:
# Multi-select of value columns for the combined-dataset view; `value` is the initial selection.
column_selector = widgets.SelectMultiple(
    options=[column for column in columns if column not in ['fips', 'date']], 
    value=["cases", "deaths", "current_hospitalized", "current_icu", "positive_tests", "negative_tests"]
)

@interact
def display_timeseries(
    state=states, fips="", columns=column_selector, after=start_date, before="", diff=False
):
    """Interactive view of the combined dataset for one state or fips.

    Nothing is shown until a state or a fips is chosen. The blank state
    choice (" ") and an empty ``before`` box are both treated as "not set".
    """
    state = None if state == " " else state
    before = None if before == "" else before
    if not (state or fips):
        return
    table = filter_timeseries(
        timeseries, state=state, fips=fips, columns=list(columns), after=after, diff=diff, before=before
    )
    display(table)

## Data sources for a specific field

In [None]:
@interact
def display_single_field(state=states, fips="", field=columns, after=start_date, before="", diff=False):
    """Interactive view of a single field across all data sources.

    Nothing is shown until a state or a fips is chosen. The blank state
    choice (" ") and an empty ``before`` box are both treated as "not set".
    """
    if state == " ":
        state = None
    if before == "":
        # Normalize the empty text box to None (an assignment, not a comparison),
        # the same way display_timeseries does, so no empty-string bound is passed on.
        before = None
    if not state and not fips:
        return
    data = filter_field_data(combined_timeseries, field, state=state, fips=fips, after=after, diff=diff, before=before)
    display(data)

## Data for a specific data source

In [None]:
# Picker for one data source. Options are the (name, dataset) pairs from `sources`;
# the callback below reads `.data` on the selected value, i.e. it receives the dataset.
source_selector = widgets.Select(
    options=list(sources.items())
)

# Full column list, used by the callback below when "show_all_columns" is ticked.
all_columns = columns
# Second multi-select with the same options and initial selection as column_selector,
# bound to the per-source view.
column_selector2 = widgets.SelectMultiple(
    options=[column for column in columns if column not in ['fips', 'date']], 
    value=["cases", "deaths", "current_hospitalized", "current_icu", "positive_tests", "negative_tests"]
)

@interact
def display_data_source_timeseries(
    dataset=source_selector, state=states, fips="", columns=column_selector2, after=start_date, before="", diff=False, show_all_columns=False
):
    """Interactive view of a single data source for one state or fips.

    ``show_all_columns`` replaces the selection with every known column.
    Either way, only columns present in the chosen source are requested.
    Nothing is shown until a state or a fips is chosen.
    """
    requested = all_columns if show_all_columns else columns
    # Keep only the columns this source actually provides.
    available = [name for name in requested if name in dataset.data.columns]

    before = None if before == "" else before
    state = None if state == " " else state
    if not (state or fips):
        return
    table = filter_timeseries(
        dataset, state=state, fips=fips, columns=available, after=after, diff=diff, before=before
    )
    display(table)