In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

from typing import Optional
import pandas as pd
import numpy as np
import pathlib
import seaborn as sns
from covidactnow.datapublic.common_fields import CommonFields
from covidactnow.datapublic import common_df
from libs.datasets import combined_datasets
from libs.datasets import AggregationLevel
from libs import top_level_metrics
from libs import us_state_abbrev
# Raise pandas' display limits so frames of up to 3000 rows/columns render
# without being truncated.
pd.set_option("display.max_rows", 3000)
pd.set_option("display.max_columns", 3000)

# Apply seaborn's default plot styling for any figures drawn later.
sns.set()

In [None]:
# Load the combined US timeseries dataset once; the cells below take per-state
# subsets of it.
us_timeseries = combined_datasets.load_us_timeseries_dataset()

In [None]:
# NOTE(review): the names below suggest Massachusetts ("mass"), but the subset
# actually selects state="CT" — confirm which state was intended.
mass_timeseries = us_timeseries.get_subset(aggregation_level=AggregationLevel.STATE, state="CT")
# Get recent data
mass = mass_timeseries.get_data(after="2020-03-15")
# equivalent to
# us_timeseries.get_data(aggregation_level=AggregationLevel.STATE, state="CT", after="2020-03-15")
# or
# mass_data = mass_timeseries.data
# mass_data.loc[mass_data[CommonFields.DATE] > "2020-03-15"]
# or
# mass_data.loc[mass_data['date'] > "2020-03-15"]

# CSV export of the website's test-positivity values; passed to compare_state,
# which reads it with pd.read_csv. This is a local, per-user path.
path_to_test_positivity = "~/Downloads/test-positive.csv"

In [None]:
def mean_percent_diff(s1, s2):
    """Return the mean absolute percent difference between two series.

    Each element-wise difference is divided by the element-wise *sum* of the
    two inputs (not their mean) and scaled by 100. The absolute values are
    then averaged with ``mean()``, which skips NaN entries such as those
    produced by 0/0.
    """
    difference = s1 - s2
    total = s1 + s2
    percent = (difference / total) * 100
    return percent.abs().mean()

In [None]:
def compare_state(state, path_to_test_positivity):
    """Compare API-computed test positivity with the website's values for a state.

    Args:
        state: State abbreviation. Used to subset the module-level
            ``us_timeseries`` and as the key into
            ``us_state_abbrev.ABBREV_US_FIPS``.
        path_to_test_positivity: Path to a CSV of the website's data. It must
            contain ``fips``, ``date`` and ``test-positivity`` columns.

    Returns:
        A DataFrame with one row per row of the state's timeseries (the
        website data is left-joined on ``date``). It holds the API value
        (``testPositivity``), the website value (``test-positivity``) and
        ``diff`` = website minus API. Both sides carry a ``fips`` column, so
        the merge emits them with pandas' default ``_x``/``_y`` suffixes.

    Side effects:
        Prints the mean percent difference between the two series.
    """
    state_timeseries = us_timeseries.get_subset(aggregation_level=AggregationLevel.STATE, state=state)
    # Get recent data
    data = state_timeseries.get_data(after="2020-03-15")
    fips = us_state_abbrev.ABBREV_US_FIPS[state]

    # METRICS API
    metrics = top_level_metrics.calculate_top_level_metrics_for_fips(fips)
    # Name the column directly instead of renaming the default column 0.
    # NOTE(review): the join below assumes this series is indexed by date — confirm.
    positive_df = pd.Series(metrics["testPositivity"]).to_frame(name="testPositivity")

    # WEBSITE
    website = pd.read_csv(path_to_test_positivity)
    # Copy the filtered rows so the column assignment below writes to an
    # independent frame rather than a slice (avoids SettingWithCopyWarning).
    website_fips = website[website["fips"] == int(fips)].copy()
    # pd.to_datetime replaces the unit-less astype("datetime64"), which
    # pandas >= 2.0 rejects.
    website_fips["date"] = pd.to_datetime(website_fips["date"])

    # JOIN_DATA
    api_columns = ["date", "fips", "testPositivity", "positive_tests", "negative_tests"]
    data_with_positive = data.set_index("date").join(positive_df).reset_index()[api_columns]
    website_and_api_calc = data_with_positive.merge(website_fips, on="date", how="left")

    mean_percent_diff_result = mean_percent_diff(
        website_and_api_calc["testPositivity"], website_and_api_calc["test-positivity"]
    )
    print(f"Website and API have mean % diff: {mean_percent_diff_result}")
    website_and_api_calc["diff"] = (
        website_and_api_calc["test-positivity"] - website_and_api_calc["testPositivity"]
    )
    return website_and_api_calc

In [None]:
# Website stops computing data when negative tests don't increase multiple days in a row
compare_state("CA", path_to_test_positivity).head(10)

In [None]:
# Some large differences at the beginning of the timeseries
compare_state("MA", path_to_test_positivity).head(10)

In [None]:
# Same website-vs-API comparison for CO; show the first 10 rows.
compare_state("CO", path_to_test_positivity).head(10)

In [None]:
# Same website-vs-API comparison for NV; show the first 10 rows.
compare_state("NV", path_to_test_positivity).head(10)