Skip to content

Commit

Permalink
NANS for HHS:
Browse files Browse the repository at this point in the history
* add missing columns
  • Loading branch information
dshemetov committed Apr 20, 2021
1 parent 773fe08 commit e6c84d8
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 23 deletions.
28 changes: 19 additions & 9 deletions hhs_hosp/delphi_hhs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@

import time
from delphi_epidata import Epidata
from delphi_utils.export import create_export_csv
from delphi_utils.geomap import GeoMapper
from delphi_utils import get_structured_logger
from delphi_utils import create_export_csv, get_structured_logger, Nans, GeoMapper
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -63,6 +61,17 @@ def generate_date_ranges(start, end):
output.append(Epidata.range(_date_to_int(start), _date_to_int(end)))
return output

def add_nancodes(df):
    """Attach missingness-code columns to a signal dataframe.

    Three columns are written onto ``df`` itself: ``missing_val``,
    ``missing_se`` and ``missing_sample_size``. The frame is modified
    in place and also returned.

    Parameters
    ----------
    df:
        Signal dataframe; it must contain a ``val`` column.

    Returns
    -------
    The same ``df`` object, with the three missingness columns set.
    """
    # Baseline codes: the value is taken to be present, while se and
    # sample_size are marked as not applicable.
    default_codes = {
        "missing_val": Nans.NOT_MISSING,
        "missing_se": Nans.NOT_APPLICABLE,
        "missing_sample_size": Nans.NOT_APPLICABLE,
    }
    for column, code in default_codes.items():
        df[column] = code

    # Any row whose value is still null gets flagged as unknown.
    val_is_null = df["val"].isnull()
    df.loc[val_is_null, "missing_val"] = Nans.UNKNOWN
    return df

def run_module(params):
"""
Expand Down Expand Up @@ -99,12 +108,15 @@ def run_module(params):
geo_mapper = GeoMapper()

for sig in SIGNALS:
state = geo_mapper.add_geocode(make_signal(all_columns, sig),
"state_id", "state_code",
from_col="state")
state = make_signal(all_columns, sig)
state = geo_mapper.add_geocode(state, "state_id", "state_code", from_col="state")
for geo in GEOS:
df = make_geo(state, geo, geo_mapper)
df["se"] = np.nan
df["sample_size"] = np.nan
df = add_nancodes(df)
create_export_csv(
make_geo(state, geo, geo_mapper),
df,
params["common"]["export_dir"],
geo,
sig
Expand All @@ -123,8 +135,6 @@ def make_geo(state, geo, geo_mapper):
state, "state_code", geo,
new_col="geo_id",
date_col="timestamp")
exported["se"] = np.nan
exported["sample_size"] = np.nan
return exported

def make_signal(all_columns, sig):
Expand Down
43 changes: 29 additions & 14 deletions hhs_hosp/tests/test_run.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from datetime import datetime, date
from unittest.mock import patch

from delphi_hhs.run import _date_to_int, int_date_to_previous_day_datetime, generate_date_ranges, \
from delphi_hhs.run import _date_to_int, add_nancodes, int_date_to_previous_day_datetime, generate_date_ranges, \
make_signal, make_geo, run_module
from delphi_hhs.constants import CONFIRMED, SUM_CONF_SUSP
from delphi_utils.geomap import GeoMapper
from delphi_utils import GeoMapper, Nans
from freezegun import freeze_time
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -72,38 +72,31 @@ def test_make_geo():
"""Check that geographies transform correctly."""
test_timestamp = datetime(year=2020, month=1, day=1)
geo_mapper = GeoMapper()

data = pd.DataFrame({
'state': ['PA','WV','OH'],
'state_code': [42, 54, 39],
'timestamp': [test_timestamp]*3,
'val': [1, 2, 4],
})

template = {
'se': np.nan,
'sample_size': np.nan,
}
expecteds = {
"state": pd.DataFrame(
dict(template,
geo_id=data.state,
dict(geo_id=data.state,
timestamp=data.timestamp,
val=data.val)),
"hhs": pd.DataFrame(
dict(template,
geo_id=['3', '5'],
dict(geo_id=['3', '5'],
timestamp=[test_timestamp]*2,
val=[3, 4])),
"nation": pd.DataFrame(
dict(template,
geo_id=['us'],
dict(geo_id=['us'],
timestamp=[test_timestamp],
val=[7]))
}
for geo, expected in expecteds.items():
result = make_geo(data, geo, geo_mapper)
for series in ["geo_id", "timestamp", "val", "se", "sample_size"]:
for series in ["geo_id", "timestamp", "val"]:
pd.testing.assert_series_equal(expected[series], result[series], obj=f"{geo}:{series}")


Expand Down Expand Up @@ -131,3 +124,25 @@ def test_ignore_last_range_no_results(mock_covid_hosp, mock_export):
}
}
assert not run_module(params) # function should not raise value error and has no return value

def test_add_nancode():
    """Check that add_nancodes sets the three missingness columns."""
    timestamp = pd.to_datetime("20200601")
    # Columns shared by the input frame and the expected frame.
    input_columns = {
        'state': ['PA','WV','OH'],
        'state_code': [42, 54, 39],
        'timestamp': [timestamp]*3,
        'val': [1, 2, np.nan],
        'se': [np.nan] * 3,
        'sample_size': [np.nan] * 3,
    }
    # Only the row with a null val should be coded as UNKNOWN.
    nancode_columns = {
        'missing_val': [Nans.NOT_MISSING] * 2 + [Nans.UNKNOWN],
        'missing_se': [Nans.NOT_APPLICABLE] * 3,
        'missing_sample_size': [Nans.NOT_APPLICABLE] * 3,
    }

    data = pd.DataFrame(input_columns)
    expected = pd.DataFrame({**input_columns, **nancode_columns})

    pd.testing.assert_frame_equal(expected, add_nancodes(data))

0 comments on commit e6c84d8

Please sign in to comment.