Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions src/acquisition/covid_hosp/common/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self,
connection,
table_name=None,
columns_and_types=None,
additional_fields=tuple()):
additional_fields=None):
"""Create a new Database object.

Parameters
Expand All @@ -26,20 +26,20 @@ def __init__(self,
An open connection to a database.
table_name : str
The name of the table which holds the dataset.
columns_and_types : list[tuple[str, Callable[str, ...]]]
List of CSV columns in order of appearance in the database. The first
element of each tuple is the CSV column name, and the second element is a
function which converts a string into the appropriate datatype for the
column.
columns_and_types : list[tuple[str, str, Callable]]
List of 3-tuples of (CSV header name, SQL column name, data type) for
all the columns in the CSV file.
additional_fields : tuple[str]
Tuple of additional fields to include at the end of the row which are not
present in the CSV data.
List of 2-tuples of (value, SQL column name) for additional fields to include
at the end of the row which are not present in the CSV data.
"""

self.connection = connection
self.table_name = table_name
self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \
'publication_date'
self.columns_and_types = columns_and_types
self.additional_fields = additional_fields
self.additional_fields = additional_fields if additional_fields is not None else []

@classmethod
@contextmanager
Expand Down Expand Up @@ -152,16 +152,19 @@ def insert_dataset(self, publication_date, dataframe):

num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields)
value_placeholders = ', '.join(['%s'] * num_columns)
sql = f'INSERT INTO `{self.table_name}` VALUES ({value_placeholders})'

columns = ', '.join(f'`{i[1]}`' for i in self.columns_and_types + self.additional_fields)
sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \
f'VALUES ({value_placeholders})'
id_and_publication_date = (0, publication_date)
with self.new_cursor() as cursor:
for _, row in dataframe.iterrows():
values = []
for name, dtype in self.columns_and_types:
for name, _, dtype in self.columns_and_types:
if isinstance(row[name], float) and math.isnan(row[name]):
values.append(None)
else:
values.append(dtype(row[name]))
cursor.execute(sql,
id_and_publication_date + tuple(values) + self.additional_fields)
id_and_publication_date +
tuple(values) +
tuple(i[0] for i in self.additional_fields))
251 changes: 155 additions & 96 deletions src/acquisition/covid_hosp/facility/database.py

Large diffs are not rendered by default.

156 changes: 92 additions & 64 deletions src/acquisition/covid_hosp/state_daily/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,75 +8,103 @@ class Database(BaseDatabase):
# note we share a database with state_timeseries
TABLE_NAME = 'covid_hosp_state_timeseries'

# These are the names that appear in the CSV header, in order of appearance
# in the database table, along with corresponding data type converters.
# However, note that the corresponding database column names may be shorter
# These are 3-tuples of (CSV header name, SQL db column name, data type) for
# all the columns in the CSV file.
# Note that the corresponding database column names may be shorter
# due to constraints on the length of column names. See
# /src/ddl/covid_hosp.sql for more information.
# Additionally, all column names below are shared with state_timeseries,
# except for reporting_cutoff_start (here) and date (there). If you need
# to update a column name, do it in both places.
ORDERED_CSV_COLUMNS = [
('state', str),
('reporting_cutoff_start', Utils.int_from_date),
('critical_staffing_shortage_today_yes', int),
('critical_staffing_shortage_today_no', int),
('critical_staffing_shortage_today_not_reported', int),
('critical_staffing_shortage_anticipated_within_week_yes', int),
('critical_staffing_shortage_anticipated_within_week_no', int),
('critical_staffing_shortage_anticipated_within_week_not_reported', int),
('hospital_onset_covid', int),
('hospital_onset_covid_coverage', int),
('inpatient_beds', int),
('inpatient_beds_coverage', int),
('inpatient_beds_used', int),
('inpatient_beds_used_coverage', int),
('inpatient_beds_used_covid', int),
('inpatient_beds_used_covid_coverage', int),
('previous_day_admission_adult_covid_confirmed', int),
('previous_day_admission_adult_covid_confirmed_coverage', int),
('previous_day_admission_adult_covid_suspected', int),
('previous_day_admission_adult_covid_suspected_coverage', int),
('previous_day_admission_pediatric_covid_confirmed', int),
('previous_day_admission_pediatric_covid_confirmed_coverage', int),
('previous_day_admission_pediatric_covid_suspected', int),
('previous_day_admission_pediatric_covid_suspected_coverage', int),
('staffed_adult_icu_bed_occupancy', int),
('staffed_adult_icu_bed_occupancy_coverage', int),
('staffed_icu_adult_patients_confirmed_and_suspected_covid', int),
('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', int),
('staffed_icu_adult_patients_confirmed_covid', int),
('staffed_icu_adult_patients_confirmed_covid_coverage', int),
('total_adult_patients_hospitalized_confirmed_and_suspected_covid', int),
('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', int),
('total_adult_patients_hospitalized_confirmed_covid', int),
('total_adult_patients_hospitalized_confirmed_covid_coverage', int),
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', int),
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', int),
('total_pediatric_patients_hospitalized_confirmed_covid', int),
('total_pediatric_patients_hospitalized_confirmed_covid_coverage', int),
('total_staffed_adult_icu_beds', int),
('total_staffed_adult_icu_beds_coverage', int),
('inpatient_beds_utilization', float),
('inpatient_beds_utilization_coverage', int),
('inpatient_beds_utilization_numerator', int),
('inpatient_beds_utilization_denominator', int),
('percent_of_inpatients_with_covid', float),
('percent_of_inpatients_with_covid_coverage', int),
('percent_of_inpatients_with_covid_numerator', int),
('percent_of_inpatients_with_covid_denominator', int),
('inpatient_bed_covid_utilization', float),
('inpatient_bed_covid_utilization_coverage', int),
('inpatient_bed_covid_utilization_numerator', int),
('inpatient_bed_covid_utilization_denominator', int),
('adult_icu_bed_covid_utilization', float),
('adult_icu_bed_covid_utilization_coverage', int),
('adult_icu_bed_covid_utilization_numerator', int),
('adult_icu_bed_covid_utilization_denominator', int),
('adult_icu_bed_utilization', float),
('adult_icu_bed_utilization_coverage', int),
('adult_icu_bed_utilization_numerator', int),
('adult_icu_bed_utilization_denominator', int),
('state', 'state', str),
('reporting_cutoff_start', 'reporting_cutoff_start', Utils.int_from_date),
('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int),
('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int),
('critical_staffing_shortage_today_not_reported',
'critical_staffing_shortage_today_not_reported', int),
('critical_staffing_shortage_anticipated_within_week_yes',
'critical_staffing_shortage_anticipated_within_week_yes', int),
('critical_staffing_shortage_anticipated_within_week_no',
'critical_staffing_shortage_anticipated_within_week_no', int),
('critical_staffing_shortage_anticipated_within_week_not_reported',
'critical_staffing_shortage_anticipated_within_week_not_reported', int),
('hospital_onset_covid', 'hospital_onset_covid', int),
('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int),
('inpatient_beds', 'inpatient_beds', int),
('inpatient_beds_coverage', 'inpatient_beds_coverage', int),
('inpatient_beds_used', 'inpatient_beds_used', int),
('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int),
('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int),
('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int),
('previous_day_admission_adult_covid_confirmed', 'previous_day_admission_adult_covid_confirmed',
int),
('previous_day_admission_adult_covid_confirmed_coverage',
'previous_day_admission_adult_covid_confirmed_coverage', int),
('previous_day_admission_adult_covid_suspected', 'previous_day_admission_adult_covid_suspected',
int),
('previous_day_admission_adult_covid_suspected_coverage',
'previous_day_admission_adult_covid_suspected_coverage', int),
('previous_day_admission_pediatric_covid_confirmed',
'previous_day_admission_pediatric_covid_confirmed', int),
('previous_day_admission_pediatric_covid_confirmed_coverage',
'previous_day_admission_pediatric_covid_confirmed_coverage', int),
('previous_day_admission_pediatric_covid_suspected',
'previous_day_admission_pediatric_covid_suspected', int),
('previous_day_admission_pediatric_covid_suspected_coverage',
'previous_day_admission_pediatric_covid_suspected_coverage', int),
('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int),
('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int),
('staffed_icu_adult_patients_confirmed_and_suspected_covid',
'staffed_icu_adult_patients_confirmed_suspected_covid', int),
('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage',
'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int),
('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid',
int),
('staffed_icu_adult_patients_confirmed_covid_coverage',
'staffed_icu_adult_patients_confirmed_covid_coverage', int),
('total_adult_patients_hospitalized_confirmed_and_suspected_covid',
'total_adult_patients_hosp_confirmed_suspected_covid', int),
('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage',
'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int),
('total_adult_patients_hospitalized_confirmed_covid',
'total_adult_patients_hosp_confirmed_covid', int),
('total_adult_patients_hospitalized_confirmed_covid_coverage',
'total_adult_patients_hosp_confirmed_covid_coverage', int),
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid',
'total_pediatric_patients_hosp_confirmed_suspected_covid', int),
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage',
'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int),
('total_pediatric_patients_hospitalized_confirmed_covid',
'total_pediatric_patients_hosp_confirmed_covid', int),
('total_pediatric_patients_hospitalized_confirmed_covid_coverage',
'total_pediatric_patients_hosp_confirmed_covid_coverage', int),
('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int),
('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int),
('inpatient_beds_utilization', 'inpatient_beds_utilization', float),
('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int),
('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int),
('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int),
('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float),
('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', int),
('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator',
int),
('percent_of_inpatients_with_covid_denominator', 'percent_of_inpatients_with_covid_denominator',
int),
('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float),
('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int),
('inpatient_bed_covid_utilization_numerator', 'inpatient_bed_covid_utilization_numerator', int),
('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator',
int),
('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float),
('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int),
('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', int),
('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator',
int),
('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float),
('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int),
('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int),
('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int),
]

def __init__(self, *args, **kwargs):
Expand All @@ -85,4 +113,4 @@ def __init__(self, *args, **kwargs):
**kwargs,
table_name=Database.TABLE_NAME,
columns_and_types=Database.ORDERED_CSV_COLUMNS,
additional_fields=('D',))
additional_fields=[('D', 'record_type')])
Loading