From da84f2ffe2232cd1ae3c3c69f5046912f7dfcfbb Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 20 Jan 2021 11:57:21 -0800 Subject: [PATCH 1/3] Add sql column names to csv column mapping --- src/acquisition/covid_hosp/common/database.py | 18 +- .../covid_hosp/facility/database.py | 251 +++++++++++------- .../covid_hosp/state_daily/database.py | 156 ++++++----- .../covid_hosp/state_timeseries/database.py | 156 ++++++----- src/ddl/covid_hosp.sql | 6 + .../covid_hosp/common/test_database.py | 10 +- .../covid_hosp/state_daily/__init__.py | 4 + .../covid_hosp/state_daily/test_database.py | 1 + .../covid_hosp/state_daily/test_network.py | 2 + .../covid_hosp/state_daily/test_update.py | 1 + 10 files changed, 371 insertions(+), 234 deletions(-) create mode 100644 tests/acquisition/covid_hosp/state_daily/__init__.py diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py index 332de460e..d40545c90 100644 --- a/src/acquisition/covid_hosp/common/database.py +++ b/src/acquisition/covid_hosp/common/database.py @@ -17,7 +17,7 @@ def __init__(self, connection, table_name=None, columns_and_types=None, - additional_fields=tuple()): + additional_fields=None): """Create a new Database object. 
Parameters @@ -38,8 +38,10 @@ def __init__(self, self.connection = connection self.table_name = table_name + self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \ + 'publication_date' self.columns_and_types = columns_and_types - self.additional_fields = additional_fields + self.additional_fields = additional_fields if additional_fields is not None else [] @classmethod @contextmanager @@ -152,16 +154,20 @@ def insert_dataset(self, publication_date, dataframe): num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields) value_placeholders = ', '.join(['%s'] * num_columns) - sql = f'INSERT INTO `{self.table_name}` VALUES ({value_placeholders})' - + columns = ', '.join([f'`{i[1]}`' for i in self.columns_and_types] + + [i[0] for i in self.additional_fields]) + sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) VALUES ' \ + f'({value_placeholders})' id_and_publication_date = (0, publication_date) with self.new_cursor() as cursor: for _, row in dataframe.iterrows(): values = [] - for name, dtype in self.columns_and_types: + for name, _, dtype in self.columns_and_types: if isinstance(row[name], float) and math.isnan(row[name]): values.append(None) else: values.append(dtype(row[name])) cursor.execute(sql, - id_and_publication_date + tuple(values) + self.additional_fields) + id_and_publication_date + + tuple(values) + + tuple(i[1] for i in self.additional_fields)) diff --git a/src/acquisition/covid_hosp/facility/database.py b/src/acquisition/covid_hosp/facility/database.py index 6fa28ab00..3f29e5d57 100644 --- a/src/acquisition/covid_hosp/facility/database.py +++ b/src/acquisition/covid_hosp/facility/database.py @@ -7,105 +7,164 @@ class Database(BaseDatabase): TABLE_NAME = 'covid_hosp_facility' - # These are the names that appear in the CSV header, in order of appearance - # in the database table, along with corresponding data type converters. 
- # However, note that the corresponding database column names may be shorter + # These are 3-tuples of (CSV header name, SQL db column name, data type) for + # all the columns in the CSV file. + # Note that the corresponding database column names may be shorter # due to constraints on the length of column names. See # /src/ddl/covid_hosp.sql for more information. ORDERED_CSV_COLUMNS = [ - ('hospital_pk', str), - ('collection_week', Utils.int_from_date), - ('state', str), - ('ccn', str), - ('hospital_name', str), - ('address', str), - ('city', str), - ('zip', str), - ('hospital_subtype', str), - ('fips_code', str), - ('is_metro_micro', Utils.parse_bool), - ('total_beds_7_day_avg', float), - ('all_adult_hospital_beds_7_day_avg', float), - ('all_adult_hospital_inpatient_beds_7_day_avg', float), - ('inpatient_beds_used_7_day_avg', float), - ('all_adult_hospital_inpatient_bed_occupied_7_day_avg', float), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', float), - ('total_adult_patients_hospitalized_confirmed_covid_7_day_avg', float), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', float), - ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', float), - ('inpatient_beds_7_day_avg', float), - ('total_icu_beds_7_day_avg', float), - ('total_staffed_adult_icu_beds_7_day_avg', float), - ('icu_beds_used_7_day_avg', float), - ('staffed_adult_icu_bed_occupancy_7_day_avg', float), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg', float), - ('staffed_icu_adult_patients_confirmed_covid_7_day_avg', float), - ('total_patients_hospitalized_confirmed_influenza_7_day_avg', float), - ('icu_patients_confirmed_influenza_7_day_avg', float), - ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg', float), - ('total_beds_7_day_sum', int), - ('all_adult_hospital_beds_7_day_sum', int), - ('all_adult_hospital_inpatient_beds_7_day_sum', int), - 
('inpatient_beds_used_7_day_sum', int), - ('all_adult_hospital_inpatient_bed_occupied_7_day_sum', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', int), - ('total_adult_patients_hospitalized_confirmed_covid_7_day_sum', int), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', int), - ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', int), - ('inpatient_beds_7_day_sum', int), - ('total_icu_beds_7_day_sum', int), - ('total_staffed_adult_icu_beds_7_day_sum', int), - ('icu_beds_used_7_day_sum', int), - ('staffed_adult_icu_bed_occupancy_7_day_sum', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum', int), - ('staffed_icu_adult_patients_confirmed_covid_7_day_sum', int), - ('total_patients_hospitalized_confirmed_influenza_7_day_sum', int), - ('icu_patients_confirmed_influenza_7_day_sum', int), - ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum', int), - ('total_beds_7_day_coverage', int), - ('all_adult_hospital_beds_7_day_coverage', int), - ('all_adult_hospital_inpatient_beds_7_day_coverage', int), - ('inpatient_beds_used_7_day_coverage', int), - ('all_adult_hospital_inpatient_bed_occupied_7_day_coverage', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', int), - ('total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage', int), - ('inpatient_beds_7_day_coverage', int), - ('total_icu_beds_7_day_coverage', int), - ('total_staffed_adult_icu_beds_7_day_coverage', int), - ('icu_beds_used_7_day_coverage', int), - ('staffed_adult_icu_bed_occupancy_7_day_coverage', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage', int), - ('staffed_icu_adult_patients_confirmed_covid_7_day_coverage', int), - 
('total_patients_hospitalized_confirmed_influenza_7_day_coverage', int), - ('icu_patients_confirmed_influenza_7_day_coverage', int), - ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage', int), - ('previous_day_admission_adult_covid_confirmed_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_18-19_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_20-29_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_30-39_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_40-49_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_50-59_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_60-69_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_70-79_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_80+_7_day_sum', int), - ('previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', int), - ('previous_day_admission_pediatric_covid_confirmed_7_day_sum', int), - ('previous_day_covid_ED_visits_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_18-19_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_20-29_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_30-39_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_40-49_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_50-59_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_60-69_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_70-79_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_80+_7_day_sum', int), - ('previous_day_admission_adult_covid_suspected_unknown_7_day_sum', int), - ('previous_day_admission_pediatric_covid_suspected_7_day_sum', int), - ('previous_day_total_ED_visits_7_day_sum', int), - ('previous_day_admission_influenza_confirmed_7_day_sum', int), + ('hospital_pk', 
'hospital_pk', str), + ('collection_week', 'collection_week', Utils.int_from_date), + ('state', 'state', str), + ('ccn', 'ccn', str), + ('hospital_name', 'hospital_name', str), + ('address', 'address', str), + ('city', 'city', str), + ('zip', 'zip', str), + ('hospital_subtype', 'hospital_subtype', str), + ('fips_code', 'fips_code', str), + ('is_metro_micro', 'is_metro_micro', Utils.parse_bool), + ('total_beds_7_day_avg', 'total_beds_7_day_avg', float), + ('all_adult_hospital_beds_7_day_avg', 'all_adult_hospital_beds_7_day_avg', float), + ('all_adult_hospital_inpatient_beds_7_day_avg', 'all_adult_hospital_inpatient_beds_7_day_avg', + float), + ('inpatient_beds_used_7_day_avg', 'inpatient_beds_used_7_day_avg', float), + ('all_adult_hospital_inpatient_bed_occupied_7_day_avg', + 'all_adult_hospital_inpatient_bed_occupied_7_day_avg', float), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', + 'total_adult_patients_hosp_confirmed_suspected_covid_7d_avg', float), + ('total_adult_patients_hospitalized_confirmed_covid_7_day_avg', + 'total_adult_patients_hospitalized_confirmed_covid_7_day_avg', float), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_avg', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_avg', float), + ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', + 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_avg', float), + ('inpatient_beds_7_day_avg', 'inpatient_beds_7_day_avg', float), + ('total_icu_beds_7_day_avg', 'total_icu_beds_7_day_avg', float), + ('total_staffed_adult_icu_beds_7_day_avg', 'total_staffed_adult_icu_beds_7_day_avg', float), + ('icu_beds_used_7_day_avg', 'icu_beds_used_7_day_avg', float), + ('staffed_adult_icu_bed_occupancy_7_day_avg', 'staffed_adult_icu_bed_occupancy_7_day_avg', + float), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_avg', + 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_avg', float), + 
('staffed_icu_adult_patients_confirmed_covid_7_day_avg', + 'staffed_icu_adult_patients_confirmed_covid_7_day_avg', float), + ('total_patients_hospitalized_confirmed_influenza_7_day_avg', + 'total_patients_hospitalized_confirmed_influenza_7_day_avg', float), + ('icu_patients_confirmed_influenza_7_day_avg', 'icu_patients_confirmed_influenza_7_day_avg', + float), + ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_avg', + 'total_patients_hosp_confirmed_influenza_and_covid_7d_avg', float), + ('total_beds_7_day_sum', 'total_beds_7_day_sum', int), + ('all_adult_hospital_beds_7_day_sum', 'all_adult_hospital_beds_7_day_sum', int), + ('all_adult_hospital_inpatient_beds_7_day_sum', 'all_adult_hospital_inpatient_beds_7_day_sum', + int), + ('inpatient_beds_used_7_day_sum', 'inpatient_beds_used_7_day_sum', int), + ('all_adult_hospital_inpatient_bed_occupied_7_day_sum', + 'all_adult_hospital_inpatient_bed_occupied_7_day_sum', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', + 'total_adult_patients_hosp_confirmed_suspected_covid_7d_sum', int), + ('total_adult_patients_hospitalized_confirmed_covid_7_day_sum', + 'total_adult_patients_hospitalized_confirmed_covid_7_day_sum', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_sum', int), + ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', + 'total_pediatric_patients_hospitalized_confirmed_covid_7_day_sum', int), + ('inpatient_beds_7_day_sum', 'inpatient_beds_7_day_sum', int), + ('total_icu_beds_7_day_sum', 'total_icu_beds_7_day_sum', int), + ('total_staffed_adult_icu_beds_7_day_sum', 'total_staffed_adult_icu_beds_7_day_sum', int), + ('icu_beds_used_7_day_sum', 'icu_beds_used_7_day_sum', int), + ('staffed_adult_icu_bed_occupancy_7_day_sum', 'staffed_adult_icu_bed_occupancy_7_day_sum', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_sum', + 
'staffed_icu_adult_patients_confirmed_suspected_covid_7d_sum', int), + ('staffed_icu_adult_patients_confirmed_covid_7_day_sum', + 'staffed_icu_adult_patients_confirmed_covid_7_day_sum', int), + ('total_patients_hospitalized_confirmed_influenza_7_day_sum', + 'total_patients_hospitalized_confirmed_influenza_7_day_sum', int), + ('icu_patients_confirmed_influenza_7_day_sum', 'icu_patients_confirmed_influenza_7_day_sum', + int), + ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_sum', + 'total_patients_hosp_confirmed_influenza_and_covid_7d_sum', int), + ('total_beds_7_day_coverage', 'total_beds_7_day_coverage', int), + ('all_adult_hospital_beds_7_day_coverage', 'all_adult_hospital_beds_7_day_coverage', int), + ('all_adult_hospital_inpatient_beds_7_day_coverage', + 'all_adult_hospital_inpatient_beds_7_day_coverage', int), + ('inpatient_beds_used_7_day_coverage', 'inpatient_beds_used_7_day_coverage', int), + ('all_adult_hospital_inpatient_bed_occupied_7_day_coverage', + 'all_adult_hospital_inpatient_bed_occupied_7_day_coverage', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', + 'total_adult_patients_hosp_confirmed_suspected_covid_7d_cov', int), + ('total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', + 'total_adult_patients_hospitalized_confirmed_covid_7_day_coverage', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_7_day_coverage', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_7d_cov', int), + ('total_pediatric_patients_hospitalized_confirmed_covid_7_day_coverage', + 'total_pediatric_patients_hosp_confirmed_covid_7d_cov', int), + ('inpatient_beds_7_day_coverage', 'inpatient_beds_7_day_coverage', int), + ('total_icu_beds_7_day_coverage', 'total_icu_beds_7_day_coverage', int), + ('total_staffed_adult_icu_beds_7_day_coverage', 'total_staffed_adult_icu_beds_7_day_coverage', + int), + ('icu_beds_used_7_day_coverage', 'icu_beds_used_7_day_coverage', int), + 
('staffed_adult_icu_bed_occupancy_7_day_coverage', + 'staffed_adult_icu_bed_occupancy_7_day_coverage', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid_7_day_coverage', + 'staffed_icu_adult_patients_confirmed_suspected_covid_7d_cov', int), + ('staffed_icu_adult_patients_confirmed_covid_7_day_coverage', + 'staffed_icu_adult_patients_confirmed_covid_7_day_coverage', int), + ('total_patients_hospitalized_confirmed_influenza_7_day_coverage', + 'total_patients_hospitalized_confirmed_influenza_7_day_coverage', int), + ('icu_patients_confirmed_influenza_7_day_coverage', + 'icu_patients_confirmed_influenza_7_day_coverage', int), + ('total_patients_hospitalized_confirmed_influenza_and_covid_7_day_coverage', + 'total_patients_hosp_confirmed_influenza_and_covid_7d_cov', int), + ('previous_day_admission_adult_covid_confirmed_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_18-19_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_18_19_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_20-29_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_20_29_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_30-39_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_30_39_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_40-49_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_40_49_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_50-59_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_50_59_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_60-69_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_60_69_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_70-79_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_70_79_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_80+_7_day_sum', + 
'previous_day_admission_adult_covid_confirmed_80plus_7_day_sum', int), + ('previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', + 'previous_day_admission_adult_covid_confirmed_unknown_7_day_sum', int), + ('previous_day_admission_pediatric_covid_confirmed_7_day_sum', + 'previous_day_admission_pediatric_covid_confirmed_7_day_sum', int), + ('previous_day_covid_ED_visits_7_day_sum', 'previous_day_covid_ed_visits_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_7_day_sum', + 'previous_day_admission_adult_covid_suspected_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_18-19_7_day_sum', + 'previous_day_admission_adult_covid_suspected_18_19_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_20-29_7_day_sum', + 'previous_day_admission_adult_covid_suspected_20_29_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_30-39_7_day_sum', + 'previous_day_admission_adult_covid_suspected_30_39_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_40-49_7_day_sum', + 'previous_day_admission_adult_covid_suspected_40_49_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_50-59_7_day_sum', + 'previous_day_admission_adult_covid_suspected_50_59_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_60-69_7_day_sum', + 'previous_day_admission_adult_covid_suspected_60_69_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_70-79_7_day_sum', + 'previous_day_admission_adult_covid_suspected_70_79_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_80+_7_day_sum', + 'previous_day_admission_adult_covid_suspected_80plus_7_day_sum', int), + ('previous_day_admission_adult_covid_suspected_unknown_7_day_sum', + 'previous_day_admission_adult_covid_suspected_unknown_7_day_sum', int), + ('previous_day_admission_pediatric_covid_suspected_7_day_sum', + 'previous_day_admission_pediatric_covid_suspected_7_day_sum', int), + 
('previous_day_total_ED_visits_7_day_sum', 'previous_day_total_ed_visits_7_day_sum', int), + ('previous_day_admission_influenza_confirmed_7_day_sum', + 'previous_day_admission_influenza_confirmed_7_day_sum', int), ] def __init__(self, *args, **kwargs): diff --git a/src/acquisition/covid_hosp/state_daily/database.py b/src/acquisition/covid_hosp/state_daily/database.py index 1f3663123..b69641e63 100644 --- a/src/acquisition/covid_hosp/state_daily/database.py +++ b/src/acquisition/covid_hosp/state_daily/database.py @@ -8,75 +8,103 @@ class Database(BaseDatabase): # note we share a database with state_timeseries TABLE_NAME = 'covid_hosp_state_timeseries' - # These are the names that appear in the CSV header, in order of appearance - # in the database table, along with corresponding data type converters. - # However, note that the corresponding database column names may be shorter + # These are 3-tuples of (CSV header name, SQL db column name, data type) for + # all the columns in the CSV file. + # Note that the corresponding database column names may be shorter # due to constraints on the length of column names. See # /src/ddl/covid_hosp.sql for more information. # Additionally, all column names below are shared with state_timeseries, # except for reporting_cutoff_start (here) and date (there). If you need # to update a column name, do it in both places. 
ORDERED_CSV_COLUMNS = [ - ('state', str), - ('reporting_cutoff_start', Utils.int_from_date), - ('critical_staffing_shortage_today_yes', int), - ('critical_staffing_shortage_today_no', int), - ('critical_staffing_shortage_today_not_reported', int), - ('critical_staffing_shortage_anticipated_within_week_yes', int), - ('critical_staffing_shortage_anticipated_within_week_no', int), - ('critical_staffing_shortage_anticipated_within_week_not_reported', int), - ('hospital_onset_covid', int), - ('hospital_onset_covid_coverage', int), - ('inpatient_beds', int), - ('inpatient_beds_coverage', int), - ('inpatient_beds_used', int), - ('inpatient_beds_used_coverage', int), - ('inpatient_beds_used_covid', int), - ('inpatient_beds_used_covid_coverage', int), - ('previous_day_admission_adult_covid_confirmed', int), - ('previous_day_admission_adult_covid_confirmed_coverage', int), - ('previous_day_admission_adult_covid_suspected', int), - ('previous_day_admission_adult_covid_suspected_coverage', int), - ('previous_day_admission_pediatric_covid_confirmed', int), - ('previous_day_admission_pediatric_covid_confirmed_coverage', int), - ('previous_day_admission_pediatric_covid_suspected', int), - ('previous_day_admission_pediatric_covid_suspected_coverage', int), - ('staffed_adult_icu_bed_occupancy', int), - ('staffed_adult_icu_bed_occupancy_coverage', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', int), - ('staffed_icu_adult_patients_confirmed_covid', int), - ('staffed_icu_adult_patients_confirmed_covid_coverage', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', int), - ('total_adult_patients_hospitalized_confirmed_covid', int), - ('total_adult_patients_hospitalized_confirmed_covid_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', int), - 
('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_covid', int), - ('total_pediatric_patients_hospitalized_confirmed_covid_coverage', int), - ('total_staffed_adult_icu_beds', int), - ('total_staffed_adult_icu_beds_coverage', int), - ('inpatient_beds_utilization', float), - ('inpatient_beds_utilization_coverage', int), - ('inpatient_beds_utilization_numerator', int), - ('inpatient_beds_utilization_denominator', int), - ('percent_of_inpatients_with_covid', float), - ('percent_of_inpatients_with_covid_coverage', int), - ('percent_of_inpatients_with_covid_numerator', int), - ('percent_of_inpatients_with_covid_denominator', int), - ('inpatient_bed_covid_utilization', float), - ('inpatient_bed_covid_utilization_coverage', int), - ('inpatient_bed_covid_utilization_numerator', int), - ('inpatient_bed_covid_utilization_denominator', int), - ('adult_icu_bed_covid_utilization', float), - ('adult_icu_bed_covid_utilization_coverage', int), - ('adult_icu_bed_covid_utilization_numerator', int), - ('adult_icu_bed_covid_utilization_denominator', int), - ('adult_icu_bed_utilization', float), - ('adult_icu_bed_utilization_coverage', int), - ('adult_icu_bed_utilization_numerator', int), - ('adult_icu_bed_utilization_denominator', int), + ('state', 'state', str), + ('reporting_cutoff_start', 'reporting_cutoff_start', Utils.int_from_date), + ('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), + ('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), + ('critical_staffing_shortage_today_not_reported', + 'critical_staffing_shortage_today_not_reported', int), + ('critical_staffing_shortage_anticipated_within_week_yes', + 'critical_staffing_shortage_anticipated_within_week_yes', int), + ('critical_staffing_shortage_anticipated_within_week_no', + 'critical_staffing_shortage_anticipated_within_week_no', int), + 
('critical_staffing_shortage_anticipated_within_week_not_reported', + 'critical_staffing_shortage_anticipated_within_week_not_reported', int), + ('hospital_onset_covid', 'hospital_onset_covid', int), + ('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int), + ('inpatient_beds', 'inpatient_beds', int), + ('inpatient_beds_coverage', 'inpatient_beds_coverage', int), + ('inpatient_beds_used', 'inpatient_beds_used', int), + ('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int), + ('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int), + ('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int), + ('previous_day_admission_adult_covid_confirmed', 'previous_day_admission_adult_covid_confirmed', + int), + ('previous_day_admission_adult_covid_confirmed_coverage', + 'previous_day_admission_adult_covid_confirmed_coverage', int), + ('previous_day_admission_adult_covid_suspected', 'previous_day_admission_adult_covid_suspected', + int), + ('previous_day_admission_adult_covid_suspected_coverage', + 'previous_day_admission_adult_covid_suspected_coverage', int), + ('previous_day_admission_pediatric_covid_confirmed', + 'previous_day_admission_pediatric_covid_confirmed', int), + ('previous_day_admission_pediatric_covid_confirmed_coverage', + 'previous_day_admission_pediatric_covid_confirmed_coverage', int), + ('previous_day_admission_pediatric_covid_suspected', + 'previous_day_admission_pediatric_covid_suspected', int), + ('previous_day_admission_pediatric_covid_suspected_coverage', + 'previous_day_admission_pediatric_covid_suspected_coverage', int), + ('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int), + ('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid', + 'staffed_icu_adult_patients_confirmed_suspected_covid', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', + 
'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int), + ('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid', + int), + ('staffed_icu_adult_patients_confirmed_covid_coverage', + 'staffed_icu_adult_patients_confirmed_covid_coverage', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid', + 'total_adult_patients_hosp_confirmed_suspected_covid', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', + 'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int), + ('total_adult_patients_hospitalized_confirmed_covid', + 'total_adult_patients_hosp_confirmed_covid', int), + ('total_adult_patients_hospitalized_confirmed_covid_coverage', + 'total_adult_patients_hosp_confirmed_covid_coverage', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', + 'total_pediatric_patients_hosp_confirmed_suspected_covid', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int), + ('total_pediatric_patients_hospitalized_confirmed_covid', + 'total_pediatric_patients_hosp_confirmed_covid', int), + ('total_pediatric_patients_hospitalized_confirmed_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_covid_coverage', int), + ('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int), + ('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int), + ('inpatient_beds_utilization', 'inpatient_beds_utilization', float), + ('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int), + ('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int), + ('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int), + ('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float), + 
('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', int), + ('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator', + int), + ('percent_of_inpatients_with_covid_denominator', 'percent_of_inpatients_with_covid_denominator', + int), + ('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float), + ('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int), + ('inpatient_bed_covid_utilization_numerator', 'inpatient_bed_covid_utilization_numerator', int), + ('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator', + int), + ('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float), + ('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int), + ('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', int), + ('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator', + int), + ('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float), + ('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int), + ('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int), + ('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int), ] def __init__(self, *args, **kwargs): @@ -85,4 +113,4 @@ def __init__(self, *args, **kwargs): **kwargs, table_name=Database.TABLE_NAME, columns_and_types=Database.ORDERED_CSV_COLUMNS, - additional_fields=('D',)) + additional_fields=[('record_type', 'D')]) diff --git a/src/acquisition/covid_hosp/state_timeseries/database.py b/src/acquisition/covid_hosp/state_timeseries/database.py index 36438a0a3..db401da1f 100644 --- a/src/acquisition/covid_hosp/state_timeseries/database.py +++ b/src/acquisition/covid_hosp/state_timeseries/database.py @@ -7,75 +7,103 @@ class Database(BaseDatabase): 
TABLE_NAME = 'covid_hosp_state_timeseries' - # These are the names that appear in the CSV header, in order of appearance - # in the database table, along with corresponding data type converters. - # However, note that the corresponding database column names may be shorter + # These are 3-tuples of (CSV header name, SQL db column name, data type) for + # all the columns in the CSV file. + # Note that the corresponding database column names may be shorter # due to constraints on the length of column names. See # /src/ddl/covid_hosp.sql for more information. # Additionally, all column names below are shared with state_daily, # except for reporting_cutoff_start (there) and date (here). If you need # to update a column name, do it in both places. ORDERED_CSV_COLUMNS = [ - ('state', str), - ('date', Utils.int_from_date), - ('critical_staffing_shortage_today_yes', int), - ('critical_staffing_shortage_today_no', int), - ('critical_staffing_shortage_today_not_reported', int), - ('critical_staffing_shortage_anticipated_within_week_yes', int), - ('critical_staffing_shortage_anticipated_within_week_no', int), - ('critical_staffing_shortage_anticipated_within_week_not_reported', int), - ('hospital_onset_covid', int), - ('hospital_onset_covid_coverage', int), - ('inpatient_beds', int), - ('inpatient_beds_coverage', int), - ('inpatient_beds_used', int), - ('inpatient_beds_used_coverage', int), - ('inpatient_beds_used_covid', int), - ('inpatient_beds_used_covid_coverage', int), - ('previous_day_admission_adult_covid_confirmed', int), - ('previous_day_admission_adult_covid_confirmed_coverage', int), - ('previous_day_admission_adult_covid_suspected', int), - ('previous_day_admission_adult_covid_suspected_coverage', int), - ('previous_day_admission_pediatric_covid_confirmed', int), - ('previous_day_admission_pediatric_covid_confirmed_coverage', int), - ('previous_day_admission_pediatric_covid_suspected', int), - ('previous_day_admission_pediatric_covid_suspected_coverage', int), - 
('staffed_adult_icu_bed_occupancy', int), - ('staffed_adult_icu_bed_occupancy_coverage', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid', int), - ('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', int), - ('staffed_icu_adult_patients_confirmed_covid', int), - ('staffed_icu_adult_patients_confirmed_covid_coverage', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid', int), - ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', int), - ('total_adult_patients_hospitalized_confirmed_covid', int), - ('total_adult_patients_hospitalized_confirmed_covid_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', int), - ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', int), - ('total_pediatric_patients_hospitalized_confirmed_covid', int), - ('total_pediatric_patients_hospitalized_confirmed_covid_coverage', int), - ('total_staffed_adult_icu_beds', int), - ('total_staffed_adult_icu_beds_coverage', int), - ('inpatient_beds_utilization', float), - ('inpatient_beds_utilization_coverage', int), - ('inpatient_beds_utilization_numerator', int), - ('inpatient_beds_utilization_denominator', int), - ('percent_of_inpatients_with_covid', float), - ('percent_of_inpatients_with_covid_coverage', int), - ('percent_of_inpatients_with_covid_numerator', int), - ('percent_of_inpatients_with_covid_denominator', int), - ('inpatient_bed_covid_utilization', float), - ('inpatient_bed_covid_utilization_coverage', int), - ('inpatient_bed_covid_utilization_numerator', int), - ('inpatient_bed_covid_utilization_denominator', int), - ('adult_icu_bed_covid_utilization', float), - ('adult_icu_bed_covid_utilization_coverage', int), - ('adult_icu_bed_covid_utilization_numerator', int), - ('adult_icu_bed_covid_utilization_denominator', int), - ('adult_icu_bed_utilization', float), - ('adult_icu_bed_utilization_coverage', int), - 
('adult_icu_bed_utilization_numerator', int), - ('adult_icu_bed_utilization_denominator', int), + ('state', 'state', str), + ('date', 'date', Utils.int_from_date), + ('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), + ('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), + ('critical_staffing_shortage_today_not_reported', + 'critical_staffing_shortage_today_not_reported', int), + ('critical_staffing_shortage_anticipated_within_week_yes', + 'critical_staffing_shortage_anticipated_within_week_yes', int), + ('critical_staffing_shortage_anticipated_within_week_no', + 'critical_staffing_shortage_anticipated_within_week_no', int), + ('critical_staffing_shortage_anticipated_within_week_not_reported', + 'critical_staffing_shortage_anticipated_within_week_not_reported', int), + ('hospital_onset_covid', 'hospital_onset_covid', int), + ('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int), + ('inpatient_beds', 'inpatient_beds', int), + ('inpatient_beds_coverage', 'inpatient_beds_coverage', int), + ('inpatient_beds_used', 'inpatient_beds_used', int), + ('inpatient_beds_used_coverage', 'inpatient_beds_used_coverage', int), + ('inpatient_beds_used_covid', 'inpatient_beds_used_covid', int), + ('inpatient_beds_used_covid_coverage', 'inpatient_beds_used_covid_coverage', int), + ('previous_day_admission_adult_covid_confirmed', 'previous_day_admission_adult_covid_confirmed', + int), + ('previous_day_admission_adult_covid_confirmed_coverage', + 'previous_day_admission_adult_covid_confirmed_coverage', int), + ('previous_day_admission_adult_covid_suspected', 'previous_day_admission_adult_covid_suspected', + int), + ('previous_day_admission_adult_covid_suspected_coverage', + 'previous_day_admission_adult_covid_suspected_coverage', int), + ('previous_day_admission_pediatric_covid_confirmed', + 'previous_day_admission_pediatric_covid_confirmed', int), + 
('previous_day_admission_pediatric_covid_confirmed_coverage', + 'previous_day_admission_pediatric_covid_confirmed_coverage', int), + ('previous_day_admission_pediatric_covid_suspected', + 'previous_day_admission_pediatric_covid_suspected', int), + ('previous_day_admission_pediatric_covid_suspected_coverage', + 'previous_day_admission_pediatric_covid_suspected_coverage', int), + ('staffed_adult_icu_bed_occupancy', 'staffed_adult_icu_bed_occupancy', int), + ('staffed_adult_icu_bed_occupancy_coverage', 'staffed_adult_icu_bed_occupancy_coverage', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid', + 'staffed_icu_adult_patients_confirmed_suspected_covid', int), + ('staffed_icu_adult_patients_confirmed_and_suspected_covid_coverage', + 'staffed_icu_adult_patients_confirmed_suspected_covid_coverage', int), + ('staffed_icu_adult_patients_confirmed_covid', 'staffed_icu_adult_patients_confirmed_covid', + int), + ('staffed_icu_adult_patients_confirmed_covid_coverage', + 'staffed_icu_adult_patients_confirmed_covid_coverage', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid', + 'total_adult_patients_hosp_confirmed_suspected_covid', int), + ('total_adult_patients_hospitalized_confirmed_and_suspected_covid_coverage', + 'total_adult_patients_hosp_confirmed_suspected_covid_coverage', int), + ('total_adult_patients_hospitalized_confirmed_covid', + 'total_adult_patients_hosp_confirmed_covid', int), + ('total_adult_patients_hospitalized_confirmed_covid_coverage', + 'total_adult_patients_hosp_confirmed_covid_coverage', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid', + 'total_pediatric_patients_hosp_confirmed_suspected_covid', int), + ('total_pediatric_patients_hospitalized_confirmed_and_suspected_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_suspected_covid_coverage', int), + ('total_pediatric_patients_hospitalized_confirmed_covid', + 'total_pediatric_patients_hosp_confirmed_covid', int), + 
('total_pediatric_patients_hospitalized_confirmed_covid_coverage', + 'total_pediatric_patients_hosp_confirmed_covid_coverage', int), + ('total_staffed_adult_icu_beds', 'total_staffed_adult_icu_beds', int), + ('total_staffed_adult_icu_beds_coverage', 'total_staffed_adult_icu_beds_coverage', int), + ('inpatient_beds_utilization', 'inpatient_beds_utilization', float), + ('inpatient_beds_utilization_coverage', 'inpatient_beds_utilization_coverage', int), + ('inpatient_beds_utilization_numerator', 'inpatient_beds_utilization_numerator', int), + ('inpatient_beds_utilization_denominator', 'inpatient_beds_utilization_denominator', int), + ('percent_of_inpatients_with_covid', 'percent_of_inpatients_with_covid', float), + ('percent_of_inpatients_with_covid_coverage', 'percent_of_inpatients_with_covid_coverage', int), + ('percent_of_inpatients_with_covid_numerator', 'percent_of_inpatients_with_covid_numerator', + int), + ('percent_of_inpatients_with_covid_denominator', 'percent_of_inpatients_with_covid_denominator', + int), + ('inpatient_bed_covid_utilization', 'inpatient_bed_covid_utilization', float), + ('inpatient_bed_covid_utilization_coverage', 'inpatient_bed_covid_utilization_coverage', int), + ('inpatient_bed_covid_utilization_numerator', 'inpatient_bed_covid_utilization_numerator', int), + ('inpatient_bed_covid_utilization_denominator', 'inpatient_bed_covid_utilization_denominator', + int), + ('adult_icu_bed_covid_utilization', 'adult_icu_bed_covid_utilization', float), + ('adult_icu_bed_covid_utilization_coverage', 'adult_icu_bed_covid_utilization_coverage', int), + ('adult_icu_bed_covid_utilization_numerator', 'adult_icu_bed_covid_utilization_numerator', int), + ('adult_icu_bed_covid_utilization_denominator', 'adult_icu_bed_covid_utilization_denominator', + int), + ('adult_icu_bed_utilization', 'adult_icu_bed_utilization', float), + ('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int), + ('adult_icu_bed_utilization_numerator', 
'adult_icu_bed_utilization_numerator', int), + ('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int), ] def __init__(self, *args, **kwargs): @@ -84,4 +112,4 @@ def __init__(self, *args, **kwargs): **kwargs, table_name=Database.TABLE_NAME, columns_and_types=Database.ORDERED_CSV_COLUMNS, - additional_fields=('T',)) + additional_fields=[('record_type', 'T')]) diff --git a/src/ddl/covid_hosp.sql b/src/ddl/covid_hosp.sql index 3591dc283..237e053c2 100644 --- a/src/ddl/covid_hosp.sql +++ b/src/ddl/covid_hosp.sql @@ -71,6 +71,12 @@ Data is public under the Open Data Commons Open Database License (ODbL). | issue | int(11) | NO | MUL | NULL | | | state | char(2) | NO | MUL | NULL | | | date | int(11) | NO | | NULL | | +| critical_staffing_shortage_today_yes | int(11) | YES | | NULL | | +| critical_staffing_shortage_today_no | int(11) | YES | | NULL | | +| critical_staffing_shortage_today_not_reported | int(11) | YES | | NULL | | +| critical_staffing_shortage_anticipated_within_week_yes | int(11) | YES | | NULL | | +| critical_staffing_shortage_anticipated_within_week_no | int(11) | YES | | NULL | | +| critical_staffing_shortage_anticipated_within_week_not_reported | int(11) | YES | | NULL | | | hospital_onset_covid | int(11) | YES | | NULL | | | hospital_onset_covid_coverage | int(11) | YES | | NULL | | | inpatient_beds | int(11) | YES | | NULL | | diff --git a/tests/acquisition/covid_hosp/common/test_database.py b/tests/acquisition/covid_hosp/common/test_database.py index b9b2e7694..7db9f58e9 100644 --- a/tests/acquisition/covid_hosp/common/test_database.py +++ b/tests/acquisition/covid_hosp/common/test_database.py @@ -123,9 +123,9 @@ def test_insert_dataset(self): table_name = 'test_table' columns_and_types = [ - ('str_col', str), - ('int_col', int), - ('float_col', float), + ('str_col', 'sql_str_col', str), + ('int_col', 'sql_int_col', int), + ('float_col', 'sql_float_col', float), ] mock_connection = MagicMock() mock_cursor = 
mock_connection.cursor() @@ -146,7 +146,9 @@ def test_insert_dataset(self): self.assertEqual(mock_cursor.execute.call_count, 6) actual_sql = mock_cursor.execute.call_args[0][0] - self.assertIn('insert into `test_table` values', actual_sql.lower()) + self.assertIn( + 'INSERT INTO `test_table` (`id`, `publication_date`, `sql_str_col`, `sql_int_col`, `sql_float_col`)', + actual_sql) expected_values = [ ('a', 1, 0.1), diff --git a/tests/acquisition/covid_hosp/state_daily/__init__.py b/tests/acquisition/covid_hosp/state_daily/__init__.py new file mode 100644 index 000000000..e197f3ec4 --- /dev/null +++ b/tests/acquisition/covid_hosp/state_daily/__init__.py @@ -0,0 +1,4 @@ +import sys +import os + +sys.path.append(os.getcwd()) diff --git a/tests/acquisition/covid_hosp/state_daily/test_database.py b/tests/acquisition/covid_hosp/state_daily/test_database.py index 89fcaa8e7..d9a9747fb 100644 --- a/tests/acquisition/covid_hosp/state_daily/test_database.py +++ b/tests/acquisition/covid_hosp/state_daily/test_database.py @@ -7,6 +7,7 @@ # first party from delphi.epidata.acquisition.covid_hosp.common.test_utils import TestUtils +from delphi.epidata.acquisition.covid_hosp.state_daily.database import Database # py3tester coverage target __test_target__ = \ diff --git a/tests/acquisition/covid_hosp/state_daily/test_network.py b/tests/acquisition/covid_hosp/state_daily/test_network.py index ab5883ced..a1af886b6 100644 --- a/tests/acquisition/covid_hosp/state_daily/test_network.py +++ b/tests/acquisition/covid_hosp/state_daily/test_network.py @@ -5,6 +5,8 @@ from unittest.mock import patch from unittest.mock import sentinel +from delphi.epidata.acquisition.covid_hosp.state_daily.network import Network + # py3tester coverage target __test_target__ = \ 'delphi.epidata.acquisition.covid_hosp.state_daily.network' diff --git a/tests/acquisition/covid_hosp/state_daily/test_update.py b/tests/acquisition/covid_hosp/state_daily/test_update.py index 93ef58df2..276fb2e6e 100644 --- 
a/tests/acquisition/covid_hosp/state_daily/test_update.py +++ b/tests/acquisition/covid_hosp/state_daily/test_update.py @@ -7,6 +7,7 @@ # first party from delphi.epidata.acquisition.covid_hosp.common.utils import Utils +from delphi.epidata.acquisition.covid_hosp.state_daily.update import Update # py3tester coverage target __test_target__ = \ From 407be2c43cc47a1573b2d8ddf09bf475132f5085 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 20 Jan 2021 13:38:23 -0800 Subject: [PATCH 2/3] Rearrange columns that to ensure tests still pass --- .../covid_hosp/state_timeseries/database.py | 20 +++++++++---------- .../state_timeseries/test_database.py | 5 +++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/acquisition/covid_hosp/state_timeseries/database.py b/src/acquisition/covid_hosp/state_timeseries/database.py index db401da1f..c6e64be69 100644 --- a/src/acquisition/covid_hosp/state_timeseries/database.py +++ b/src/acquisition/covid_hosp/state_timeseries/database.py @@ -18,16 +18,6 @@ class Database(BaseDatabase): ORDERED_CSV_COLUMNS = [ ('state', 'state', str), ('date', 'date', Utils.int_from_date), - ('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), - ('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), - ('critical_staffing_shortage_today_not_reported', - 'critical_staffing_shortage_today_not_reported', int), - ('critical_staffing_shortage_anticipated_within_week_yes', - 'critical_staffing_shortage_anticipated_within_week_yes', int), - ('critical_staffing_shortage_anticipated_within_week_no', - 'critical_staffing_shortage_anticipated_within_week_no', int), - ('critical_staffing_shortage_anticipated_within_week_not_reported', - 'critical_staffing_shortage_anticipated_within_week_not_reported', int), ('hospital_onset_covid', 'hospital_onset_covid', int), ('hospital_onset_covid_coverage', 'hospital_onset_covid_coverage', int), ('inpatient_beds', 'inpatient_beds', int), @@ -104,6 
+94,16 @@ class Database(BaseDatabase): ('adult_icu_bed_utilization_coverage', 'adult_icu_bed_utilization_coverage', int), ('adult_icu_bed_utilization_numerator', 'adult_icu_bed_utilization_numerator', int), ('adult_icu_bed_utilization_denominator', 'adult_icu_bed_utilization_denominator', int), + ('critical_staffing_shortage_today_yes', 'critical_staffing_shortage_today_yes', int), + ('critical_staffing_shortage_today_no', 'critical_staffing_shortage_today_no', int), + ('critical_staffing_shortage_today_not_reported', + 'critical_staffing_shortage_today_not_reported', int), + ('critical_staffing_shortage_anticipated_within_week_yes', + 'critical_staffing_shortage_anticipated_within_week_yes', int), + ('critical_staffing_shortage_anticipated_within_week_no', + 'critical_staffing_shortage_anticipated_within_week_no', int), + ('critical_staffing_shortage_anticipated_within_week_not_reported', + 'critical_staffing_shortage_anticipated_within_week_not_reported', int), ] def __init__(self, *args, **kwargs): diff --git a/tests/acquisition/covid_hosp/state_timeseries/test_database.py b/tests/acquisition/covid_hosp/state_timeseries/test_database.py index d6678aa12..c90dc48e6 100644 --- a/tests/acquisition/covid_hosp/state_timeseries/test_database.py +++ b/tests/acquisition/covid_hosp/state_timeseries/test_database.py @@ -40,11 +40,12 @@ def test_insert_dataset(self): last_query_values = mock_cursor.execute.call_args[0][-1] expected_query_values = ( - 0, sentinel.issue, 'WY', 20200826, 2, None, 4, 2, 19, 7, 0, 26, 1464, + 0, sentinel.issue, 'WY', 20200826, 0, 26, 1464, 28, 629, 28, 17, 26, 2, 28, 13, 26, 0, 21, 0, 22, 49, 28, 10, 26, 7, 28, 17, 26, 14, 28, 0, 26, 0, 26, 114, 28, 0.4296448087431694, 28, 629, 1464, 0.027597402597402596, 26, 17, 616, 0.011946591707659873, 26, 17, - 1423, 0.09345794392523364, 26, 10, 107, 0.4298245614035088, 28, 49, 114, 'T') + 1423, 0.09345794392523364, 26, 10, 107, 0.4298245614035088, 28, 49, 114, + 2, None, 4, 2, 19, 7, 'T') 
self.assertEqual(len(last_query_values), len(expected_query_values)) for actual, expected in zip(last_query_values, expected_query_values): From cdd97a1e1e19cfb5e675f0803f664f901e7ec034 Mon Sep 17 00:00:00 2001 From: andrew Date: Thu, 21 Jan 2021 08:04:35 -0800 Subject: [PATCH 3/3] Update docstrings and change tuple ordering --- src/acquisition/covid_hosp/common/database.py | 21 ++++++++----------- .../covid_hosp/state_daily/database.py | 2 +- .../covid_hosp/state_timeseries/database.py | 2 +- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/acquisition/covid_hosp/common/database.py b/src/acquisition/covid_hosp/common/database.py index d40545c90..ba8c70692 100644 --- a/src/acquisition/covid_hosp/common/database.py +++ b/src/acquisition/covid_hosp/common/database.py @@ -26,14 +26,12 @@ def __init__(self, An open connection to a database. table_name : str The name of the table which holds the dataset. - columns_and_types : list[tuple[str, Callable[str, ...]]] - List of CSV columns in order of appearance in the database. The first - element of each tuple is the CSV column name, and the second element is a - function which converts a string into the appropriate datatype for the - column. + columns_and_types : list[tuple[str, str, Callable]] + List of 3-tuples of (CSV header name, SQL column name, data type) for + all the columns in the CSV file. additional_fields : tuple[str] - Tuple of additional fields to include at the end of the row which are not - present in the CSV data. + List of 2-tuples of (value, SQL column name) for additional fields to include + at the end of the row which are not present in the CSV data.
""" self.connection = connection @@ -154,10 +152,9 @@ def insert_dataset(self, publication_date, dataframe): num_columns = 2 + len(self.columns_and_types) + len(self.additional_fields) value_placeholders = ', '.join(['%s'] * num_columns) - columns = ', '.join([f'`{i[1]}`' for i in self.columns_and_types] + - [i[0] for i in self.additional_fields]) - sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) VALUES ' \ - f'({value_placeholders})' + columns = ', '.join(f'`{i[1]}`' for i in self.columns_and_types + self.additional_fields) + sql = f'INSERT INTO `{self.table_name}` (`id`, `{self.publication_col_name}`, {columns}) ' \ + f'VALUES ({value_placeholders})' id_and_publication_date = (0, publication_date) with self.new_cursor() as cursor: for _, row in dataframe.iterrows(): @@ -170,4 +167,4 @@ def insert_dataset(self, publication_date, dataframe): cursor.execute(sql, id_and_publication_date + tuple(values) + - tuple(i[1] for i in self.additional_fields)) + tuple(i[0] for i in self.additional_fields)) diff --git a/src/acquisition/covid_hosp/state_daily/database.py b/src/acquisition/covid_hosp/state_daily/database.py index b69641e63..5b598e0e0 100644 --- a/src/acquisition/covid_hosp/state_daily/database.py +++ b/src/acquisition/covid_hosp/state_daily/database.py @@ -113,4 +113,4 @@ def __init__(self, *args, **kwargs): **kwargs, table_name=Database.TABLE_NAME, columns_and_types=Database.ORDERED_CSV_COLUMNS, - additional_fields=[('record_type', 'D')]) + additional_fields=[('D', 'record_type')]) diff --git a/src/acquisition/covid_hosp/state_timeseries/database.py b/src/acquisition/covid_hosp/state_timeseries/database.py index c6e64be69..6a482a88f 100644 --- a/src/acquisition/covid_hosp/state_timeseries/database.py +++ b/src/acquisition/covid_hosp/state_timeseries/database.py @@ -112,4 +112,4 @@ def __init__(self, *args, **kwargs): **kwargs, table_name=Database.TABLE_NAME, columns_and_types=Database.ORDERED_CSV_COLUMNS, - 
additional_fields=[('record_type', 'T')]) + additional_fields=[('T', 'record_type')])