diff --git a/adtl/__init__.py b/adtl/__init__.py
index 46e953f..b02cfd8 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -745,13 +745,18 @@ def update_table(self, table: str, row: StrDict):
                                         existing_value + value
                                     )
                             elif combined_type == "firstNonNull":
+                                # only use the first value found
                                 pass
                             else:
                                 raise ValueError(
-                                    f"Could not return value for {combined_type}"
+                                    f"Unrecognised combined type: {combined_type}"
                                 )
                         else:
                             # otherwise overwrite?
+                            logging.debug(
+                                f"Multiple rows of data found for {attr} without a"
+                                " combinedType listed. Data being overwritten."
+                            )
                             self.data[table][group_key][attr] = value
         elif kind == "oneToMany":
             for match in self.spec[table]:
diff --git a/tests/parsers/stop-overwriting.toml b/tests/parsers/stop-overwriting.toml
new file mode 100644
index 0000000..a36ae5b
--- /dev/null
+++ b/tests/parsers/stop-overwriting.toml
@@ -0,0 +1,40 @@
+[adtl]
+  name = "overwrite"
+  description = "Example using groupBy on data with multiple rows per subject"
+
+[adtl.tables.visit]
+  kind = "groupBy"
+  groupBy = "subject_id"
+  aggregation = "lastNotNull"
+
+[visit]
+
+  [visit.subject_id]
+    field = "subjid"
+    description = "Subject ID"
+
+  [visit.earliest_admission]
+    combinedType = "min"
+    fields = [
+      { field = "first_admit" },
+    ]
+
+  [visit.start_date]
+    combinedType = "firstNonNull"
+    fields = [
+      { field = "first_admit" },
+      { field = "enrolment" },
+    ]
+
+  [visit.treatment_antiviral_type]
+    combinedType = "set"
+    excludeWhen = "none"
+    fields = [
+      { field = "daily_antiviral_type___1", values = { 1 = "Ribavirin" } },
+      { field = "daily_antiviral_type___2", values = { 1 = "Lopinavir" } },
+      { field = "daily_antiviral_type___3", values = { 1 = "Interferon" } },
+      { field = "overall_antiviral_dc___1", values = { 1 = "Ribavirin" } },
+      { field = "overall_antiviral_dc___2", values = { 1 = "Lopinavir" } },
+      { field = "overall_antiviral_dc___3", values = { 1 = "Interferon" } },
+    ]
+
diff --git a/tests/sources/stop-overwriting.csv b/tests/sources/stop-overwriting.csv
new file mode 100644
index 0000000..878a0c3
--- /dev/null
+++ b/tests/sources/stop-overwriting.csv
@@ -0,0 +1,11 @@
+subjid,redcap,first_admit,enrolment,daily_antiviral_type___1,daily_antiviral_type___2,daily_antiviral_type___3,overall_antiviral_dc___1,overall_antiviral_dc___2,overall_antiviral_dc___3
+1,admit,2023-11-20,2023-11-23,0,0,0,0,0,0
+1,discharge,,,0,0,0,1,0,1
+1,day1,2023-11-19,,1,0,0,0,0,0
+1,day2,,,1,0,0,0,0,0
+2,admit,,2022-11-23,0,0,0,0,0,0
+2,discharge,,2020-11-23,0,0,0,0,0,0
+2,day1,,,0,1,0,0,0,0
+3,admit,,2020-02-20,0,0,0,0,0,0
+3,discharge,,,0,0,0,0,1,1
+3,day1,,,1,0,0,0,0,0
\ No newline at end of file
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 279e079..edf4ea7 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1316,3 +1316,32 @@ def test_combinedtype_wordsubstituteset(test_row, test_combination, expected):
     }
 
     assert parser.get_combined_type(test_row, test_rule) == unordered(expected)
+
+
+OVERWRITE_OUTPUT = [
+    {
+        "subject_id": 1,
+        "earliest_admission": "2023-11-19",
+        "start_date": "2023-11-20",
+        "treatment_antiviral_type": unordered(["Ribavirin", "Interferon"]),
+    },
+    {
+        "subject_id": 2,
+        "start_date": "2022-11-23",
+        "treatment_antiviral_type": ["Lopinavir"],
+    },
+    {
+        "subject_id": 3,
+        "start_date": "2020-02-20",
+        "treatment_antiviral_type": unordered(["Ribavirin", "Lopinavir", "Interferon"]),
+    },
+]
+
+
+def test_no_overwriting():
+    overwriting_output = list(
+        parser.Parser(TEST_PARSERS_PATH / "stop-overwriting.toml")
+        .parse(TEST_SOURCES_PATH / "stop-overwriting.csv")
+        .read_table("visit")
+    )
+    assert overwriting_output == OVERWRITE_OUTPUT