diff --git a/adtl/__init__.py b/adtl/__init__.py index b02cfd8..4880082 100644 --- a/adtl/__init__.py +++ b/adtl/__init__.py @@ -718,46 +718,42 @@ def update_table(self, table: str, row: StrDict): for attr in self.spec[table]: value = get_value(row, self.spec[table][attr], self.ctx(attr)) # Check against all null elements, for combinedType=set/list, null is [] - if ( - value is not None - and value != [] - and attr not in self.data[table][group_key].keys() - ): - # if data for this field hasn't already been captured - self.data[table][group_key][attr] = value - elif value is not None and value != []: - if "combinedType" in self.spec[table][attr]: - combined_type = self.spec[table][attr]["combinedType"] - existing_value = self.data[table][group_key][attr] - if combined_type in ["all", "any", "min", "max"]: - values = [existing_value, value] - # normally calling eval() is a bad idea, but here values are restricted, so okay - self.data[table][group_key][attr] = eval(combined_type)( - values - ) - elif combined_type in ["list", "set"]: - if combined_type == "set": - self.data[table][group_key][attr] = list( - set(existing_value + value) - ) - else: - self.data[table][group_key][attr] = ( - existing_value + value + if value is not None and value != []: + if attr not in self.data[table][group_key].keys(): + # if data for this field hasn't already been captured + self.data[table][group_key][attr] = value + + else: + if "combinedType" in self.spec[table][attr]: + combined_type = self.spec[table][attr]["combinedType"] + existing_value = self.data[table][group_key][attr] + + if combined_type in ["all", "any", "min", "max"]: + values = [existing_value, value] + # normally calling eval() is a bad idea, but here values are restricted, so okay + self.data[table][group_key][attr] = eval(combined_type)( + values ) - elif combined_type == "firstNonNull": - # only use the first value found - pass + elif combined_type in ["list", "set"]: + if combined_type == "set": + self.data[table][group_key][attr] = list( + set(existing_value + value) + ) + else: + self.data[table][group_key][attr] = ( + existing_value + value + ) + elif combined_type == "firstNonNull": + # only use the first value found + pass else: - raise ValueError( - f"Unrecognised combined type: {combined_type}" + # otherwise overwrite? + logging.debug( + f"Multiple rows of data found for {attr} without a" + " combinedType listed. Data being overwritten." ) - else: - # otherwise overwrite? - logging.debug( - f"Multiple rows of data found for {attr} without a" - " combinedType listed. Data being overwritten." - ) - self.data[table][group_key][attr] = value + self.data[table][group_key][attr] = value + elif kind == "oneToMany": for match in self.spec[table]: if "if" not in match: diff --git a/tests/parsers/stop-overwriting.toml b/tests/parsers/stop-overwriting.toml index a36ae5b..7ebdc0b 100644 --- a/tests/parsers/stop-overwriting.toml +++ b/tests/parsers/stop-overwriting.toml @@ -26,6 +26,13 @@ { field = "enrolment" }, ] + [visit.icu_admission_date] + combinedType = "list" + excludeWhen = "none" + fields = [ + {field = "icu_admission_date"} + ] + [visit.treatment_antiviral_type] combinedType = "set" excludeWhen = "none" diff --git a/tests/sources/stop-overwriting.csv b/tests/sources/stop-overwriting.csv index 878a0c3..2455376 100644 --- a/tests/sources/stop-overwriting.csv +++ b/tests/sources/stop-overwriting.csv @@ -1,11 +1,11 @@ -subjid,redcap,first_admit,enrolment,daily_antiviral_type___1,daily_antiviral_type___2,daily_antiviral_type___3,overall_antiviral_dc___1,overall_antiviral_dc___2,overall_antiviral_dc___3 -1,admit,2023-11-20,2023-11-23,0,0,0,0,0,0 -1,discharge,,,0,0,0,1,0,1 -1,day1,2023-11-19,,1,0,0,0,0,0 -1,day2,,,1,0,0,0,0,0 -2,admit,,2022-11-23,0,0,0,0,0,0 -2,discharge,,2020-11-23,0,0,0,0,0,0 -2,day1,,,0,1,0,0,0,0 -3,admit,,2020-02-20,0,0,0,0,0,0 -3,discharge,,,0,0,0,0,1,1 -3,day1,,,1,0,0,0,0,0 \ No newline at end of file +subjid,redcap,first_admit,enrolment,icu_admission_date,daily_antiviral_type___1,daily_antiviral_type___2,daily_antiviral_type___3,overall_antiviral_dc___1,overall_antiviral_dc___2,overall_antiviral_dc___3 +1,admit,2023-11-20,2023-11-23,,0,0,0,0,0,0 +1,discharge,,,,0,0,0,1,0,1 +1,day1,2023-11-19,,,1,0,0,0,0,0 +1,day2,,,,1,0,0,0,0,0 +2,admit,,2022-11-23,,0,0,0,0,0,0 +2,discharge,,2020-11-23,2020-11-25,0,0,0,0,0,0 +2,day1,,,2020-11-30,0,1,0,0,0,0 +3,admit,,2020-02-20,,0,0,0,0,0,0 +3,discharge,,,,0,0,0,0,1,1 +3,day1,,,,1,0,0,0,0,0 \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index edf4ea7..32116a5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1328,6 +1328,7 @@ def test_combinedtype_wordsubstituteset(test_row, test_combination, expected): { "subject_id": 2, "start_date": "2022-11-23", + "icu_admission_date": unordered(["2020-11-25", "2020-11-30"]), "treatment_antiviral_type": ["Lopinavir"], }, {