Skip to content

Commit

Permalink
Stop unwanted overwriting (#91)
Browse files Browse the repository at this point in the history
* Draft: stop unwanted overwriting

* Add first test

* Increase coverage

Fixes: #90
  • Loading branch information
pipliggins committed Nov 23, 2023
1 parent 30ec076 commit 25014df
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 1 deletion.
36 changes: 35 additions & 1 deletion adtl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,41 @@ def update_table(self, table: str, row: StrDict):
value = get_value(row, self.spec[table][attr], self.ctx(attr))
# Check against all null elements, for combinedType=set/list, null is []
if value is not None and value != []:
self.data[table][group_key][attr] = value
if attr not in self.data[table][group_key].keys():
# if data for this field hasn't already been captured
self.data[table][group_key][attr] = value

else:
if "combinedType" in self.spec[table][attr]:
combined_type = self.spec[table][attr]["combinedType"]
existing_value = self.data[table][group_key][attr]

if combined_type in ["all", "any", "min", "max"]:
values = [existing_value, value]
# normally calling eval() is a bad idea, but here values are restricted, so okay
self.data[table][group_key][attr] = eval(combined_type)(
values
)
elif combined_type in ["list", "set"]:
if combined_type == "set":
self.data[table][group_key][attr] = list(
set(existing_value + value)
)
else:
self.data[table][group_key][attr] = (
existing_value + value
)
elif combined_type == "firstNonNull":
# only use the first value found
pass
else:
# otherwise overwrite?
logging.debug(
f"Multiple rows of data found for {attr} without a"
" combinedType listed. Data being overwritten."
)
self.data[table][group_key][attr] = value

elif kind == "oneToMany":
for match in self.spec[table]:
if "if" not in match:
Expand Down
47 changes: 47 additions & 0 deletions tests/parsers/stop-overwriting.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Parser specification for the overwrite-prevention test. The matching source
# file (tests/sources/stop-overwriting.csv) has several rows per subject, and
# every attribute below must end up combined across those rows instead of
# being replaced by whichever row was read last.
[adtl]
name = "overwrite"
description = "Example using groupBy on data with multiple rows per subject"

# Rows are grouped on subject_id; the test expects one output record per
# distinct subject.
# NOTE(review): the exact semantics of aggregation = "lastNotNull" are not
# visible in this file - confirm against the adtl specification.
[adtl.tables.visit]
kind = "groupBy"
groupBy = "subject_id"
aggregation = "lastNotNull"

[visit]

# Grouping key, read from the "subjid" column.
[visit.subject_id]
field = "subjid"
description = "Subject ID"

# combinedType = "min": when several rows carry a first_admit value, the
# smallest one is expected (subject 1: 2023-11-20 and 2023-11-19 -> 2023-11-19).
[visit.earliest_admission]
combinedType = "min"
fields = [
{ field = "first_admit" },
]

# combinedType = "firstNonNull": the first value found is kept and values from
# later rows are ignored (subject 2: 2022-11-23 is kept, 2020-11-23 is dropped).
[visit.start_date]
combinedType = "firstNonNull"
fields = [
{ field = "first_admit" },
{ field = "enrolment" },
]

# combinedType = "list": values from different rows are concatenated
# (subject 2 ends up with both ICU admission dates).
# NOTE(review): excludeWhen = "none" presumably drops empty entries from the
# combined value - confirm against the adtl specification.
[visit.icu_admission_date]
combinedType = "list"
excludeWhen = "none"
fields = [
{field = "icu_admission_date"}
]

# combinedType = "set": labels collected from all rows are merged without
# duplicates. Each checkbox column maps the value 1 to a drug name; the daily
# and overall columns for the same drug map to the same label.
[visit.treatment_antiviral_type]
combinedType = "set"
excludeWhen = "none"
fields = [
{ field = "daily_antiviral_type___1", values = { 1 = "Ribavirin" } },
{ field = "daily_antiviral_type___2", values = { 1 = "Lopinavir" } },
{ field = "daily_antiviral_type___3", values = { 1 = "Interferon" } },
{ field = "overall_antiviral_dc___1", values = { 1 = "Ribavirin" } },
{ field = "overall_antiviral_dc___2", values = { 1 = "Lopinavir" } },
{ field = "overall_antiviral_dc___3", values = { 1 = "Interferon" } },
]

11 changes: 11 additions & 0 deletions tests/sources/stop-overwriting.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
subjid,redcap,first_admit,enrolment,icu_admission_date,daily_antiviral_type___1,daily_antiviral_type___2,daily_antiviral_type___3,overall_antiviral_dc___1,overall_antiviral_dc___2,overall_antiviral_dc___3
1,admit,2023-11-20,2023-11-23,,0,0,0,0,0,0
1,discharge,,,,0,0,0,1,0,1
1,day1,2023-11-19,,,1,0,0,0,0,0
1,day2,,,,1,0,0,0,0,0
2,admit,,2022-11-23,,0,0,0,0,0,0
2,discharge,,2020-11-23,2020-11-25,0,0,0,0,0,0
2,day1,,,2020-11-30,0,1,0,0,0,0
3,admit,,2020-02-20,,0,0,0,0,0,0
3,discharge,,,,0,0,0,0,1,1
3,day1,,,,1,0,0,0,0,0
30 changes: 30 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,3 +1316,33 @@ def test_combinedtype_wordsubstituteset(test_row, test_combination, expected):
}

assert parser.get_combined_type(test_row, test_rule) == unordered(expected)


# Expected "visit" rows after parsing tests/sources/stop-overwriting.csv with
# tests/parsers/stop-overwriting.toml: one record per subject, with values
# combined across that subject's rows. Attributes that are empty in every row
# of a subject are absent from its record.
# Multi-valued fields are wrapped in unordered() (assumed to be the
# pytest-unordered helper - confirm) so the comparison does not depend on
# element order; the "set" merge goes through list(set(...)), which has no
# guaranteed order.
OVERWRITE_OUTPUT = [
{
# Subject 1: first_admit appears twice (2023-11-20, then 2023-11-19).
# "min" keeps the earlier date, "firstNonNull" keeps the first one read.
"subject_id": 1,
"earliest_admission": "2023-11-19",
"start_date": "2023-11-20",
"treatment_antiviral_type": unordered(["Ribavirin", "Interferon"]),
},
{
# Subject 2: no first_admit at all, so earliest_admission is absent and
# start_date comes from the first enrolment value; both ICU dates are kept.
"subject_id": 2,
"start_date": "2022-11-23",
"icu_admission_date": unordered(["2020-11-25", "2020-11-30"]),
"treatment_antiviral_type": ["Lopinavir"],
},
{
# Subject 3: antivirals are flagged on two different rows and merged
# into a single set.
"subject_id": 3,
"start_date": "2020-02-20",
"treatment_antiviral_type": unordered(["Ribavirin", "Lopinavir", "Interferon"]),
},
]


def test_no_overwriting():
    """Parse the stop-overwriting fixtures and compare the "visit" table.

    The source CSV has several rows per subject; the parsed table must equal
    OVERWRITE_OUTPUT, i.e. values from different rows are combined rather
    than replaced by the last row read.
    """
    visit_parser = parser.Parser(TEST_PARSERS_PATH / "stop-overwriting.toml")
    source_csv = TEST_SOURCES_PATH / "stop-overwriting.csv"

    parsed = visit_parser.parse(source_csv)
    overwriting_output = list(parsed.read_table("visit"))

    # Keep the actual value on the left: the expected rows contain
    # unordered() wrappers that take part in the equality check.
    assert overwriting_output == OVERWRITE_OUTPUT

0 comments on commit 25014df

Please sign in to comment.