Skip to content

Commit

Permalink
Allow external definitions, fixes #10
Browse files Browse the repository at this point in the history
  • Loading branch information
abhidg committed May 17, 2023
1 parent bb5b1e1 commit 115bf10
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 8 deletions.
35 changes: 33 additions & 2 deletions adtl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,14 +385,31 @@ def make_fields_optional(
return _schema


def relative_path(source_file, target_file):
    """Return *target_file* resolved against the directory of *source_file*.

    The result is the parent directory of ``source_file`` joined with
    ``target_file``; no filesystem access is performed.
    """
    base_dir = Path(source_file).parent
    return base_dir.joinpath(target_file)


def read_definition(file: Union[str, Path]) -> Dict[str, Any]:
    """Read a definitions file into a dictionary.

    Args:
        file: Path to a ``.json`` or ``.toml`` file. Plain strings are
            accepted as well as ``Path`` objects, since paths supplied via
            the ``--include-def`` command-line flag arrive as strings.

    Returns:
        The parsed contents of the file.

    Raises:
        ValueError: If the file suffix is neither ``.json`` nor ``.toml``.
    """
    # Coerce to Path so that string paths do not fail on ``.suffix``/``.open``.
    file = Path(file)
    if file.suffix == ".json":
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with file.open(encoding="utf-8") as fp:
            return json.load(fp)
    if file.suffix == ".toml":
        # tomli requires a binary file handle.
        with file.open("rb") as fp:
            return tomli.load(fp)
    raise ValueError(f"Unsupported file format: {file}")


class Parser:
def __init__(self, spec: Union[str, Path, StrDict]):
def __init__(self, spec: Union[str, Path, StrDict], include_defs: List[str] = []):
"Loads specification from spec in format (default json)"

self.data: StrDict = {}
self.defs: StrDict = {}
self.fieldnames: Dict[str, List[str]] = {}
self.specfile = None
self.include_defs = include_defs
self.validators: StrDict = {}
self.schemas: StrDict = {}
self.date_fields = []
Expand All @@ -414,7 +431,15 @@ def __init__(self, spec: Union[str, Path, StrDict]):
else:
self.spec = spec
self.header = self.spec.get("adtl", {})
if self.specfile:
self.include_defs = [
relative_path(self.specfile, definition_file)
for definition_file in self.header.get("include-def", [])
] + self.include_defs
self.defs = self.header.get("defs", {})
if self.include_defs:
for definition_file in self.include_defs:
self.defs.update(read_definition(definition_file))
self.spec = expand_refs(self.spec, self.defs)

self.validate_spec()
Expand Down Expand Up @@ -714,8 +739,14 @@ def main():
cmd.add_argument(
"--encoding", help="Encoding input file is in", default="utf-8-sig"
)
cmd.add_argument(
"--include-def",
action="append",
help="Include external definition (TOML or JSON)",
)
args = cmd.parse_args()
spec = Parser(args.spec)
include_defs = args.include_def or []
spec = Parser(args.spec, include_defs=include_defs)
if output := spec.parse(args.file, encoding=args.encoding).save(
args.output or spec.name
):
Expand Down
40 changes: 38 additions & 2 deletions docs/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ These metadata fields are defined under a header key `adtl`.

### Optional fields

### Optional fields

* **defs**: Definitions that can be referred to elsewhere in the schema
* **include-def** (list): List of additional TOML or JSON files to import as
definitions
* **defaultDateFormat**: Default source date format, applied to all fields
with either "date_" / "_date" in the field name or that have format date
set in the JSON schema
Expand Down Expand Up @@ -81,6 +81,42 @@ someTable = { groupBy = "subjid", aggregation = "lastNotNull" }
someReference = { values = { 1 = true, 2 = false } }
```

Often some definitions are repeated across files. adtl supports including
definitions from external files using the *include-def* keyword under the
`[adtl]` section. As an example, a mapping of country codes to country names
could be stored in `countries.toml`:

```toml
[countryMap.values]
1 = "ALB"
2 = "ZZZ"
# and so on
```

This file can then be included in an adtl specification, and `countryMap` used as a reference just as if it had been defined in the specification file directly:

```toml
[adtl]
include-def = ["countries.toml"]

# ...

[cases.country_iso3]
field = "country"
ref = "countryMap"
```
Definition files can also be included from the command line by passing the
`--include-def` flag to adtl. This is useful when the included file can change
from one run to another, or in cases where the definitions/mappings are located
externally. The following would produce an equivalent result to the
`include-def` assignment in the above example, assuming `data.csv` is the source
data file:
```shell
adtl parser.toml data.csv --include-def countries.toml
```
## Table mappings
Each table has its associated field mappings under a key of the same
Expand Down
30 changes: 30 additions & 0 deletions tests/parsers/groupBy-defs-include.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[adtl]
name = "groupBy"
description = "Example using groupBy"
include-def = ["include-def.toml"]

[subject]
dataset_id = "dataset-2020-03-23"
country_iso3 = "GBR"

[subject.sex_at_birth]
ref = "sexMapping"
field = "sex"
description = "Sex at Birth"

[subject.subject_id]
field = "subjid"
description = "Subject ID"

[subject.enrolment_date]
field = "dsstdat"
description = "Date of Enrolment"

[subject.admission_date]
field = "hostdat"
description = "Admission date at this facility"

[adtl.tables.subject]
kind = "groupBy"
groupBy = "subject_id"
aggregation = "lastNotNull"
29 changes: 29 additions & 0 deletions tests/parsers/groupBy-external-defs.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[adtl]
name = "groupBy"
description = "Example using groupBy"

[subject]
dataset_id = "dataset-2020-03-23"
country_iso3 = "GBR"

[subject.sex_at_birth]
ref = "sexMapping"
field = "sex"
description = "Sex at Birth"

[subject.subject_id]
field = "subjid"
description = "Subject ID"

[subject.enrolment_date]
field = "dsstdat"
description = "Date of Enrolment"

[subject.admission_date]
field = "hostdat"
description = "Admission date at this facility"

[adtl.tables.subject]
kind = "groupBy"
groupBy = "subject_id"
aggregation = "lastNotNull"
4 changes: 4 additions & 0 deletions tests/parsers/include-def.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[sexMapping.values]
1 = "male"
2 = "female"
3 = "non_binary"
24 changes: 20 additions & 4 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,26 @@ def test_reference_expansion():
assert ps_ref.spec == ps_noref.spec


def test_format_equivalence():
    """The JSON and TOML forms of the same specification load identically."""
    json_spec, toml_spec = (
        parser.Parser(TEST_PARSERS_PATH / filename).spec
        for filename in ("groupBy-defs.json", "groupBy-defs.toml")
    )
    assert json_spec == toml_spec
def test_reference_expansion_with_include():
    """Inline ``defs`` and an ``include-def`` file expand to the same spec."""
    inline_spec = parser.Parser(TEST_PARSERS_PATH / "groupBy-defs.toml").spec
    included_spec = parser.Parser(
        TEST_PARSERS_PATH / "groupBy-defs-include.toml"
    ).spec
    # Remove the one header key in each spec that names the definition source,
    # so that only the expanded content is compared.
    inline_spec["adtl"].pop("defs")
    included_spec["adtl"].pop("include-def")
    assert inline_spec == included_spec


def test_external_definitions():
    """Definitions passed via ``include_defs`` end up in ``Parser.defs``."""
    spec_path = TEST_PARSERS_PATH / "groupBy-external-defs.toml"
    defs_path = TEST_PARSERS_PATH / "include-def.toml"

    # Without the external definitions, constructing the parser raises KeyError.
    with pytest.raises(KeyError):
        parser.Parser(spec_path)

    expected_values = {"1": "male", "2": "female", "3": "non_binary"}
    ps = parser.Parser(spec_path, include_defs=[defs_path])
    assert ps.defs["sexMapping"]["values"] == expected_values


FOR_PATTERN = [
Expand Down

0 comments on commit 115bf10

Please sign in to comment.