diff --git a/CONFIG.md b/CONFIG.md index e0ebc1e..d6ffd3d 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -49,7 +49,9 @@ ckanext.datapusher_plus.enable_druf = true When DRUF is enabled, the following templates are overridden: - `snippets/add_dataset.html`: Changes "Add Dataset" to redirect to resource upload - `package/snippets/package_form.html`: Modifies form stages to show "Add data" first +- `package/snippets/resource_form.html`: Modifies resource form stages for the resource-first flow - `scheming/package/snippets/package_form.html`: Modifies scheming form stages +- `scheming/package/snippets/resource_form.html`: Modifies scheming resource form stages **Requirements:** - No special CKAN version requirements diff --git a/README.md b/README.md index d288339..2dfa1bf 100644 --- a/README.md +++ b/README.md @@ -303,6 +303,8 @@ ckan config-tool /etc/ckan/default/ckan.ini "ckanext.datapusher_plus.api_token=$ 7. Add the rest of the DP+ config to your CKAN config (e.g. `/etc/ckan/default/ckan.ini`): +> **Note:** The block below is an illustrative example, not a list of defaults. Several values (e.g. `preview_rows`, `chunk_size`, `dedup`, `auto_index_threshold`, `ignore_file_hash`) differ from DP+'s actual defaults. The authoritative defaults live in [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml). Only set the keys you actually want to override. + ```ini # datapusher-plus settings ckanext.datapusher_plus.use_proxy = false @@ -310,7 +312,7 @@ ckanext.datapusher_plus.download_proxy = ckanext.datapusher_plus.ssl_verify = false # supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas ckanext.datapusher_plus.upload_log_level = INFO -ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip +ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx ods geojson shp qgis zip ckanext.datapusher_plus.pii_screening = false ckanext.datapusher_plus.pii_found_abort = false ckanext.datapusher_plus.pii_regex_resource_id_or_alias = @@ -408,7 +410,9 @@ Use a DP+ extended scheming schema: scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml ``` -Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config.py) for details. +Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config.py) and [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml) for details. + +> **Note:** The block below is an illustrative example, not a list of defaults. Several values differ from DP+'s actual defaults — see [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml) for the authoritative defaults. Only set the keys you actually want to override. >```ini > ckanext.datapusher_plus.use_proxy = false @@ -416,7 +420,7 @@ Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config. > ckanext.datapusher_plus.ssl_verify = false > # supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas > ckanext.datapusher_plus.upload_log_level = INFO -> ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip +> ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx ods geojson shp qgis zip > ckanext.datapusher_plus.pii_screening = false > ckanext.datapusher_plus.pii_found_abort = false > ckanext.datapusher_plus.pii_regex_resource_id_or_alias = @@ -454,7 +458,6 @@ Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config. > ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_butecode_cache > ckanext.datapusher_plus.auto_unzip_one_file = true > ckanext.datapusher_plus.api_token = ->ckanext.datapusher_plus.describeGPT_api_key = >``` > >and add this entry to your CKAN's `resource_formats.json` file. diff --git a/ckanext/datapusher_plus/config.py b/ckanext/datapusher_plus/config.py index 4f89146..93bab8a 100644 --- a/ckanext/datapusher_plus/config.py +++ b/ckanext/datapusher_plus/config.py @@ -46,7 +46,9 @@ FORMATS = FORMATS.split() # PII screening settings -PII_SCREENING = tk.asbool(tk.config.get("ckanext.datastore_plus.pii_screening", False)) +PII_SCREENING = tk.asbool( + tk.config.get("ckanext.datapusher_plus.pii_screening", False) +) PII_FOUND_ABORT = tk.asbool( tk.config.get("ckanext.datapusher_plus.pii_found_abort", False) ) @@ -72,7 +74,11 @@ ) # Data processing settings -PREVIEW_ROWS = tk.asint(tk.config.get("ckanext.datapusher_plus.preview_rows", "1000")) +# Fallback aligned to the config_declaration.yaml default (0 = load all +# rows). Under CKAN 2.10+ declarative config the declared default wins, +# so 0 is already the effective runtime default; this just removes a +# stale, never-reached "1000" fallback that contradicted the declaration. +PREVIEW_ROWS = tk.asint(tk.config.get("ckanext.datapusher_plus.preview_rows", "0")) TIMEOUT = tk.asint(tk.config.get("ckanext.datapusher_plus.download_timeout", "300")) QSV_COMMAND_TIMEOUT = tk.asint( tk.config.get("ckanext.datapusher_plus.qsv_command_timeout", "1800") diff --git a/tests/test_pii_screening_config_key.py b/tests/test_pii_screening_config_key.py new file mode 100644 index 0000000..b116970 --- /dev/null +++ b/tests/test_pii_screening_config_key.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +""" +Regression coverage for the ``pii_screening`` config-key typo fix. + +A documentation audit (2026-05-19) found that ``config.py`` read +``PII_SCREENING`` from ``ckanext.datastore_plus.pii_screening`` — a +typo: ``datastore_plus`` instead of ``datapusher_plus``. Every other +DP+ setting, the ``config_declaration.yaml`` declaration, and the +README all use the ``datapusher_plus`` namespace. The practical +effect of the typo: operators enabling PII screening via the +documented key ``ckanext.datapusher_plus.pii_screening`` had no +effect — ``PII_SCREENING`` silently stayed at its ``False`` fallback, +so the PII-screening feature could not be turned on as documented. + +These tests pin three properties: + +1. The documented key ``ckanext.datapusher_plus.pii_screening`` + actually drives ``PII_SCREENING`` — the functional fix. +2. The old typo'd key ``ckanext.datastore_plus.pii_screening`` does + NOT drive it — proves the read moved off the wrong namespace and + would catch a regression that reintroduced the typo. +3. The key string read in ``config.py`` matches the one declared in + ``config_declaration.yaml`` — a #179-style declaration-vs-code + drift guard (AST-parsed to avoid the CKAN bootstrap). +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parent.parent + + +@pytest.fixture(autouse=True) +def _reset_config_module(): + """Reload ``ckanext.datapusher_plus.config`` after each test so a + ``_reload_config`` call here can't leak a monkeypatched config + snapshot into later tests via ``sys.modules``. + + ``importlib.reload`` mutates the module in place; without this + teardown the last reload's constants would persist for the rest + of the session, making unrelated tests order-dependent. + + As an autouse fixture this is set up before the test's + ``monkeypatch`` fixture, so its finalizer runs *after* + ``monkeypatch`` has restored ``tk.config`` — the reload therefore + recomputes the module constants from the pristine + test-environment config. + """ + yield + try: + import importlib + + import ckanext.datapusher_plus.config as conf + + importlib.reload(conf) + except Exception: + # A reload failure during teardown must not mask the actual + # test result; the next test that needs a clean config does + # its own reload via ``_reload_config`` anyway. + pass + + +def _reload_config(monkeypatch, key, value): + """Set ``key`` on ``tk.config`` and reload ``config.py`` so its + import-time ``PII_SCREENING`` constant is recomputed. + + ``importlib.reload`` mutates the module object in place, so the + recomputed constants would otherwise persist past this test — + ``monkeypatch`` restores ``tk.config`` but does not re-run + ``config.py``. The ``_reset_config_module`` autouse fixture below + reloads the module once more on teardown (after ``tk.config`` is + restored) so the pristine state is back for later tests regardless + of suite order. + """ + import importlib + + pytest.importorskip("ckan") + import ckan.plugins.toolkit as tk + + monkeypatch.setitem(tk.config, key, value) + import ckanext.datapusher_plus.config as conf + + return importlib.reload(conf) + + +def test_documented_key_enables_pii_screening(monkeypatch): + # The core fix: the key the README + config_declaration.yaml + # document must actually flip PII_SCREENING on. + conf = _reload_config( + monkeypatch, "ckanext.datapusher_plus.pii_screening", True + ) + assert conf.PII_SCREENING is True + + +def test_old_typo_key_has_no_effect(monkeypatch): + # The pre-fix typo'd key must NOT drive PII_SCREENING anymore. + # Set ONLY the typo'd key to True, with the correct key absent, + # and PII_SCREENING must stay at its False default. A regression + # that reinstated ``datastore_plus`` would flip this to True. + pytest.importorskip("ckan") + import ckan.plugins.toolkit as tk + + # Ensure the correct key is absent so the assertion is unambiguous + # (config_declaration.yaml declares it with default false, so the + # test env may have it pre-populated). + monkeypatch.delitem( + tk.config, "ckanext.datapusher_plus.pii_screening", raising=False + ) + conf = _reload_config( + monkeypatch, "ckanext.datastore_plus.pii_screening", True + ) + assert conf.PII_SCREENING is False + + +def test_config_py_key_matches_declaration(): + """The key string read in ``config.py`` must match the one + declared in ``config_declaration.yaml``. A namespace typo in + either source is exactly the bug this test exists to catch.""" + import ast + + # PyYAML guarded the same way the #112 / #142 / #61 drift-guards + # are — degrade to a skip in dep-light CI rather than ImportError. + try: + import yaml + except ImportError: + pytest.skip("PyYAML not available") + + # config_declaration.yaml side — iterate all groups/options so the + # guard survives a future declaration restructure. + yaml_path = ( + REPO_ROOT + / "ckanext" + / "datapusher_plus" + / "config_declaration.yaml" + ) + decl = yaml.safe_load(yaml_path.read_text(encoding="utf-8")) + declared_keys = { + opt.get("key") + for group in decl.get("groups", []) + for opt in group.get("options", []) + } + assert "ckanext.datapusher_plus.pii_screening" in declared_keys, ( + "config_declaration.yaml must declare " + "ckanext.datapusher_plus.pii_screening." + ) + + # config.py side — AST-find the PII_SCREENING assignment and the + # config key string it reads. + config_py = ( + REPO_ROOT / "ckanext" / "datapusher_plus" / "config.py" + ).read_text() + tree = ast.parse(config_py) + found_key = None + for node in ast.walk(tree): + if not ( + isinstance(node, ast.Assign) + and any( + getattr(t, "id", None) == "PII_SCREENING" + for t in node.targets + ) + ): + continue + for call in ast.walk(node): + if ( + isinstance(call, ast.Call) + and getattr(call.func, "attr", None) == "get" + and call.args + and isinstance(call.args[0], ast.Constant) + ): + found_key = call.args[0].value + break + break + assert found_key == "ckanext.datapusher_plus.pii_screening", ( + f"config.py reads PII_SCREENING from {found_key!r}; must be " + "'ckanext.datapusher_plus.pii_screening' to match the " + "config_declaration.yaml declaration and the README " + "(the 'datastore_plus' typo was the documented-but-dead " + "key bug)." + )