Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONFIG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ ckanext.datapusher_plus.enable_druf = true
When DRUF is enabled, the following templates are overridden:
- `snippets/add_dataset.html`: Changes "Add Dataset" to redirect to resource upload
- `package/snippets/package_form.html`: Modifies form stages to show "Add data" first
- `package/snippets/resource_form.html`: Modifies resource form stages for the resource-first flow
- `scheming/package/snippets/package_form.html`: Modifies scheming form stages
- `scheming/package/snippets/resource_form.html`: Modifies scheming resource form stages

**Requirements:**
- No special CKAN version requirements
Expand Down
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,16 @@ ckan config-tool /etc/ckan/default/ckan.ini "ckanext.datapusher_plus.api_token=$

7. Add the rest of the DP+ config to your CKAN config (e.g. `/etc/ckan/default/ckan.ini`):

> **Note:** The block below is an illustrative example, not a list of defaults. Several values (e.g. `preview_rows`, `chunk_size`, `dedup`, `auto_index_threshold`, `ignore_file_hash`) differ from DP+'s actual defaults. The authoritative defaults live in [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml). Only set the keys you actually want to override.

```ini
# datapusher-plus settings
ckanext.datapusher_plus.use_proxy = false
ckanext.datapusher_plus.download_proxy =
ckanext.datapusher_plus.ssl_verify = false
# supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas
ckanext.datapusher_plus.upload_log_level = INFO
ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip
ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx ods geojson shp qgis zip
ckanext.datapusher_plus.pii_screening = false
ckanext.datapusher_plus.pii_found_abort = false
ckanext.datapusher_plus.pii_regex_resource_id_or_alias =
Expand Down Expand Up @@ -408,15 +410,17 @@ Use a DP+ extended scheming schema:
scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml
```

Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config.py) for details.
Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config.py) and [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml) for details.

> **Note:** The block below is an illustrative example, not a list of defaults. Several values differ from DP+'s actual defaults — see [`config_declaration.yaml`](ckanext/datapusher_plus/config_declaration.yaml) for the authoritative defaults. Only set the keys you actually want to override.

>```ini
> ckanext.datapusher_plus.use_proxy = false
> ckanext.datapusher_plus.download_proxy =
> ckanext.datapusher_plus.ssl_verify = false
> # supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas
> ckanext.datapusher_plus.upload_log_level = INFO
> ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsxb xlsm ods geojson shp qgis zip
> ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx ods geojson shp qgis zip
> ckanext.datapusher_plus.pii_screening = false
> ckanext.datapusher_plus.pii_found_abort = false
> ckanext.datapusher_plus.pii_regex_resource_id_or_alias =
Expand Down Expand Up @@ -454,7 +458,6 @@ Configure DP+ numerous settings. See [config.py](ckanext/datapusher_plus/config.
> ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_butecode_cache
> ckanext.datapusher_plus.auto_unzip_one_file = true
> ckanext.datapusher_plus.api_token = <CKAN service account token for CKAN user with sysadmin privileges>
>ckanext.datapusher_plus.describeGPT_api_key = <Token for OpenAI API compatible service>
>```
>
>and add this entry to your CKAN's `resource_formats.json` file.
Expand Down
10 changes: 8 additions & 2 deletions ckanext/datapusher_plus/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@
FORMATS = FORMATS.split()

# PII screening settings
PII_SCREENING = tk.asbool(tk.config.get("ckanext.datastore_plus.pii_screening", False))
PII_SCREENING = tk.asbool(
tk.config.get("ckanext.datapusher_plus.pii_screening", False)
)
PII_FOUND_ABORT = tk.asbool(
tk.config.get("ckanext.datapusher_plus.pii_found_abort", False)
)
Expand All @@ -72,7 +74,11 @@
)

# Data processing settings
PREVIEW_ROWS = tk.asint(tk.config.get("ckanext.datapusher_plus.preview_rows", "1000"))
# Fallback aligned to the config_declaration.yaml default (0 = load all
# rows). Under CKAN 2.10+ declarative config the declared default wins,
# so 0 is already the effective runtime default; this just removes a
# stale, never-reached "1000" fallback that contradicted the declaration.
PREVIEW_ROWS = tk.asint(tk.config.get("ckanext.datapusher_plus.preview_rows", "0"))
TIMEOUT = tk.asint(tk.config.get("ckanext.datapusher_plus.download_timeout", "300"))
QSV_COMMAND_TIMEOUT = tk.asint(
tk.config.get("ckanext.datapusher_plus.qsv_command_timeout", "1800")
Expand Down
183 changes: 183 additions & 0 deletions tests/test_pii_screening_config_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-
"""
Regression coverage for the ``pii_screening`` config-key typo fix.

A documentation audit (2026-05-19) found that ``config.py`` read
``PII_SCREENING`` from ``ckanext.datastore_plus.pii_screening`` — a
typo: ``datastore_plus`` instead of ``datapusher_plus``. Every other
DP+ setting, the ``config_declaration.yaml`` declaration, and the
README all use the ``datapusher_plus`` namespace. The practical
effect of the typo: operators enabling PII screening via the
documented key ``ckanext.datapusher_plus.pii_screening`` had no
effect — ``PII_SCREENING`` silently stayed at its ``False`` fallback,
so the PII-screening feature could not be turned on as documented.

These tests pin three properties:

1. The documented key ``ckanext.datapusher_plus.pii_screening``
actually drives ``PII_SCREENING`` — the functional fix.
2. The old typo'd key ``ckanext.datastore_plus.pii_screening`` does
NOT drive it — proves the read moved off the wrong namespace and
would catch a regression that reintroduced the typo.
3. The key string read in ``config.py`` matches the one declared in
``config_declaration.yaml`` — a #179-style declaration-vs-code
drift guard (AST-parsed to avoid the CKAN bootstrap).
"""

from __future__ import annotations

from pathlib import Path

import pytest


REPO_ROOT = Path(__file__).resolve().parent.parent


@pytest.fixture(autouse=True)
def _reset_config_module():
"""Reload ``ckanext.datapusher_plus.config`` after each test so a
``_reload_config`` call here can't leak a monkeypatched config
snapshot into later tests via ``sys.modules``.

``importlib.reload`` mutates the module in place; without this
teardown the last reload's constants would persist for the rest
of the session, making unrelated tests order-dependent.

As an autouse fixture this is set up before the test's
``monkeypatch`` fixture, so its finalizer runs *after*
``monkeypatch`` has restored ``tk.config`` — the reload therefore
recomputes the module constants from the pristine
test-environment config.
"""
yield
try:
import importlib

import ckanext.datapusher_plus.config as conf

importlib.reload(conf)
except Exception:
# A reload failure during teardown must not mask the actual
# test result; the next test that needs a clean config does
# its own reload via ``_reload_config`` anyway.
pass


def _reload_config(monkeypatch, key, value):
"""Set ``key`` on ``tk.config`` and reload ``config.py`` so its
import-time ``PII_SCREENING`` constant is recomputed.

``importlib.reload`` mutates the module object in place, so the
recomputed constants would otherwise persist past this test —
``monkeypatch`` restores ``tk.config`` but does not re-run
``config.py``. The ``_reset_config_module`` autouse fixture below
reloads the module once more on teardown (after ``tk.config`` is
restored) so the pristine state is back for later tests regardless
of suite order.
"""
import importlib

pytest.importorskip("ckan")
import ckan.plugins.toolkit as tk

monkeypatch.setitem(tk.config, key, value)
import ckanext.datapusher_plus.config as conf

return importlib.reload(conf)
Comment thread
jqnatividad marked this conversation as resolved.


def test_documented_key_enables_pii_screening(monkeypatch):
# The core fix: the key the README + config_declaration.yaml
# document must actually flip PII_SCREENING on.
conf = _reload_config(
monkeypatch, "ckanext.datapusher_plus.pii_screening", True
)
assert conf.PII_SCREENING is True


def test_old_typo_key_has_no_effect(monkeypatch):
# The pre-fix typo'd key must NOT drive PII_SCREENING anymore.
# Set ONLY the typo'd key to True, with the correct key absent,
# and PII_SCREENING must stay at its False default. A regression
# that reinstated ``datastore_plus`` would flip this to True.
pytest.importorskip("ckan")
import ckan.plugins.toolkit as tk

# Ensure the correct key is absent so the assertion is unambiguous
# (config_declaration.yaml declares it with default false, so the
# test env may have it pre-populated).
monkeypatch.delitem(
tk.config, "ckanext.datapusher_plus.pii_screening", raising=False
)
conf = _reload_config(
monkeypatch, "ckanext.datastore_plus.pii_screening", True
)
assert conf.PII_SCREENING is False


def test_config_py_key_matches_declaration():
"""The key string read in ``config.py`` must match the one
declared in ``config_declaration.yaml``. A namespace typo in
either source is exactly the bug this test exists to catch."""
import ast

# PyYAML guarded the same way the #112 / #142 / #61 drift-guards
# are — degrade to a skip in dep-light CI rather than ImportError.
try:
import yaml
except ImportError:
pytest.skip("PyYAML not available")

# config_declaration.yaml side — iterate all groups/options so the
# guard survives a future declaration restructure.
yaml_path = (
REPO_ROOT
/ "ckanext"
/ "datapusher_plus"
/ "config_declaration.yaml"
)
decl = yaml.safe_load(yaml_path.read_text(encoding="utf-8"))
declared_keys = {
opt.get("key")
for group in decl.get("groups", [])
for opt in group.get("options", [])
}
assert "ckanext.datapusher_plus.pii_screening" in declared_keys, (
"config_declaration.yaml must declare "
"ckanext.datapusher_plus.pii_screening."
)

# config.py side — AST-find the PII_SCREENING assignment and the
# config key string it reads.
config_py = (
REPO_ROOT / "ckanext" / "datapusher_plus" / "config.py"
).read_text()
tree = ast.parse(config_py)
found_key = None
for node in ast.walk(tree):
if not (
isinstance(node, ast.Assign)
and any(
getattr(t, "id", None) == "PII_SCREENING"
for t in node.targets
)
):
continue
for call in ast.walk(node):
if (
isinstance(call, ast.Call)
and getattr(call.func, "attr", None) == "get"
and call.args
and isinstance(call.args[0], ast.Constant)
):
found_key = call.args[0].value
break
break
assert found_key == "ckanext.datapusher_plus.pii_screening", (
f"config.py reads PII_SCREENING from {found_key!r}; must be "
"'ckanext.datapusher_plus.pii_screening' to match the "
"config_declaration.yaml declaration and the README "
"(the 'datastore_plus' typo was the documented-but-dead "
"key bug)."
)
Loading