Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

157 missing translations warning #571

Merged
merged 23 commits into from
Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e572ea4
add: warning for missing translations in the survey sheet
lindsay-stevens Nov 19, 2021
bd7cf43
add: happy path tests for establishing translations behaviour
lindsay-stevens Nov 22, 2021
b796c37
fix: formatting
lindsay-stevens Nov 22, 2021
ecc63f6
fix: clarify xpath string names and comments
lindsay-stevens Nov 22, 2021
8ae5482
add: test cases / handling for issue 157 case and media cols, tidy up
lindsay-stevens Nov 23, 2021
3c69a6f
add: test cases for choices sheet translations, organise tests
lindsay-stevens Nov 24, 2021
1763349
chg: refactor remaining translation tests to use XPathHelper, add docs
lindsay-stevens Nov 26, 2021
83ef307
add: show the XPath and the XForm when an assert_xpath_count fails
lindsay-stevens Nov 26, 2021
37ba6ee
add: tests / handling for constraintMsg and requiredMsg missing langs
lindsay-stevens Nov 26, 2021
dfeca3e
fix: remove leftover debug=True parameter, correct docstring typo
lindsay-stevens Nov 30, 2021
90a2895
add: check for missing translations in choices sheet
lindsay-stevens Nov 30, 2021
d3e6f82
chg: move missing_translations_check into validator subpackage
lindsay-stevens Nov 30, 2021
ffd073d
add: test case for xlsform with missing translations in survey + choices
lindsay-stevens Nov 30, 2021
06fe4c0
fix: formatting
lindsay-stevens Nov 30, 2021
6626782
fix: remove test debug setting, use survey constant instead of string
lindsay-stevens Nov 30, 2021
ad6830d
fix: formatting
lindsay-stevens Nov 30, 2021
99b0eae
fix: formatting
lindsay-stevens Nov 30, 2021
978c307
add: performance test for missing translations check (default skipped)
lindsay-stevens Dec 6, 2021
f1babeb
Merge branch 'master' into pyxform-157
lindsay-stevens Dec 6, 2021
eb6f0fe
fix: performance improvements for large forms
lindsay-stevens Jan 21, 2022
6e9adb6
Merge branch 'master' into pyxform-157
lindsay-stevens Jan 28, 2022
ed1701c
chg: remove default_language from check criteria, update warning format
lindsay-stevens Feb 7, 2022
8c12e4e
fix: formatting / linter warnings
lindsay-stevens Feb 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions pyxform/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@
"body": "control",
"parameters": "parameters",
}
# Translatable columns of the survey sheet.
# Key is the pyxform internal name, Value is the name used in error/warning messages.
TRANSLATABLE_SURVEY_COLUMNS = {
    constants.LABEL: constants.LABEL,
    # Per ODK Spec, could include "short" once pyxform supports it.
    constants.HINT: constants.HINT,
    "guidance_hint": "guidance_hint",
    "image": survey_header["image"],
    # Per ODK Spec, could include "big-image" once pyxform supports it.
    "audio": survey_header["audio"],
    "video": survey_header["video"],
    "jr:constraintMsg": "constraint_message",
    "jr:requiredMsg": "required_message",
}
# Translatable columns of the choices sheet, using the same Key / Value convention:
# Key is the pyxform internal name, Value is the name used in error/warning messages.
TRANSLATABLE_CHOICES_COLUMNS = {
    constants.LABEL: constants.LABEL,
    "image": "media::image",
    "audio": "media::audio",
    "video": "media::video",
}
list_header = {
"caption": constants.LABEL,
"list_name": constants.LIST_NAME,
Expand Down
3 changes: 2 additions & 1 deletion pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
SUBMISSION_URL = "submission_url"
AUTO_SEND = "auto_send"
AUTO_DELETE = "auto_delete"
DEFAULT_LANGUAGE = "default_language"
DEFAULT_LANGUAGE_KEY = "default_language"
DEFAULT_LANGUAGE_VALUE = "default"
LABEL = "label"
HINT = "hint"
STYLE = "style"
Expand Down
Empty file.
137 changes: 137 additions & 0 deletions pyxform/validators/pyxform/missing_translations_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from collections import defaultdict
from typing import TYPE_CHECKING

from pyxform import aliases, constants

if TYPE_CHECKING:
from typing import Any, Dict, List, Optional, Sequence, Union

SheetData = List[Dict[str, Union[str, Dict]]]


def format_missing_translations_msg(
    _in: "Dict[str, Dict[str, Sequence]]",
) -> "Optional[str]":
    """
    Format the missing translations data into a warning message.

    :param _in: A dict structured as Dict[survey|choices: Dict[column_name: (languages)]].
      In other words, for the survey or choices sheet, a dict of the column names and
      language(s) for which there are missing translations. Either sheet key may be
      absent, or map to None or an empty dict.
    :return: The warning message, or None if there were no missing columns.
    """

    def get_sheet_msg(name, sheet):
        # A sheet that is absent (None) or has no missing columns adds nothing.
        if not sheet:
            return ""
        # Column names are sorted so the message is stable between runs.
        cols = "; ".join(
            f"'{col}': '" + "', '".join(sheet[col]) + "'" for col in sorted(sheet)
        )
        return f"{name} column(s) and language(s): {cols}"

    sheet_names = (constants.SURVEY, constants.CHOICES)
    sheet_msgs = (get_sheet_msg(name=n, sheet=_in.get(n)) for n in sheet_names)
    # Keep only the sheets that actually have something to report.
    messages = [m for m in sheet_msgs if m]
    if not messages:
        return None
    return (
        "Missing translation column(s): there is no default_language set, and a "
        "translation column was not found for the {cols_msg}. "
        "To avoid unexpected form behaviour, specify a default_language in the "
        "settings sheet, or add the missing translation columns."
    ).format(cols_msg=" and ".join(messages))


def find_missing_translations(
    sheet_data: "SheetData",
    translatable_columns: "Dict[str, str]",
) -> "Dict[str, List[str]]":
    """
    Find missing translation columns in the sheet data.

    For each translatable column used in the sheet, there should be a translation for
    each language (including the default / unspecified language) that is used for any
    other translatable column.

    A cell holding a plain string is counted under the default language
    (constants.DEFAULT_LANGUAGE_VALUE); a cell holding a dict is counted once for each
    of its keys, which are treated as the language names. Cells nested under the
    media and bind columns are examined using their nested column name.

    This could be more efficient by not looking at every row, but that's how the data is
    arranged by the time it passes through workbook_to_json(). On the bright side it
    means this function could be adapted to warn about specific items lacking
    translations, even when there are no missing translation columns.

    :param sheet_data: The survey or choices sheet data.
    :param translatable_columns: The translatable columns for a sheet. The structure
      should be Dict[internal_name, external_name]. See the aliases module.
    :return: Dict[column_name, List[languages]]
    """
    # Sets, not lists: every row re-reports the same columns, and only membership is
    # needed below, so storing duplicates would just slow down the final comparison.
    translations_seen = defaultdict(set)
    translation_columns_seen = set()

    def process_cell(typ, cell):
        # Ignore empty cells and columns that are not translatable.
        if cell is None or typ not in translatable_columns:
            return
        name = translatable_columns[typ]
        if isinstance(cell, str):
            translations_seen[constants.DEFAULT_LANGUAGE_VALUE].add(name)
            translation_columns_seen.add(name)
        elif isinstance(cell, dict):
            for lng in cell:
                translations_seen[lng].add(name)
                translation_columns_seen.add(name)

    for row in sheet_data:
        for column_type, cell_content in row.items():
            # Media and bind cells are containers of nested columns. Handle them in a
            # single branch so a container is never also processed as a plain column.
            if column_type in (constants.MEDIA, constants.BIND) and isinstance(
                cell_content, dict
            ):
                for nested_type, nested_cell in cell_content.items():
                    process_cell(typ=nested_type, cell=nested_cell)
            else:
                process_cell(typ=column_type, cell=cell_content)

    missing = defaultdict(list)
    for lang, lang_trans in translations_seen.items():
        # Sorted so the key order of the result does not depend on set ordering.
        for seen_tran in sorted(translation_columns_seen):
            if seen_tran not in lang_trans:
                missing[seen_tran].append(lang)

    return missing


def missing_translations_check(
    settings: "Dict[str, Any]",
    default_language: str,
    survey_sheet: "SheetData",
    choices_sheet: "SheetData",
    warnings: "List[str]",
):
    """
    Add a warning if there are missing translation columns in the survey or choices data.

    The warning is only added when no default language is in effect, i.e. the settings
    have no default_language entry, or default_language is still the built-in default
    value (constants.DEFAULT_LANGUAGE_VALUE).

    :param settings: The settings sheet data.
    :param default_language: The XLSForm default language setting.
    :param survey_sheet: The survey sheet data.
    :param choices_sheet: The choices sheet data.
    :param warnings: The warnings list, which may be empty.
    :return: The warnings list, possibly with a new message, otherwise unchanged.
    """
    no_default_language = (
        constants.DEFAULT_LANGUAGE_KEY not in settings
        or default_language == constants.DEFAULT_LANGUAGE_VALUE
    )
    # With a default language set no warning is ever emitted, so skip scanning the
    # sheets (which visits every cell) altogether.
    if not no_default_language:
        return warnings

    survey_missing_trans = find_missing_translations(
        sheet_data=survey_sheet,
        translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS,
    )
    choices_missing_trans = find_missing_translations(
        sheet_data=choices_sheet,
        translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS,
    )
    if survey_missing_trans or choices_missing_trans:
        # Use the same sheet-name constants that the formatter looks up.
        msg = format_missing_translations_msg(
            _in={
                constants.SURVEY: survey_missing_trans,
                constants.CHOICES: choices_missing_trans,
            }
        )
        if msg is not None:
            warnings.append(msg)
    return warnings
42 changes: 30 additions & 12 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
from pyxform import aliases, constants
from pyxform.errors import PyXFormError
from pyxform.utils import default_is_dynamic, is_valid_xml_tag, levenshtein_distance
from pyxform.validators.pyxform.missing_translations_check import (
missing_translations_check,
)
from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict

if TYPE_CHECKING:
from typing import Any, Dict, KeysView, Optional
from typing import Any, Dict, KeysView, List, Optional


SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
Expand Down Expand Up @@ -92,11 +95,11 @@ def replace_smart_quotes_in_dict(_d):


def dealias_and_group_headers(
dict_array,
header_aliases,
use_double_colons,
default_language="default",
ignore_case=False,
dict_array: "List[Dict]",
header_aliases: "Dict",
use_double_colons: bool,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
ignore_case: bool = False,
):
"""
For each row in the worksheet, group all keys that contain a double colon.
Expand Down Expand Up @@ -220,7 +223,7 @@ def group_dictionaries_by_key(list_of_dicts, key, remove_key=True):
return dict_of_lists


def has_double_colon(workbook_dict):
def has_double_colon(workbook_dict) -> bool:
"""
Look for a column header with a doublecolon (::) and
return true if one is found.
Expand Down Expand Up @@ -344,7 +347,7 @@ def workbook_to_json(
workbook_dict,
form_name=None,
fallback_form_name=None,
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
) -> "Dict[str, Any]":
"""
Expand Down Expand Up @@ -438,7 +441,7 @@ def workbook_to_json(
settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
replace_smart_quotes_in_dict(settings)

default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)
default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language)

# add_none_option is a boolean that when true,
# indicates a none option should automatically be added to selects.
Expand All @@ -457,7 +460,7 @@ def workbook_to_json(
constants.TITLE: id_string,
constants.ID_STRING: id_string,
constants.SMS_KEYWORD: sms_keyword,
constants.DEFAULT_LANGUAGE: default_language,
constants.DEFAULT_LANGUAGE_KEY: default_language,
# By default the version is based on the date and time yyyymmddhh
# Leaving default version out for now since it might cause
# problems for formhub.
Expand Down Expand Up @@ -487,6 +490,8 @@ def workbook_to_json(
choices_sheet, aliases.list_header, use_double_colons, default_language
)
combined_lists = group_dictionaries_by_key(choices_sheet, constants.LIST_NAME)
# To combine the warning into one message, the check for missing choices translation
# columns is run with Survey sheet below.

choices = combined_lists
# Make sure all the options have the required properties:
Expand Down Expand Up @@ -568,6 +573,16 @@ def workbook_to_json(
)
survey_sheet = dealias_types(survey_sheet)

# Check for missing translations. The choices sheet is checked here so that the
# warning can be combined into one message.
warnings = missing_translations_check(
settings=settings,
default_language=default_language,
survey_sheet=survey_sheet,
choices_sheet=choices_sheet,
warnings=warnings,
)

# No spell check for OSM sheet (infrequently used, many spurious matches).
osm_sheet = dealias_and_group_headers(
workbook_dict.get(constants.OSM, []), aliases.list_header, True
Expand Down Expand Up @@ -1382,7 +1397,7 @@ def get_filename(path):
def parse_file_to_json(
path,
default_name="data",
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
file_object=None,
):
Expand Down Expand Up @@ -1514,7 +1529,10 @@ def _setup_question_types_dictionary(self):
types_sheet = "question types"
self._dict = self._dict[types_sheet]
self._dict = dealias_and_group_headers(
self._dict, {}, use_double_colons, "default"
dict_array=self._dict,
header_aliases={},
use_double_colons=use_double_colons,
default_language=constants.DEFAULT_LANGUAGE_VALUE,
)
self._dict = organize_by_values(self._dict, "name")

Expand Down
7 changes: 4 additions & 3 deletions tests/pyxform_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def assertPyxformXform(self, **kwargs):
* error__contains: a list of strings which should exist in the error
* error__not_contains: a list of strings which should not exist in the error
* odk_validate_error__contains: list of strings; run_odk_validate must be set
* warning__contains: a list of strings which should exist in the warnings
* warning__not_contains: a list of strings which should not exist in the warnings
* warnings__contains: a list of strings which should exist in the warnings
* warnings__not_contains: a list of strings which should not exist in the warnings
* warnings_count: the number of expected warning messages
* xml__excludes: an array of strings which should not exist in the resulting
xml. [xml|model|instance|itext]_excludes are also supported.
Expand Down Expand Up @@ -489,7 +489,8 @@ def assert_xpath_count(
content=content,
xpath=xpath,
)
self.assertEqual(expected, len(observed), matcher_context.content_str)
msg = f"XPath found no matches:\n{xpath}\n\nXForm content:\n{matcher_context.content_str}"
self.assertEqual(expected, len(observed), msg=msg)


def reorder_attributes(root):
Expand Down
Loading