Skip to content

Commit

Permalink
Merge pull request #571 from lindsay-stevens/pyxform-157
Browse files Browse the repository at this point in the history
Warn when some languages are missing translations
  • Loading branch information
lognaturel committed Feb 10, 2022
2 parents 501b821 + 8c12e4e commit f4cbc67
Show file tree
Hide file tree
Showing 8 changed files with 1,651 additions and 76 deletions.
19 changes: 19 additions & 0 deletions pyxform/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@
"body": "control",
"parameters": "parameters",
}
# Columns that can carry per-language translations, used by the missing translations
# check to decide which columns to inspect on each sheet.
# Key is the pyxform internal name, Value is the name used in error/warning messages.
TRANSLATABLE_SURVEY_COLUMNS = {
    constants.LABEL: constants.LABEL,
    # Per ODK Spec, could include "short" once pyxform supports it.
    constants.HINT: constants.HINT,
    "guidance_hint": "guidance_hint",
    # Media columns report the user-facing header name taken from survey_header.
    "image": survey_header["image"],
    # Per ODK Spec, could include "big-image" once pyxform supports it.
    "audio": survey_header["audio"],
    "video": survey_header["video"],
    # Bind message columns: internal "jr:" names map to the spreadsheet column names.
    "jr:constraintMsg": "constraint_message",
    "jr:requiredMsg": "required_message",
}
# Choices sheet equivalent, using the same internal name -> message name convention.
TRANSLATABLE_CHOICES_COLUMNS = {
    "label": constants.LABEL,
    "image": "media::image",
    "audio": "media::audio",
    "video": "media::video",
}
list_header = {
"caption": constants.LABEL,
"list_name": constants.LIST_NAME,
Expand Down
3 changes: 2 additions & 1 deletion pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
SUBMISSION_URL = "submission_url"
AUTO_SEND = "auto_send"
AUTO_DELETE = "auto_delete"
DEFAULT_LANGUAGE = "default_language"
DEFAULT_LANGUAGE_KEY = "default_language"
DEFAULT_LANGUAGE_VALUE = "default"
LABEL = "label"
HINT = "hint"
STYLE = "style"
Expand Down
9 changes: 5 additions & 4 deletions pyxform/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ def validate(self):
# there's a stronger test of this when creating the xpath
# dictionary for a survey.
def _validate_uniqueness_of_element_names(self):
element_slugs = []
element_slugs = set()
for element in self.children:
if any(element.name.lower() == s.lower() for s in element_slugs):
elem_lower = element.name.lower()
if elem_lower in element_slugs:
raise PyXFormError(
"There are more than one survey elements named '%s' "
"(case-insensitive) in the section named '%s'."
% (element.name.lower(), self.name)
% (elem_lower, self.name)
)
element_slugs.append(element.name)
element_slugs.add(elem_lower)

def xml_instance(self, **kwargs):
"""
Expand Down
Empty file.
138 changes: 138 additions & 0 deletions pyxform/validators/pyxform/missing_translations_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from collections import defaultdict
from typing import TYPE_CHECKING

from pyxform import aliases, constants
from pyxform.errors import PyXFormError

if TYPE_CHECKING:
from typing import Dict, List, Optional, Sequence, Union

SheetData = List[Dict[str, Union[str, Dict]]]


def format_missing_translations_msg(
    _in: "Dict[str, Dict[str, Sequence]]",
) -> "Optional[str]":
    """
    Format the missing translations data into a warning message.

    :param _in: A dict structured as Dict[survey|choices: Dict[language: (columns)]].
      In other words, for the survey or choices sheet, a dict of the language(s) and
      column names for which there are missing translations.
    :return: The warning message, or None if there were no missing columns.
    :raises PyXFormError: if the columns for a language are given as a single string
      instead of a sequence of column names.
    """

    def get_sheet_msg(name, sheet):
        # A sheet that is absent or lists no languages contributes nothing.
        if not sheet:
            return None
        lang_msgs = []
        for lang in sorted(sheet):
            cols = sheet[lang]
            if isinstance(cols, str):
                # A bare string would otherwise be treated as a sequence of
                # single-character "columns", so reject it outright.
                msg = f"Expected a sequence of columns, got a string for {lang}."
                raise PyXFormError(msg)
            if len(cols) == 1:
                msg = f"Language '{lang}' is missing the {name} {cols[0]} column."
                lang_msgs.append(msg)
            elif len(cols) > 1:
                c = ", ".join(sorted(cols))
                msg = f"Language '{lang}' is missing the {name} columns {c}."
                lang_msgs.append(msg)
        # Languages with empty column lists produce no lines; report that as None
        # so the caller does not receive an empty message.
        if not lang_msgs:
            return None
        return "\n".join(lang_msgs)

    survey = get_sheet_msg(name=constants.SURVEY, sheet=_in.get(constants.SURVEY))
    choices = get_sheet_msg(name=constants.CHOICES, sheet=_in.get(constants.CHOICES))

    # Survey lines come first, then choices lines.
    messages = tuple(i for i in (survey, choices) if i is not None)
    if not messages:
        return None
    return "\n".join(messages)


def find_missing_translations(
    sheet_data: "SheetData",
    translatable_columns: "Dict[str, str]",
) -> "Dict[str, List[str]]":
    """
    Find missing translation columns in the sheet data.

    For each translatable column used in the sheet, there should be a translation for
    each language (including the default / unspecified language) that is used for any
    other translatable column.

    This checks the first row only since it is concerned with the presence of columns, not
    individual cells. It therefore assumes that each row object has the same structure.

    :param sheet_data: The survey or choices sheet data.
    :param translatable_columns: The translatable columns for a sheet. The structure
      should be Dict[internal_name, external_name]. See the aliases module.
    :return: Dict[language, List[column_name]], where the column names are the external
      names, sorted alphabetically. Languages with nothing missing are not included.
    """
    # language -> external names of the translatable columns seen for that language.
    translations_seen = defaultdict(set)
    # External names of every translatable column seen for any language.
    translation_columns_seen = set()

    def process_cell(typ, cell):
        if cell is None or typ not in translatable_columns:
            return
        name = translatable_columns[typ]
        if isinstance(cell, str):
            # A plain string means the column had no language suffix.
            languages = (constants.DEFAULT_LANGUAGE_VALUE,)
        elif isinstance(cell, dict):
            # e.g. {"en": "Hello", "fr": "Bonjour"}
            languages = tuple(cell)
        else:
            return
        for lng in languages:
            translations_seen[lng].add(name)
            translation_columns_seen.add(name)

    if sheet_data:
        # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"})
        for column_type, cell_content in sheet_data[0].items():
            if column_type == constants.MEDIA:
                # e.g. ("audio", {"eng": "my.mp3"})
                for media_type, media_cell in cell_content.items():
                    process_cell(typ=media_type, cell=media_cell)
            elif column_type == constants.BIND:
                # e.g. ("jr:constraintMsg", "Try again")
                for bind_type, bind_cell in cell_content.items():
                    process_cell(typ=bind_type, cell=bind_cell)
            else:
                process_cell(typ=column_type, cell=cell_content)

    # Sorted so the result does not depend on set iteration order.
    all_columns = sorted(translation_columns_seen)
    missing = defaultdict(list)
    for lang, lang_trans in translations_seen.items():
        for seen_tran in all_columns:
            if seen_tran not in lang_trans:
                missing[lang].append(seen_tran)

    return missing


def missing_translations_check(
    survey_sheet: "SheetData",
    choices_sheet: "SheetData",
    warnings: "List[str]",
) -> "List[str]":
    """
    Add a warning if there are missing translation columns in the survey or choices data.

    :param survey_sheet: The survey sheet data.
    :param choices_sheet: The choices sheet data.
    :param warnings: The warnings list, which may be empty. If None, a new list is used.
    :return: The warnings list, possibly with a new message, otherwise unchanged.
    """
    if warnings is None:
        warnings = []

    # Keyed with the same constants that format_missing_translations_msg reads, so the
    # two functions cannot drift apart.
    missing = {
        constants.SURVEY: find_missing_translations(
            sheet_data=survey_sheet,
            translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS,
        ),
        constants.CHOICES: find_missing_translations(
            sheet_data=choices_sheet,
            translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS,
        ),
    }
    # Both sheets are formatted together so that a single combined warning is emitted.
    if any(missing.values()):
        msg = format_missing_translations_msg(_in=missing)
        if msg is not None:
            warnings.append(msg)
    return warnings
40 changes: 28 additions & 12 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
from pyxform import aliases, constants
from pyxform.errors import PyXFormError
from pyxform.utils import default_is_dynamic, is_valid_xml_tag, levenshtein_distance
from pyxform.validators.pyxform.missing_translations_check import (
missing_translations_check,
)
from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict

if TYPE_CHECKING:
from typing import Any, Dict, KeysView, Optional
from typing import Any, Dict, KeysView, List, Optional


SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
Expand Down Expand Up @@ -92,11 +95,11 @@ def replace_smart_quotes_in_dict(_d):


def dealias_and_group_headers(
dict_array,
header_aliases,
use_double_colons,
default_language="default",
ignore_case=False,
dict_array: "List[Dict]",
header_aliases: "Dict",
use_double_colons: bool,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
ignore_case: bool = False,
):
"""
For each row in the worksheet, group all keys that contain a double colon.
Expand Down Expand Up @@ -220,7 +223,7 @@ def group_dictionaries_by_key(list_of_dicts, key, remove_key=True):
return dict_of_lists


def has_double_colon(workbook_dict):
def has_double_colon(workbook_dict) -> bool:
"""
Look for a column header with a doublecolon (::) and
return true if one is found.
Expand Down Expand Up @@ -344,7 +347,7 @@ def workbook_to_json(
workbook_dict,
form_name=None,
fallback_form_name=None,
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
) -> "Dict[str, Any]":
"""
Expand Down Expand Up @@ -438,7 +441,7 @@ def workbook_to_json(
settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
replace_smart_quotes_in_dict(settings)

default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)
default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language)

# add_none_option is a boolean that when true,
# indicates a none option should automatically be added to selects.
Expand All @@ -457,7 +460,7 @@ def workbook_to_json(
constants.TITLE: id_string,
constants.ID_STRING: id_string,
constants.SMS_KEYWORD: sms_keyword,
constants.DEFAULT_LANGUAGE: default_language,
constants.DEFAULT_LANGUAGE_KEY: default_language,
# By default the version is based on the date and time yyyymmddhh
# Leaving default version out for now since it might cause
# problems for formhub.
Expand Down Expand Up @@ -487,6 +490,8 @@ def workbook_to_json(
choices_sheet, aliases.list_header, use_double_colons, default_language
)
combined_lists = group_dictionaries_by_key(choices_sheet, constants.LIST_NAME)
# To combine the warning into one message, the check for missing choices translation
# columns is run with Survey sheet below.

choices = combined_lists
# Make sure all the options have the required properties:
Expand Down Expand Up @@ -568,6 +573,14 @@ def workbook_to_json(
)
survey_sheet = dealias_types(survey_sheet)

# Check for missing translations. The choices sheet is checked here so that the
# warning can be combined into one message.
warnings = missing_translations_check(
survey_sheet=survey_sheet,
choices_sheet=choices_sheet,
warnings=warnings,
)

# No spell check for OSM sheet (infrequently used, many spurious matches).
osm_sheet = dealias_and_group_headers(
workbook_dict.get(constants.OSM, []), aliases.list_header, True
Expand Down Expand Up @@ -1382,7 +1395,7 @@ def get_filename(path):
def parse_file_to_json(
path,
default_name="data",
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
file_object=None,
):
Expand Down Expand Up @@ -1514,7 +1527,10 @@ def _setup_question_types_dictionary(self):
types_sheet = "question types"
self._dict = self._dict[types_sheet]
self._dict = dealias_and_group_headers(
self._dict, {}, use_double_colons, "default"
dict_array=self._dict,
header_aliases={},
use_double_colons=use_double_colons,
default_language=constants.DEFAULT_LANGUAGE_VALUE,
)
self._dict = organize_by_values(self._dict, "name")

Expand Down
7 changes: 4 additions & 3 deletions tests/pyxform_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def assertPyxformXform(self, **kwargs):
* error__contains: a list of strings which should exist in the error
* error__not_contains: a list of strings which should not exist in the error
* odk_validate_error__contains: list of strings; run_odk_validate must be set
* warning__contains: a list of strings which should exist in the warnings
* warning__not_contains: a list of strings which should not exist in the warnings
* warnings__contains: a list of strings which should exist in the warnings
* warnings__not_contains: a list of strings which should not exist in the warnings
* warnings_count: the number of expected warning messages
* xml__excludes: an array of strings which should not exist in the resulting
xml. [xml|model|instance|itext]_excludes are also supported.
Expand Down Expand Up @@ -489,7 +489,8 @@ def assert_xpath_count(
content=content,
xpath=xpath,
)
self.assertEqual(expected, len(observed), matcher_context.content_str)
msg = f"XPath found no matches:\n{xpath}\n\nXForm content:\n{matcher_context.content_str}"
self.assertEqual(expected, len(observed), msg=msg)


def reorder_attributes(root):
Expand Down
Loading

0 comments on commit f4cbc67

Please sign in to comment.