Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

157 missing translations warning #571

Merged
merged 23 commits into from
Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e572ea4
add: warning for missing translations in the survey sheet
lindsay-stevens Nov 19, 2021
bd7cf43
add: happy path tests for establishing translations behaviour
lindsay-stevens Nov 22, 2021
b796c37
fix: formatting
lindsay-stevens Nov 22, 2021
ecc63f6
fix: clarify xpath string names and comments
lindsay-stevens Nov 22, 2021
8ae5482
add: test cases / handling for issue 157 case and media cols, tidy up
lindsay-stevens Nov 23, 2021
3c69a6f
add: test cases for choices sheet translations, organise tests
lindsay-stevens Nov 24, 2021
1763349
chg: refactor remaining translation tests to use XPathHelper, add docs
lindsay-stevens Nov 26, 2021
83ef307
add: show the XPath and the XForm when an assert_xpath_count fails
lindsay-stevens Nov 26, 2021
37ba6ee
add: tests / handling for constraintMsg and requiredMsg missing langs
lindsay-stevens Nov 26, 2021
dfeca3e
fix: remove leftover debug=True parameter, correct docstring typo
lindsay-stevens Nov 30, 2021
90a2895
add: check for missing translations in choices sheet
lindsay-stevens Nov 30, 2021
d3e6f82
chg: move missing_translations_check into validator subpackage
lindsay-stevens Nov 30, 2021
ffd073d
add: test case for xlsform with missing translations in survey + choices
lindsay-stevens Nov 30, 2021
06fe4c0
fix: formatting
lindsay-stevens Nov 30, 2021
6626782
fix: remove test debug setting, use survey constant instead of string
lindsay-stevens Nov 30, 2021
ad6830d
fix: formatting
lindsay-stevens Nov 30, 2021
99b0eae
fix: formatting
lindsay-stevens Nov 30, 2021
978c307
add: performance test for missing translations check (default skipped)
lindsay-stevens Dec 6, 2021
f1babeb
Merge branch 'master' into pyxform-157
lindsay-stevens Dec 6, 2021
eb6f0fe
fix: performance improvements for large forms
lindsay-stevens Jan 21, 2022
6e9adb6
Merge branch 'master' into pyxform-157
lindsay-stevens Jan 28, 2022
ed1701c
chg: remove default_language from check criteria, update warning format
lindsay-stevens Feb 7, 2022
8c12e4e
fix: formatting / linter warnings
lindsay-stevens Feb 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
SUBMISSION_URL = "submission_url"
AUTO_SEND = "auto_send"
AUTO_DELETE = "auto_delete"
DEFAULT_LANGUAGE = "default_language"
DEFAULT_LANGUAGE_KEY = "default_language"
DEFAULT_LANGUAGE_VALUE = "default"
LABEL = "label"
HINT = "hint"
STYLE = "style"
Expand Down Expand Up @@ -102,3 +103,11 @@
AUDIO_QUALITY_LOW = "low"
AUDIO_QUALITY_NORMAL = "normal"
AUDIO_QUALITY_EXTERNAL = "external"

# Survey sheet column names that can hold per-language values. These are the keys
# that find_missing_translations_survey (pyxform/xls2json.py) looks up on each row
# when checking for missing translations.
TRANSLATABLE_SURVEY_COLUMNS = (
    "label",
    "hint",
    "media::image",
    "media::video",
    "media::audio",
)
89 changes: 76 additions & 13 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import re
import sys
from collections import Counter
from collections import Counter, defaultdict
from typing import TYPE_CHECKING

from pyxform import aliases, constants
Expand All @@ -16,7 +16,7 @@
from pyxform.xls2json_backends import csv_to_dict, xls_to_dict

if TYPE_CHECKING:
from typing import Any, Dict, KeysView, Optional
from typing import Any, Dict, KeysView, List, Optional, Sequence, Tuple, Union


SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
Expand Down Expand Up @@ -92,11 +92,11 @@ def replace_smart_quotes_in_dict(_d):


def dealias_and_group_headers(
dict_array,
header_aliases,
use_double_colons,
default_language="default",
ignore_case=False,
dict_array: "List[Dict]",
header_aliases: "Dict",
use_double_colons: bool,
default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
ignore_case: bool = False,
):
"""
For each row in the worksheet, group all keys that contain a double colon.
Expand Down Expand Up @@ -220,7 +220,7 @@ def group_dictionaries_by_key(list_of_dicts, key, remove_key=True):
return dict_of_lists


def has_double_colon(workbook_dict):
def has_double_colon(workbook_dict) -> bool:
"""
Look for a column header with a doublecolon (::) and
return true if one is found.
Expand Down Expand Up @@ -340,11 +340,61 @@ def find_sheet_misspellings(key: str, keys: "KeysView") -> "Optional[str]":
return None


def format_missing_translations_survey_msg(_in: "Dict[str, Sequence]") -> str:
    """
    Build the warning text for survey columns that lack translations.

    :param _in: Maps each survey column name to the languages for which no
      translation of that column was found.
    :return: The warning message. Each entry is rendered as
      'column': 'lang1', 'lang2' and entries are separated by "; ".
    """
    template = (
        "Missing translation(s): there is no default_language set, and a translation "
        "was not found for the survey column(s) and language(s): {m}. "
        "To avoid unexpected form behaviour, specify a default_language in the "
        "settings sheet, or add the missing translation(s) to the survey sheet."
    )
    entries = []
    for column_name, languages in _in.items():
        quoted_languages = "', '".join(languages)
        entries.append(f"'{column_name}': '{quoted_languages}'")
    return template.format(m="; ".join(entries))


def find_missing_translations_survey(
    survey_sheet: "List[Dict[str, Union[str, Dict]]]",
) -> "Tuple[Dict[str, List[str]], Tuple[str, ...]]":
    """
    Find missing translation columns in the survey sheet data.

    For each translatable column used in the sheet, there should be a translation for
    each language (including the default / unspecified language) that is used for any
    other translatable column.

    This could be more efficient by not looking at every row, but that's how the data is
    arranged by the time it comes to this module. On the bright side it means this
    function could be adapted to warn about specific items lacking translations, even
    when there are no missing translation columns.

    :param survey_sheet: The Survey sheet data.
    :return: Dict[column_name, List[languages]], Tuple[languages_seen]
    """
    default_language = constants.DEFAULT_LANGUAGE_VALUE
    # language -> set of translatable columns seen with that language. A set keeps
    # memory independent of the number of rows and makes the membership test in the
    # second pass O(1); key insertion order is still preserved by the dict.
    languages_seen = defaultdict(set)
    translatables_seen = {}  # Unique but retain order.
    # NOTE(review): columns are looked up by the exact names in
    # TRANSLATABLE_SURVEY_COLUMNS (e.g. "media::image"). If double-colon headers have
    # already been grouped into nested dicts by this point, those keys may never
    # match - confirm against the caller.
    for row in survey_sheet:
        for translatable in constants.TRANSLATABLE_SURVEY_COLUMNS:
            column = row.get(translatable)
            if isinstance(column, str):
                # A plain string value means the column has no language suffix.
                languages_seen[default_language].add(translatable)
                translatables_seen[translatable] = None
            elif isinstance(column, dict):
                # A dict value is keyed by language name.
                for language in column:
                    languages_seen[language].add(translatable)
                    translatables_seen[translatable] = None

    # Every translatable column that was seen should exist for every language seen.
    missing = defaultdict(list)
    for language, language_columns in languages_seen.items():
        for translatable in translatables_seen:
            if translatable not in language_columns:
                missing[translatable].append(language)

    return missing, tuple(languages_seen.keys())


def workbook_to_json(
workbook_dict,
form_name=None,
fallback_form_name=None,
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
) -> "Dict[str, Any]":
"""
Expand Down Expand Up @@ -438,7 +488,7 @@ def workbook_to_json(
settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
replace_smart_quotes_in_dict(settings)

default_language = settings.get(constants.DEFAULT_LANGUAGE, default_language)
default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language)

# add_none_option is a boolean that when true,
# indicates a none option should automatically be added to selects.
Expand All @@ -457,7 +507,7 @@ def workbook_to_json(
constants.TITLE: id_string,
constants.ID_STRING: id_string,
constants.SMS_KEYWORD: sms_keyword,
constants.DEFAULT_LANGUAGE: default_language,
constants.DEFAULT_LANGUAGE_KEY: default_language,
# By default the version is based on the date and time yyyymmddhh
# Leaving default version out for now since it might cause
# problems for formhub.
Expand Down Expand Up @@ -568,6 +618,16 @@ def workbook_to_json(
)
survey_sheet = dealias_types(survey_sheet)

# Look for / warn on missing translations.
missing, langs = find_missing_translations_survey(survey_sheet=survey_sheet)
no_default = (
"default_language" not in settings
or default_language == constants.DEFAULT_LANGUAGE_VALUE
)
not_default = "default" not in langs
if (no_default or not_default) and 0 < len(missing):
warnings.append(format_missing_translations_survey_msg(_in=missing))

# No spell check for OSM sheet (infrequently used, many spurious matches).
osm_sheet = dealias_and_group_headers(
workbook_dict.get(constants.OSM, []), aliases.list_header, True
Expand Down Expand Up @@ -1380,7 +1440,7 @@ def get_filename(path):
def parse_file_to_json(
path,
default_name="data",
default_language="default",
default_language=constants.DEFAULT_LANGUAGE_VALUE,
warnings=None,
file_object=None,
):
Expand Down Expand Up @@ -1512,7 +1572,10 @@ def _setup_question_types_dictionary(self):
types_sheet = "question types"
self._dict = self._dict[types_sheet]
self._dict = dealias_and_group_headers(
self._dict, {}, use_double_colons, "default"
dict_array=self._dict,
header_aliases={},
use_double_colons=use_double_colons,
default_language=constants.DEFAULT_LANGUAGE_VALUE,
)
self._dict = organize_by_values(self._dict, "name")

Expand Down
179 changes: 176 additions & 3 deletions tests/test_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
"""
Test translations syntax.
"""
from pyxform.xls2json import format_missing_translations_survey_msg
from tests.pyxform_test_case import PyxformTestCase


class DoubleColonTranslations(PyxformTestCase):
def test_langs(self):
class TestTranslations(PyxformTestCase):
def test_double_colon_translations(self):
model_contains = (
"""<bind nodeset="/translations/n1"""
+ """" readonly="true()" type="string"/>"""
Expand Down Expand Up @@ -36,8 +37,180 @@ def test_langs(self):
model__contains=[model_contains],
)

def test_missing_translation_survey__warn__no_default_no_other_lang(self):
    """Should warn when hint lacks the french column that label has, with no default_language."""
    form_md = """
    | survey | | | | | |
    | | type | name | label | label::french | hint |
    | | note | n1 | hello | bonjour | a salutation |
    """
    xpaths = [
        # Label is translated.
        """/h:html/h:body/x:input[@ref='/test/n1']/x:label[@ref="jr:itext('/test/n1:label')"]""",
        # Hint is not translated.
        "/h:html/h:body/x:input[@ref='/test/n1']/x:hint[text()='a salutation']",
        # Default label in translations.
        (
            "/h:html/h:head/x:model/x:itext"
            "/x:translation[@default='true()' and @lang='default']"
            "/x:text[@id='/test/n1:label']/x:value[text()='hello']"
        ),
        # French label in translations.
        (
            "/h:html/h:head/x:model/x:itext"
            "/x:translation[@lang='french']"
            "/x:text[@id='/test/n1:label']/x:value[text()='bonjour']"
        ),
    ]
    collected_warnings = []
    self.assertPyxformXform(
        name="test",
        md=form_md,
        warnings=collected_warnings,
        xml__xpath_match=xpaths,
    )
    self.assertIn(
        format_missing_translations_survey_msg(_in={"hint": ["french"]}),
        collected_warnings,
    )

def test_missing_translation_survey__warn__no_default_two_way(self):
    """Should warn in both directions: hint lacks french and label lacks the default language."""
    form_md = """
    | survey | | | | |
    | | type | name | label::french | hint |
    | | note | n1 | bonjour | a salutation |
    """
    xpaths = [
        # Label is translated.
        """/h:html/h:body/x:input[@ref='/test/n1']/x:label[@ref="jr:itext('/test/n1:label')"]""",
        # Hint is not translated.
        "/h:html/h:body/x:input[@ref='/test/n1']/x:hint[text()='a salutation']",
        # Default label not in translations.
        "/h:html/h:head/x:model/x:itext[not(descendant::x:translation[@lang='default'])]",
        # French label in translations.
        (
            "/h:html/h:head/x:model/x:itext"
            "/x:translation[@lang='french']"
            "/x:text[@id='/test/n1:label']/x:value[text()='bonjour']"
        ),
    ]
    collected_warnings = []
    self.assertPyxformXform(
        name="test",
        md=form_md,
        warnings=collected_warnings,
        xml__xpath_match=xpaths,
    )
    expected_msg = format_missing_translations_survey_msg(
        _in={"hint": ["french"], "label": ["default"]}
    )
    self.assertIn(expected_msg, collected_warnings)

def test_missing_translation_survey__warn__no_default_with_other_lang(self):
    """Should warn if hint is missing a language that label has, and no default_language."""
    md = """
    | survey | | | | | |
    | | type | name | label::english | label::french | hint::english |
    | | note | n1 | hello | bonjour | a salutation |
    """
    observed = []
    self.assertPyxformXform(
        name="test",
        md=md,
        warnings=observed,
        xml__xpath_match=[
            # Label is translated.
            """/h:html/h:body/x:input[@ref='/test/n1']/x:label[@ref="jr:itext('/test/n1:label')"]""",
            # Hint is translated.
            """/h:html/h:body/x:input[@ref='/test/n1']/x:hint[@ref="jr:itext('/test/n1:hint')"]""",
            # English label in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='english']"
            + "/x:text[@id='/test/n1:label']/x:value[text()='hello']",
            # French label in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='french']"
            + "/x:text[@id='/test/n1:label']/x:value[text()='bonjour']",
            # English hint in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='english']"
            + "/x:text[@id='/test/n1:hint']/x:value[text()='a salutation']",
            # French hint in translations but with a dash instead of something meaningful.
            # TODO: is this a bug?
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='french']"
            + "/x:text[@id='/test/n1:hint']/x:value[text()='-']",
        ],
    )
    # Only the french hint is absent; every other column/language pair is present.
    expected = format_missing_translations_survey_msg(_in={"hint": ["french"]})
    self.assertIn(expected, observed)

def test_missing_translation_survey__warn__default_with_other_lang(self):
    """Should warn if there's a missing translation, even with a default_language set."""
    md = """
    | settings | |
    | | default_language |
    | | english |
    | survey | | | | | |
    | | type | name | label::english | label::french | hint::english |
    | | note | n1 | hello | bonjour | a salutation |
    """
    observed = []
    self.assertPyxformXform(
        name="test",
        md=md,
        warnings=observed,
        xml__xpath_match=[
            # Label is translated.
            """/h:html/h:body/x:input[@ref='/test/n1']/x:label[@ref="jr:itext('/test/n1:label')"]""",
            # Hint is translated.
            """/h:html/h:body/x:input[@ref='/test/n1']/x:hint[@ref="jr:itext('/test/n1:hint')"]""",
            # English label in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='english' and @default='true()']"
            + "/x:text[@id='/test/n1:label']/x:value[text()='hello']",
            # French label in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='french']"
            + "/x:text[@id='/test/n1:label']/x:value[text()='bonjour']",
            # English hint in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='english']"
            + "/x:text[@id='/test/n1:hint']/x:value[text()='a salutation']",
            # French hint in translations but with a dash instead of something meaningful.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@lang='french']"
            + "/x:text[@id='/test/n1:hint']/x:value[text()='-']",
        ],
    )
    # The warning is expected here (assertIn), matching the "__warn__" test name.
    expected = format_missing_translations_survey_msg(_in={"hint": ["french"]})
    self.assertIn(expected, observed)

def test_missing_translation_survey__no_warn__default_no_other_lang(self):
    """Should not warn if there's a missing translation with a default_language."""
    md = """
    | settings | |
    | | default_language |
    | | french |
    | survey | | | | | |
    | | type | name | label | label::french | hint |
    | | note | n1 | hello | bonjour | a salutation |
    """
    observed = []
    self.assertPyxformXform(
        name="test",
        md=md,
        warnings=observed,
        xml__xpath_match=[
            # Label is translated.
            """/h:html/h:body/x:input[@ref='/test/n1']/x:label[@ref="jr:itext('/test/n1:label')"]""",
            # Hint is not translated.
            "/h:html/h:body/x:input[@ref='/test/n1']/x:hint[text()='a salutation']",
            # Default label not in translations.
            # TODO: is this a bug?
            "/h:html/h:head/x:model/x:itext[not(descendant::x:translation[@lang='default'])]",
            # French label in translations.
            "/h:html/h:head/x:model/x:itext"
            + "/x:translation[@default='true()' and @lang='french']"
            + "/x:text[@id='/test/n1:label']/x:value[text()='bonjour']",
        ],
    )
    # This is the message that would be emitted for the missing french hint.
    expected = format_missing_translations_survey_msg(_in={"hint": ["french"]})
    self.assertNotIn(expected, observed)


class TransaltionsTest(PyxformTestCase):
class TranslationsTest(PyxformTestCase):
"""Test XLSForm translations."""

def test_missing_media_itext(self):
Expand Down