#!/usr/bin/python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

"""Compares two JSON translation files, as used by the Apache Guacamole web
application, listing the strings which appear to be missing or incorrect.

The script exits with status 1 if any problem is reported and with status 0
otherwise.
"""

import argparse
import io
import json
import os
import re
import sys

# Types which represent a translation string once parsed from JSON. Python 2
# provides "basestring"; Python 3 only has "str".
try:
    string_types = (basestring,)
except NameError:
    string_types = (str,)


def flatten_strings(translation, prefix=u''):
    """Reads all translation strings from the given JSON, taking into account
    namespacing, flattening nested namespaces into a single set of key/value
    pairs.

    For example, the following call:

        flatten_strings({
            u'TOP' : {
                u'LETTERS' : {
                    u'A' : u'A',
                    u'B' : u'B'
                },
                u'NUMBERS' : {
                    u'ONE'   : u'1',
                    u'TWO'   : u'2',
                    u'THREE' : u'3'
                }
            }
        })

    would return:

        {
            u'TOP.LETTERS.A'     : u'A',
            u'TOP.LETTERS.B'     : u'B',
            u'TOP.NUMBERS.ONE'   : u'1',
            u'TOP.NUMBERS.TWO'   : u'2',
            u'TOP.NUMBERS.THREE' : u'3'
        }

    Parameters
    ----------
    translation : dict or unicode
        The dict object to read translation strings from, where each key is a
        translation key or namespace and each value is a translation string or
        a dict containing the translations nested within that namespace. If
        this object is simply a Unicode string, it will be assumed to be the
        value of a translation string, and the prefix provided will be assumed
        to be the name.

    prefix : unicode, optional
        The namespace prefix to apply to all translation strings within the
        given object, if any. This parameter is optional. If omitted, an empty
        string will be used.

    Returns
    -------
    dict
        A dict whose properties are the names of all translation strings
        contained within the given object.

    Raises
    ------
    TypeError
        If the given object (or any value nested within it) is neither a
        string nor a dict.

    """

    # If the provided object is a string, the prefix is the string name
    if isinstance(translation, string_types):
        return {prefix: translation}

    # Anything other than a namespace at this point (number, list, null, ...)
    # cannot be flattened; fail with the name of the offending key rather than
    # an AttributeError on ".items()"
    if not isinstance(translation, dict):
        raise TypeError('Translation key "{}" must map to a string or an '
                        'object, not {}.'.format(prefix,
                                                 type(translation).__name__))

    # Otherwise, if the prefix is non-empty, append a period for children
    if prefix:
        prefix += u'.'

    # For each property of the given object, read all string names and add
    # them under the prefix of that property
    strings = {}
    for key, child in translation.items():
        strings.update(flatten_strings(child, prefix + key))

    return strings


class Translation(object):
    """A set of namespaced translation strings read from a JSON file, as
    supported by angular-translate and used by Apache Guacamole.

    Attributes
    ----------
    lang_key : unicode
        The unique key identifying the JSON translation file and the language
        within that file. This will simply be the filename without the ".json"
        extension.
    lang_name : unicode
        The name of the language as defined within the JSON translation file by
        the special "NAME" key. Not all translations will define a "NAME", as
        some translations (those provided by Guacamole extensions) are used as
        overlays for the base translation for that language defined at the web
        application level. If no "NAME" key is present, `lang_name` will be
        `None`.
    strings : dict
        The flattened set of translation key/value pairs. Each key will contain
        all applicable namespaces, separated by periods, as produced by
        `flatten_strings()`. There will be no nested keys.

    """

    def __init__(self, path):
        """
        Parses the details and contents of the JSON translation file at the
        given path.

        Parameters
        ----------
        path : str
            The path to the JSON file containing the translation to be read.

        """

        # Read the file as UTF-8 regardless of the platform default encoding,
        # and make sure the handle is closed once parsing is done
        with io.open(path, encoding='utf-8') as json_file:
            contents = json.load(json_file)

        filename = os.path.basename(path)

        self.lang_key = os.path.splitext(filename)[0]
        self.strings = flatten_strings(contents)
        self.lang_name = self.strings.get(u'NAME', None)

    def get_missing(self, expected):
        """Returns a list of translation keys which are present in the given
        translation but missing from this translation.

        Parameters
        ----------
        expected : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which are present in the given
            translation but are NOT present in this translation.

        """
        return [key for key in expected.strings if key not in self.strings]

    def get_identical(self, other):
        """Returns a list of translation keys which map to the same exact value
        in both this translation and the given translation.

        Parameters
        ----------
        other : Translation
            The translation to compare this translation against.

        Returns
        -------
        list
            A list of translation keys which map to the same exact value in
            both translations.

        """
        return [key for key, value in self.strings.items()
                if key in other.strings and other.strings[key] == value]


#
# Translation keys which are expected to always be inherited from the base
# translation and thus should be missing from all translations
#

expected_missing = {
    u'APP.NAME',
    u'APP.VERSION'
}

#
# Regular expression which matches strings that are expected to be copied
# verbatim
#

expected_copied = re.compile('|'.join([
    r'^$',                           # Empty string
    r'^@:',                          # References to other strings
    r'^\d+$',                        # Numbers
    r'^(VNC|RDP|SSH|SFTP|Telnet)$',  # Protocol names
    r'^(Apache )?Guacamole$'         # Guacamole itself
]))


def build_parser():
    """Creates the parser for the command-line arguments of this script.

    Returns
    -------
    argparse.ArgumentParser
        A parser accepting the `--no-missing`, `--no-unused`, and
        `--check-copied` options, followed by the optional ORIGINAL and
        required TRANSLATED file paths.

    """

    parser = argparse.ArgumentParser(description='Compares two JSON translation '
        'files, as used by the Apache Guacamole web application, listing '
        'the strings which appear to be missing or incorrect.')

    parser.add_argument('--no-missing', dest='check_missing', action='store_false',
        help='Disables checking for strings which are present in ORIGINAL but '
        'are missing from TRANSLATED. Assuming ORIGINAL represents the set of '
        'strings actually used by the web application, these strings are '
        'those which are missing and need to be defined for the translation '
        'to be complete. By default, the comparison will check for missing '
        'translations.')

    parser.add_argument('--no-unused', dest='check_unused', action='store_false',
        help='Disables checking for strings which are present in TRANSLATED '
        'but not in ORIGINAL. Assuming ORIGINAL represents the set of strings '
        'actually used by the web application, these strings are those which '
        'are defined by the translation but unused. By default, the '
        'comparison will check for unused translations.')

    parser.add_argument('--check-copied', action='store_true', help='Enables '
        'checking for strings defined in TRANSLATED which are identical to '
        'the corresponding strings in ORIGINAL. Such strings may have been '
        'incorrectly copied verbatim from the original without being '
        'translated at all. It is also possible that both languages simply '
        'use the same text for that string, and the string is correct. As '
        'this test can produce false positives, it is disabled by default.')

    parser.add_argument('ORIGINAL', nargs='?', help='The JSON file which should '
        'be used as the basis for comparison. This should be JSON which can '
        'be expected to contain every string used by the web application and '
        'no others. Typically, this will be the primary, original language of '
        'the web application. In the case of Apache Guacamole, this should be '
        'English. If omitted, the file "en.json" within the same directory '
        'as TRANSLATED will be used by default.')

    parser.add_argument('TRANSLATED', help='The JSON file which should be '
        'compared against ORIGINAL. This should be the JSON which has been '
        'translated from ORIGINAL, and thus should contain the same set of '
        'strings if the translation is complete.')

    return parser


def default_original_path(translated_path):
    """Returns the path of the "en.json" file which resides in the same
    directory as the given translated file.

    Parameters
    ----------
    translated_path : str
        The path to the translated JSON file.

    Returns
    -------
    str
        The path to "en.json" within the directory containing the translated
        file. If the given path has no directory component, the result is the
        relative path "en.json" (the current directory), NOT "/en.json".

    """
    return os.path.join(os.path.dirname(translated_path), 'en.json')


def main(argv=None):
    """Compares the translation files named on the command line and prints a
    report of all problems found.

    Parameters
    ----------
    argv : list of str, optional
        The command-line arguments to parse, excluding the program name. If
        omitted, the arguments are taken from `sys.argv`.

    Returns
    -------
    int
        1 if at least one problem was reported, 0 otherwise. This value is
        intended to be used as the exit status of the process.

    """

    args = build_parser().parse_args(argv)

    #
    # Read provided input files
    #

    orig = Translation(args.ORIGINAL
            or default_original_path(args.TRANSLATED))

    trans = Translation(args.TRANSLATED)

    # NOTE: Every print() call below takes exactly one argument so that the
    # output is identical under both Python 2 and Python 3
    print(u'Original language: {} ({})'.format(orig.lang_key, orig.lang_name))
    print(u'Translation language: {} ({})'.format(trans.lang_key, trans.lang_name))

    # Ignore keys that are expected to be missing
    orig.strings = { key:value for key, value in orig.strings.items()
            if key not in expected_missing }

    #
    # Perform requested tests
    #

    missing = trans.get_missing(orig)   if args.check_missing else []
    unused  = orig.get_missing(trans)   if args.check_unused  else []
    copied  = orig.get_identical(trans) if args.check_copied  else []

    # Exclude keys which are expected to be copied
    copied = [ key for key in copied
            if not expected_copied.match(orig.strings[key]) ]

    #
    # Group any errors encountered by type
    #

    if missing:
        print('\nThe following strings are missing from the translation and '
            'should be added:\n')
        for name in sorted(missing):
            print(' {}'.format(name))

    if unused:
        print('\nThe following strings are either NOT defined for the original '
            'language or are expected to be inherited from the original '
            'language and should be removed:\n')
        for name in sorted(unused):
            print(' {}'.format(name))

    if copied:
        print('\nThe following strings are identical to the original language '
            'and MIGHT be untranslated:\n')
        for name in sorted(copied):
            print(' {}'.format(name))

    #
    # Count total number of errors and summarize result
    #

    errors = len(missing) + len(unused) + len(copied)

    if errors:
        print('\n{} error(s) total.'.format(errors))
        return 1

    print('\nCheck completed successfully. No errors.')
    return 0


if __name__ == '__main__':
    sys.exit(main())