Permalink
Browse files

Add check for XML validity (issue #145)

  • Loading branch information...
1 parent 9c16b4b commit eab1282e52dbf2c07c4279557df0f2f6144288ed @nijel nijel committed Nov 13, 2012
Showing with 45 additions and 0 deletions.
  1. +10 −0 docs/usage.rst
  2. +1 −0 weblate/settings_example.py
  3. +34 −0 weblate/trans/checks.py
View
@@ -319,6 +319,16 @@ Zero-width space
Translation contains an extra zero-width space (<U+200B>) character. This
character is usually inserted by mistake.
+
+.. _check-xml-tags:
+
+XML tags mismatch
+~~~~~~~~~~~~~~~~~
+
+XML tags in the translation do not match the source. This usually means the
+resulting output will look different. In most cases this is not a desired
+result of the translation, but occasionally it is intentional.
+
.. _check-optional-plural:
Source checks
@@ -339,6 +339,7 @@
'weblate.trans.checks.NewlineCountingCheck',
'weblate.trans.checks.BBCodeCheck',
'weblate.trans.checks.ZeroWidthSpaceCheck',
+ 'weblate.trans.checks.XMLTagsCheck',
'weblate.trans.checks.OptionalPluralCheck',
'weblate.trans.checks.EllipsisCheck',
)
@@ -21,6 +21,8 @@
from django.utils.translation import ugettext_lazy as _
from django.core.exceptions import ImproperlyConfigured
from django.conf import settings
+from django.core.cache import cache
+from xml.etree import ElementTree
import weblate
import re
@@ -60,6 +62,8 @@
BBCODE_MATCH = re.compile(r'\[(?P<tag>[^]]*)(?=(@[^]]*)?\](.*?)\[\/(?P=tag)\])', re.MULTILINE)
+# Matches '<', one or more characters other than '>', then '>' - that is,
+# anything that looks like an XML tag
+XML_MATCH = re.compile(r'<[^>]+>')
+
# Matches (s) not followed by alphanumeric chars or at the end
PLURAL_MATCH = re.compile(r'\(s\)(\W|\Z)')
@@ -155,6 +159,7 @@
'weblate.trans.checks.NewlineCountingCheck',
'weblate.trans.checks.BBCodeCheck',
'weblate.trans.checks.ZeroWidthSpaceCheck',
+ 'weblate.trans.checks.XMLTagsCheck',
'weblate.trans.checks.OptionalPluralCheck',
'weblate.trans.checks.EllipsisCheck',
)
@@ -613,6 +618,35 @@ class ZeroWidthSpaceCheck(TargetCheck):
def check_single(self, source, target, flags, language, unit):
    '''
    Return True if target contains a zero-width space (U+200B) while
    source does not.

    The flags, language and unit arguments are not used by this check.
    '''
    zero_width_space = u'\u200b'
    return zero_width_space in target and zero_width_space not in source
+class XMLTagsCheck(TargetCheck):
+ '''
+ Check whether XML in target matches source.
+ '''
+ check_id = 'xml-tags'
+ name = _('XML tags mismatch')
+ description = _('XML tags in translation do not match source')
+
+ def check_single(self, source, target, flags, language, unit):
+ # Quick check if source looks like XML
+ if not '<' in source or len(XML_MATCH.findall(source)) == 0:
+ return False
+ # Check if source is XML
+ try:
+ source_tree = ElementTree.fromstring('<weblate>%s</weblate>' % source)
+ source_tags = [x.tag for x in source_tree.iter()]
+ except:
+ # Source is not valid XML, we give up
+ return False
+ # Check target
+ try:
+ target_tree = ElementTree.fromstring('<weblate>%s</weblate>' % target)
+ target_tags = [x.tag for x in target_tree.iter()]
+ except:
+ # Target is not valid XML
+ return True
+ # Compare tags
+ return source_tags != target_tags
+
class OptionalPluralCheck(SourceCheck):
'''
Check for not used plural form.

0 comments on commit eab1282

Please sign in to comment.