Permalink
Browse files

implemented context guesser

  • Loading branch information...
1 parent 2e5ffc7 commit b2babd8582ffe520ab66bae78a8bb734fea4ad68 Anton Bobrov committed Sep 28, 2009
Showing with 86 additions and 9 deletions.
  1. +84 −0 context.py
  2. +2 −9 validator/schema.py
View
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+import re
+
class Node(object):
    """One tag occurrence found while scanning XML text.

    Nodes are chained through ``parent`` so that walking ``node.parent``
    repeatedly yields the enclosing tags, innermost first.

    Attributes are plain and public:
      parent -- the enclosing ``Node``, or ``None`` at the top level
      start  -- offset of the tag's ``<`` in the scanned text
      name   -- tag name as recorded by the scanner
      end    -- offset just past the tag, as computed by the scanner
    """

    def __init__(self, parent, start, name, end):
        self.parent = parent
        self.start = start
        self.name = name
        self.end = end

    def __repr__(self):
        # The parent is deliberately left out: including it would print the
        # whole ancestor chain for every node.
        return '%s(name=%r, start=%r, end=%r)' % (
            type(self).__name__, self.name, self.start, self.end)
+
def get_position(line, column, xml):
    """Convert a (line, column) pair into an absolute offset within ``xml``.

    ``line`` is 1-based and ``column`` is 0-based.  Lines are delimited the
    way ``str.splitlines`` delimits them, and line terminators count towards
    the offset.

    A ``line`` below 1 is treated as the first line.  A ``line`` past the
    last one yields ``len(xml)``.  ``column`` is clamped to the span of the
    addressed line (terminator included), so the result never falls before
    the start of that line or beyond its end.
    """
    lines = xml.splitlines(True)
    index = max(line, 1) - 1
    if index >= len(lines):
        # Past the last line (or empty text): point at the end of the text.
        return len(xml)

    line_start = sum(len(text) for text in lines[:index])
    clamped_column = min(max(column, 0), len(lines[index]))
    return line_start + clamped_column
+
def get_tags(xml, end_position):
    """Scan ``xml`` up to ``end_position`` and return the last relevant tag.

    The text is scanned with regular expressions rather than parsed, so it
    may be incomplete or not well-formed.  Tags inside ``<!-- -->`` comments
    and processing instructions (``<?...``) are ignored, as is any ``<``
    that is not followed by a tag name (``<!DOCTYPE``, ``<![CDATA[``, a
    stray ``<``).

    Returns the ``Node`` for the last tag starting at or before
    ``end_position``, or ``None`` if there is none:

    * an opening tag becomes the current node and the parent of what follows;
    * a closing tag becomes the current node (its ``parent`` is the element
      being closed) and pops one level;
    * a self-closing tag becomes the current node only when it ends after
      ``end_position``, i.e. the position lies inside it.

    NOTE: tag names are matched with ``[a-z0-9_:]`` only, and ``>`` inside an
    attribute value is taken as the end of the tag.
    """
    # (start, end) spans of all comments; ``end`` is exclusive.
    comments = [(found.start(), found.end())
                for found in re.finditer(u'(?is)<!--.*?-->', xml)]

    def is_in_comment(tag_pos):
        # Half-open test: a tag starting right after ``-->`` is NOT inside.
        return any(start <= tag_pos < end for start, end in comments)

    parent = None
    current = None
    pattern = u'(?isu)(<\\?|(</|<)(?P<tag>[a-z0-9_:]*))'
    for match in re.finditer(pattern, xml):
        tag_pos = match.start()

        # Matches come in increasing order, so nothing later can qualify.
        if tag_pos > end_position:
            break

        if is_in_comment(tag_pos):
            continue

        tag = match.group()
        if tag.startswith('<?'):
            continue

        is_closing = tag.startswith('</')
        tag_name = match.group('tag').lower()

        if not tag_name and not is_closing:
            # Bare '<' (doctype, CDATA, unterminated comment, stray
            # character): not a tag, and indexing into it would fail.
            continue

        if is_closing:
            # Assumes '>' directly follows the name.
            tag_end_pos = match.end() + 1
            current = Node(parent, tag_pos, tag_name, tag_end_pos)
            # An unbalanced closing tag must not crash the scan.
            if parent is not None:
                parent = parent.parent
            continue

        semi_close_pos = xml.find('>', tag_pos)
        if semi_close_pos == -1:
            # Tag is still being typed: treat it as open up to the text end.
            current = parent = Node(parent, tag_pos, tag_name, len(xml))
            continue

        full_close_pos = xml.find('/>', tag_pos)
        # find() returns -1 when there is no '/>' at all; that must not be
        # mistaken for "'/>' comes before '>'".
        is_self_closing = (full_close_pos != -1
                           and full_close_pos < semi_close_pos)

        if is_self_closing:
            tag_end_pos = full_close_pos + 2
            if tag_end_pos > end_position:
                current = Node(parent, tag_pos, tag_name, tag_end_pos)
        else:
            tag_end_pos = semi_close_pos + 1
            current = parent = Node(parent, tag_pos, tag_name, tag_end_pos)

    return current
+
def guess_context(line, column, xml):
    """Report the chain of tags enclosing a cursor position in ``xml``.

    ``line`` and ``column`` are converted to an offset with
    ``get_position``; ``get_tags`` then finds the tag at that offset.

    Each tag name on the way from that tag up to the top level is printed
    (existing behaviour) and the same names are returned as a list,
    innermost first.  The list is empty when no tag precedes the position.

    Raises ``TypeError`` if ``xml`` is not a unicode string.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is already unicode

    if not isinstance(xml, text_type):
        raise TypeError('I can handle only unicode data')

    end_position = get_position(line, column, xml)

    names = []
    node = get_tags(xml, end_position)
    while node is not None:
        # Single-argument call form prints identically on Python 2 and 3.
        print(node.name)
        names.append(node.name)
        node = node.parent

    return names
+
View
@@ -2,23 +2,21 @@
from lxml import etree
-
class SchemaNotFoundException(Exception): pass
-
class SchemaValidator(object):
+ error_log = property(lambda self:self.schema.error_log)
+
def __init__(self, root, catalog):
self.catalog = catalog
self.root = root
self.schema = self.get_schema(root)
-
def get_xsd_filename_for_ns(self, ns):
try:
return self.catalog[ns]
except KeyError:
raise SchemaNotFoundException("Can't find xsd for [%s] namespace" % ns)
-
def get_schema(self, root):
merged_schema = \
@@ -38,8 +36,3 @@ def get_schema(self, root):
def validate(self):
return self.schema.validate(self.root)
-
- def get_error_log(self):
- return self.schema.error_log
-
- error_log = property(get_error_log)

0 comments on commit b2babd8

Please sign in to comment.