Permalink
Browse files

Initial selector commit; Basic functionality tested & working!

  • Loading branch information...
1 parent 11efb3a commit 575175566382ef5b774d6a7386eaf87fb60d4f16 Erez Sh committed Feb 1, 2012
Showing with 345 additions and 10 deletions.
  1. +34 −0 grammars/selector.g
  2. +8 −7 plyplus.py
  3. +144 −0 selector.py
  4. +42 −0 selectors.txt
  5. +38 −3 strees.py
  6. +79 −0 test/selector_test.py
View
@@ -0,0 +1,34 @@
start: selector;
selector: selector_op? elem;
selector_op: selector operator?;
operator: '>' | '\+' | '~';

elem: (elem_head | elem_class | elem_regexp | elem_any | LPAR selector_list RPAR) modifier? yield?;

modifier: modifier_name (LPAR index RPAR)?;

modifier_name: MODIFIER;
index: INDEX;
yield: '!';

elem_head: HEAD;
elem_class: CLASS;
elem_regexp: REGEXP;
elem_any: '\*';
selector_list: selector (',' selector)@*;

HEAD: '[a-z_][a-z_0-9]*';
CLASS: '\.[a-z_][a-z_0-9]*';
MODIFIER: ':[a-z][a-z-]*';

INDEX: '\d+';
LPAR: '\(';
RPAR: '\)';

REGEXP: '/.*?[^\\]/';

WS: '[ \t\f]+' (%ignore);

###
# Build the parse tree out of STreeSelector nodes, so that a parsed selector
# carries the match methods defined on that class.
from selector import STreeSelector
self.STree = STreeSelector
View
@@ -1,4 +1,5 @@
import re, os
+import types
from ply import lex, yacc
@@ -325,7 +326,7 @@ class FilterTokens_Tranformer(STransformer):
def __default__(self, tree):
if len(tree.tail) <= 1:
return tree
- return STree(tree.head, [x for x in tree.tail if is_stree(x)])
+ return tree.__class__(tree.head, [x for x in tree.tail if is_stree(x)])
class TokValue(str):
#def __repr__(self):
@@ -454,14 +455,14 @@ def __init__(self, grammar_tree, source_name, tab_filename, **options):
ply_grammar, code = ply_grammar_and_code
+ exec(code)
+
for x in ply_grammar:
type, (name, defin) = x.head, x.tail
assert type in ('token', 'token_with_mods', 'rule', 'option'), "Can't handle type %s"%type
handler = getattr(self, '_add_%s' % type)
handler(name, defin)
- exec(code)
-
# -- Build lexer --
lexer = lex.lex(module=self)
lexer = LexerWrapper(lexer, newline_tokens_names=self._newline_tokens, newline_char=self._newline_value, ignore_token_names=self._ignore_tokens)
@@ -582,14 +583,14 @@ def _add_rule(self, rule_name, rule_def):
self.rules_to_flatten.append( rule_name )
if '?' in mods or '@' in mods: # @ is here just for the speed-up
- code = '\tp[0] = STree(%r, p[1:]) if len(p)>2 else p[1]' % rule_name
+ code = '\tp[0] = self.STree(%r, p[1:]) if len(p)>2 else p[1]' % (rule_name,)
else:
- code = '\tp[0] = STree(%r, p[1:])' % rule_name
- s = ('def p_%s(p):\n\t%r\n%s\nx = p_%s\n'
+ code = '\tp[0] = self.STree(%r, p[1:])' % (rule_name,)
+ s = ('def p_%s(self, p):\n\t%r\n%s\nx = p_%s\n'
%(rule_name, rule_def, code, rule_name))
exec(s)
- setattr(self, 'p_%s'%rule_name, x)
+ setattr(self, 'p_%s'%rule_name, types.MethodType(x, self))
@staticmethod
View
@@ -0,0 +1,144 @@
+import re, os
+from itertools import chain
+
+from strees import STree, is_stree
+from plyplus import Grammar
+
def sum_list(l):
    """Concatenate an iterable of iterables into one flat list.

    Only one level is flattened.  ``chain.from_iterable`` consumes ``l``
    directly instead of unpacking it into call arguments first.
    """
    return list(chain.from_iterable(l))
+
class _Match(object):
    """The trail of one selector match.

    ``match_track`` is a list of ``(matched element, selector element)``
    pairs.  A fresh match holds a single pair; ``extend`` appends the pairs
    of another match after it.
    """

    def __init__(self, matched, selector):
        self.match_track = [(matched, selector)]

    def __hash__(self):
        return hash(tuple(self.match_track))

    def __eq__(self, other):
        # Comparing against a foreign object is "not equal", not an error.
        if not isinstance(other, _Match):
            return NotImplemented
        return self.match_track == other.match_track

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; keep them in sync.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    @property
    def last_elem_matched(self):
        """The matched element of the first pair in the track."""
        return self.match_track[0][0]

    def extend(self, other):
        """Append the pairs of ``other`` to this match's track, in place."""
        self.match_track += other.match_track

    def get_result(self):
        """Return the element this match yields.

        That is the element matched by the ``elem`` selector whose last child
        is a ``yield`` node, if there is one; otherwise the matched element of
        the last pair in the track.  More than one yielding element in a
        single track is not supported (asserted).
        """
        yields = [
            matched
            for matched, sel in self.match_track
            if sel.head == 'elem' and len(sel.tail) > 1 and sel.tail[-1].head == 'yield'
        ]
        assert len(yields) <= 1, yields
        return yields[0] if yields else self.match_track[-1][0]
+
+
class STreeSelector(STree):
    """A node of a parsed selector that can be matched against a target tree.

    ``_match`` dispatches on this node's head to the ``match__<head>`` method
    of the same name.  ``match`` is the public entry point.  A selector is
    evaluated right to left: its last element is matched first, and each
    operator then travels backwards (to parents or previous siblings) from
    what has been matched so far.
    """

    def match__elem_head(self, other):
        """Return ``[other]`` if its head equals the selector's name, else ``[]``."""
        [expected_head] = self.tail
        # A leaf has no ``head`` and so can never match a name.  Answering
        # "no match" keeps an AttributeError from escaping into the tree
        # traversal, which calls this for leaves as well as for subtrees.
        if not hasattr(other, 'head'):
            return []
        return [other] if other.head == expected_head else []

    def match__elem_class(self, other):
        """Class selectors (``.name``) are parsed but cannot be matched yet."""
        raise NotImplementedError('Classes not implemented yet')

    def match__elem_any(self, other):
        """``*`` matches whatever it is given."""
        return [other]

    def match__elem_regexp(self, other):
        """Match ``/regexp/`` against a tree's head, or against a leaf itself."""
        subject = other.head if is_stree(other) else other  # leaf: hopefully a string
        [regexp] = self.tail
        assert regexp[0] == regexp[-1] == '/'
        pattern = regexp[1:-1]  # drop the surrounding slashes
        return [other] if re.match(pattern, subject) else []

    def match__modifier(self, other):
        """Apply this modifier to ``other``; return ``[other]`` or ``[]``.

        Only ``:is-parent`` and ``:is-leaf`` are implemented; any other
        modifier raises NotImplementedError.
        """
        assert self.tail[0].head == 'modifier_name' and len(self.tail[0].tail) == 1
        modifier_name = self.tail[0].tail[0]

        if modifier_name == ':is-parent':
            return [other] if (is_stree(other) and other.tail) else []
        elif modifier_name == ':is-leaf':
            return [other] if not is_stree(other) else []

        raise NotImplementedError("Didn't implement %s yet" % modifier_name)

    def match__selector_list(self, other):
        """Match ``other`` against every selector in a parenthesised list."""
        # ``other`` is wrapped in a list so that each selector tests that one
        # element only, instead of walking its whole subtree.
        res = sum_list(kid._match([other]) for kid in self.tail)
        return [r.get_result() for r in res]  # lose match objects, localize yields

    def match__elem(self, other):
        """Match one element, apply its modifier (if any), wrap hits in _Match."""
        matches = self.tail[0]._match(other)
        if len(self.tail) > 1 and self.tail[1].head == 'modifier':
            modifier = self.tail[1]
            matches = [m for m in matches if modifier.match__modifier(m)]
        return [_Match(m, self) for m in matches]

    def _travel_tree_by_op(self, tree, op):
        """Yield the elements reachable backwards from ``tree`` through ``op``.

        ``'>'`` yields the parent, ``'+'`` the immediately preceding sibling,
        ``'~'`` every preceding sibling and ``' '`` every ancestor up to the
        root.  Nothing is yielded for an element that has no parent.
        """
        parent_ref = getattr(tree, 'parent', None)
        if parent_ref is None:
            return  # the root (or an element without parent info): nowhere to go
        parent = parent_ref()  # parents are stored as weak references
        if parent is None:
            return

        if op == '>':  # travel to parent
            yield parent
        elif op == '+':  # travel to previous adjacent sibling
            index = tree.index_in_parent
            # The first child has no previous sibling; index - 1 must not
            # wrap around to the last element.
            if index > 0:
                yield parent.tail[index - 1]
        elif op == '~':  # travel to all previous siblings
            for sibling in parent.tail[:tree.index_in_parent]:
                yield sibling
        elif op == ' ':  # travel back to root
            while parent is not None:
                yield parent
                parent = parent.parent() if parent.parent else None

    def _match_selector_op(self, matches_so_far):
        """Extend each match by matching the left-hand selector backwards.

        For every match so far, the elements reachable through this node's
        operator (a plain descendant relation when no operator is given) are
        tested against the left-hand selector.  Every hit becomes a new match
        whose track is followed by the track of the match it grew from.
        """
        selector = self.tail[0]
        op = self.tail[1].tail[0] if len(self.tail) > 1 else ' '

        matches_found = []
        for match in matches_so_far:
            to_check = list(self._travel_tree_by_op(match.last_elem_matched, op))
            if not to_check:
                continue
            for match_found in selector.match__selector(to_check):
                match_found.extend(match)
                matches_found.append(match_found)

        return matches_found

    def match__selector(self, other):
        """Match this selector against a tree, or against a list of candidates."""
        elem = self.tail[-1]
        if is_stree(other):
            res = sum_list(other.map(elem._match))
        else:
            # we were called by a selector_op (or a selector_list)
            res = sum_list([elem._match(item) for item in other])
        if not res:
            return []  # shortcut

        if self.tail[0].head == 'selector_op':
            res = self.tail[0]._match_selector_op(res)

        return res

    def match__start(self, other):
        """Delegate to the single selector under the start rule."""
        assert len(self.tail) == 1
        return self.tail[0]._match(other)

    def _match(self, other):
        """Dispatch to ``match__<head>``; unknown heads match nothing."""
        handler = getattr(self, 'match__%s' % self.head, None)
        if handler is None:
            return []
        return handler(other)

    def match(self, other):
        """Return the elements of tree ``other`` that this selector yields.

        Parent links of ``other`` are (re)computed first, since the operators
        rely on them.
        """
        other.calc_parents()  # TODO add caching?
        return [x.get_result() for x in self._match(other)]
+
+
+
# The selector grammar lives in grammars/selector.g next to this module.
# NOTE(review): closing the file right after construction assumes Grammar
# reads the whole source while it is being constructed -- confirm.
with open(os.path.join(os.path.split(__file__)[0], 'grammars/selector.g')) as _grammar_file:
    selector_grammar = Grammar(_grammar_file)


def selector(s):
    """Parse the selector string ``s`` with the selector grammar."""
    return selector_grammar.parse(s)
View
@@ -0,0 +1,42 @@
+Basic idea is to do something like:
+>>> names = tree.find('func_def > name *:is-leaf')
+for a list of function names
+but what if we want a list of functions with names? (for example non-lambdas)
+Of course we can just use a 'parent' attribute, but
+a) we don't have one right now
+b) it can get icky quickly when the selector is complex
+c) and we want the selector to express everything
+So it should be possible to define which element it returns within the selector.
+
+So syntax:
+
+element can be
+ 1. name
+ 2. .class (to be added. note similar names always have similar classes)
+ 3. (list, of, selectors)
+ 4. /leaf_regex_match/
+ 5. *
+
+elements can have modifiers:
+ 1. :not
+ 2. :index(x)
+ 3. :index-for-type(x)
+ 4. :is-parent
+ 5. :is-leaf
+
+they can also have an operator:
+ 6. element! yields that element match (instead of just last one specified)
+ It can be used many times inside a selector, yielding each element.
+
+a selector is a sequence of elements, separated by operators:
+ 1. e1 e2 -> any e2 contained in e1
+ 2. e1>e2 -> any e2 with a parent e1
+ 3. e1+e2 -> any e2 immediately preceded by e1
+ 4. e1~e2 -> any e2 preceded by e1
+
+examples:
+ classdef > (funcdef, classdef)! > name
+ yields all named functions and classes immediately nested in a class
+
+ name > /[Aa].*/
+ returns all and any names starting with the letter 'a'
View
@@ -1,6 +1,7 @@
+from weakref import ref
class STree(object):
- __slots__ = 'head', 'tail'
+ #__slots__ = 'head', 'tail'
def __init__(self, head, tail):
self.head = head
@@ -19,12 +20,35 @@ def __len__(self):
raise Exception('len')
def __nonzero__(self):
return True # XXX ???
def __hash__(self):
    """Hash a tree by its head together with its children, in order."""
    children = tuple(self.tail)
    return hash((self.head, children))
def __eq__(self, other):
- return self.head == other.head and self.tail == other.tail
+ try:
+ return self.head == other.head and self.tail == other.tail
+ except AttributeError:
+ return False
def __repr__(self):
return '%s(%s)' % (self.head, ', '.join(map(repr,self.tail)))
def find_predicate(self, predicate):
    """Return every subtree, this one included, for which ``predicate`` holds.

    The tree itself comes first, followed by the hits found under each child
    in order.  ``predicate`` is called with subtrees only.
    """
    found = [self] if predicate(self) else []
    for kid in self.tail:
        # Leaves do not implement find_predicate; skip them instead of
        # failing on the first one.
        search = getattr(kid, 'find_predicate', None)
        if search is not None:
            found += search(predicate)
    return found
def map(self, func, context=None):
    """Apply ``func`` to this tree and to everything below it.

    Returns the list of results: the result for the tree itself first, then,
    for each child in order, the results for everything below that child
    followed by the result for the child.  Leaves are passed to ``func`` too.

    ``context`` is the result list being built; it is passed down by the
    recursive calls and should be left as None by outside callers.
    """
    if context is None:
        context = [func(self)]
    for kid in self.tail:
        # Recurse into subtrees only.  Testing for the method, rather than
        # catching AttributeError around the call, lets an AttributeError
        # raised by ``func`` surface instead of silently cutting the
        # traversal of that subtree short.
        if hasattr(kid, 'map'):
            kid.map(func, context)
        context.append(func(kid))
    return context
+
def _to_pydot(self, graph):
import pydot
color = hash(self.head) & 0xffffff
@@ -50,6 +74,17 @@ def to_png_with_pydot(self, filename):
graph = pydot.Dot(graph_type='digraph', rankdir="LR")
self._to_pydot(graph)
graph.write_png(filename)
+
def calc_parents(self):
    """Record parent links on everything below this tree.

    Each child, subtree or leaf, gets ``parent`` (a weak reference to the
    tree holding it) and ``index_in_parent`` (its position in that tree's
    tail).  The tree this is called on gets ``None`` for both.
    """
    self.parent = None
    self.index_in_parent = None
    for i, kid in enumerate(self.tail):
        # Recurse into subtrees only.  Testing for the method, rather than
        # catching AttributeError around the call, keeps a failure deep in
        # the tree from being swallowed and leaving the links half-built.
        if hasattr(kid, 'calc_parents'):
            kid.calc_parents()
        kid.parent = ref(self)
        kid.index_in_parent = i
def is_stree(obj):
@@ -85,7 +120,7 @@ def _transform(self, tree):
for branch in tree.tail
]
- tree = STree(tree.head, branches)
+ tree = tree.__class__(tree.head, branches)
f = getattr(self, tree.head, self.__default__)
return f(tree)
Oops, something went wrong.

0 comments on commit 5751755

Please sign in to comment.