In [None]:
import itertools
import lepl as l
from handshapes import handshapelist

ModuleNotFoundError: No module named 'analysis'

In [5]:
class AttrNode(l.Node):
  """Node that merges attributes and text fragments of children.

     Every instance carries two extra members on top of the LEPL node:
       attributes -- dict mapping an attribute name to a list of values
       fragments  -- ordered list of text pieces (or placeholder objects)
                     from which the normalized text is assembled
  """
  def __init__(self, *args, **kw):
    """Creates a new node.
       The positional arguments are the same as for a LEPL Node constructor
       and are forwarded unchanged to the superclass. Each one is handled
       according to its kind:
         * an AttrNode child contributes its fragments and all of its
           attributes to this node;
         * a named (name, value) pair contributes the value as a fragment
           and is recorded under the name in the attributes dictionary;
         * anything else is appended to the fragments as is.
       Keyword arguments are added to the attributes dictionary only;
       they are not passed to the superclass.
    """
    self.attributes = dict()
    self.fragments = []
    for arg in args:
      if isinstance(arg, AttrNode):
        self.fragments.extend(arg.fragments)
        child_attributes = getattr(arg, 'attributes', dict())
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        for (attr, values) in child_attributes.items():
          self._merge_attr(attr, values)
      elif l.is_named(arg):
        (attr, value) = arg
        self.fragments.append(value)
        self._merge_attr(attr, value)
      else:
        self.fragments.append(arg)
    for (attr, value) in kw.items():
      self._merge_attr(attr, value)
    super(AttrNode, self).__init__(*args)
  
  def text(self):
    """Returns the normalized text corresponding to the node,
       which has any location indices normalized to i, j, k, ...
       in the order in which their placeholders occur in the fragments.
    """
    index = itertools.count()
    def to_str(x):
      if isinstance(x, IndexPlaceholder):
        # next(index) works on both Python 2.6+ and 3; index.next() is Python 2 only
        return chr(ord('i') + next(index))
      else:
        return str(x)
    return ''.join(to_str(v) for v in self.fragments)

  def _merge_attr(self, attr, value):
    """Adds value to the list stored under attr, creating the list on
       first use. A list value is spliced in element by element; any
       other value is appended as a single element.
    """
    # setdefault replaces the Python 2 only dict.has_key() check
    merged = self.attributes.setdefault(attr, [])
    if isinstance(value, list):
      merged.extend(value)
    else:
      merged.append(value)


In [6]:
class SignComponent(AttrNode):
  """A single unit of a (possibly compound) sign.

     After the regular AttrNode merging, the concatenated text of all
     fragments is recorded under the "components" attribute.
  """
  def __init__(self, *args):
    """Takes the same positional arguments as a LEPL Node"""
    super(SignComponent, self).__init__(*args)
    component_text = ''.join(map(str, self.fragments))
    self._merge_attr('components', component_text)

In [7]:
class Nucleus(AttrNode):
  """Node marking the nucleus of a gloss.

     Sets the "nucleus" attribute to a one-element list holding the
     joined text of all fragments that are not NonNuclearFragment
     instances. Any previously merged "nucleus" value is replaced.
  """
  def __init__(self, *args):
    """Takes the same positional arguments as a LEPL Node"""
    super(Nucleus, self).__init__(*args)
    nuclear_parts = [
      str(fragment)
      for fragment in self.fragments
      if not isinstance(fragment, NonNuclearFragment)
    ]
    self.attributes['nucleus'] = [''.join(nuclear_parts)]

In [8]:
class TypeBase(AttrNode):
  """Common ancestor of the sign type nodes.

     On construction the class-level TYPE value is pushed into the
     "type" attribute. TypeBase itself defines no TYPE; the classes
     produced by TypeBase.type() supply it.
  """
  def __init__(self, *args):
    """Forwards the positional arguments and adds type=self.TYPE"""
    type_attribute = dict(type=self.TYPE)
    super(TypeBase, self).__init__(*args, **type_attribute)

  @classmethod
  def type(cls, typename):
    """Returns a fresh TypeBase subclass whose TYPE is typename"""
    class Type(TypeBase):
      TYPE = typename
    return Type

In [9]:
# Concrete sign type node classes. Each one is a TypeBase subclass created by
# TypeBase.type(), so every node built from it carries the given code in its
# "type" attribute.
LexicalSign = TypeBase.type('LEX') 
Classifier = TypeBase.type('CL')
Fingerspelled = TypeBase.type('FS')
NameSign = TypeBase.type('NS')
LoanSign = TypeBase.type('LS')
Gesture = TypeBase.type('GES')
IndexSign = TypeBase.type('IX') 

In [10]:
class IndexPlaceholder(object):
  """Stand-in for a location index inside a node's fragment list.

     When a location index (j, k, x, y, ...) is parsed, the letter that
     will represent it in the normalized gloss is not known yet: letters
     can be assigned only once every location index has been parsed and
     ordered. The placeholder therefore stays in the fragments list and
     is swapped for the normalized letter in AttrNode.text().
  """
  def __init__(self, index):
    """Remembers the index letter exactly as it was parsed"""
    super(IndexPlaceholder, self).__init__()
    parsed_letter = index
    self.original_index = parsed_letter

In [11]:
class LocationIndex(AttrNode):
  """Common ancestor of the location index nodes."""
  def __init__(self, attr, index):
    """Wraps index in an IndexPlaceholder and registers it as the
       named pair (attr, placeholder).
    """
    named_placeholder = (attr, IndexPlaceholder(index))
    super(LocationIndex, self).__init__(named_placeholder)

In [12]:
class SubjectLocationIndex(LocationIndex):
  """Location index for a subject (found at the beginning of a sign)"""
  def __init__(self, index):
    """Registers index under the 'subject_loc' attribute"""
    attribute_name = 'subject_loc'
    super(SubjectLocationIndex, self).__init__(attribute_name, index)

In [13]:
class ObjectLocationIndex(LocationIndex):
  """Location index for an object (found at the end of a sign)"""
  def __init__(self, index):
    """Registers index under the 'object_loc' attribute"""
    attribute_name = 'object_loc'
    super(ObjectLocationIndex, self).__init__(attribute_name, index)

In [14]:
class NonNuclearFragment(object):
  """Represents a fragment that should be displayed in the normalized gloss,
     but omitted in the nucleus of the sign
  """
  def __init__(self, text):
    """Wraps the given fragment; it is stored unchanged in self.text"""
    self.text = text
  
  def __str__(self):
    """Returns the wrapped fragment rendered as a string"""
    # __str__ must return a real string. The wrapped fragment is not always
    # one (e.g. a fragment that is itself a NonNuclearFragment), so coerce it
    # instead of returning it as is.
    return str(self.text)

In [15]:
class NonNuclearNode(AttrNode):
  """Attribute node whose text belongs to the normalized sign,
     but is left out of the sign's nucleus.
  """
  def __init__(self, *args, **kw):
    """Builds the node like an AttrNode, then wraps every collected
       fragment in a NonNuclearFragment.
    """
    super(NonNuclearNode, self).__init__(*args, **kw)
    wrapped_fragments = list(map(NonNuclearFragment, self.fragments))
    self.fragments = wrapped_fragments

In [16]:
class NonTextualNode(AttrNode):
  """Attribute node that contributes attributes only: none of its
     text is merged into the normalized sign.
  """
  def __init__(self, *args, **kw):
    """Builds the node like an AttrNode, then discards all fragments"""
    super(NonTextualNode, self).__init__(*args, **kw)
    self.fragments = list()

  def __str__(self):
    """A non-textual node always renders as the empty string"""
    return ''

In [17]:
# Inline flag that switches a pattern to verbose mode. Every component pattern
# below starts with it so that each one can be compiled on its own.
_VERBOSE_FLAG = "(?x)"

def _embeddable(pattern):
  """Returns pattern without its leading (?x) flag.

     A global inline flag is only valid at the very start of a regular
     expression (deprecated elsewhere since Python 3.6, an error since
     Python 3.11), so a component must drop its own flag before it is
     spliced into the middle of a larger verbose pattern.
  """
  if pattern.startswith(_VERBOSE_FLAG):
    return pattern[len(_VERBOSE_FLAG):]
  return pattern

number_regexp =  r"""(?x)                   # a number contains:
                   [0-9]+                   #   one or more digits  
                   (?: \.[0-9]+ )?          #   optionally followed by decimal dot and more digits
                   s?                       #   optionally followed by 's' (e.g. "1990s")
                 """
alpha_regexp =   r"""(?x)                   # an alphanumeric word:
                   (?!                      #   never has index sign prefixes of the form:
                     (?: THUMB- )?          #     THUMB-IX-/THUMB-POSS-/THUMB-SELF-
                     (?:                    #   or of the form
                         IX- |              #     IX-
                         POSS- |            #     POSS-
                         SELF-              #     SELF-
                     )  
                   )                        # and contains:
                   [A-Z0-9]                 #   one uppercase letter or digit
                   (?:                      #   optionally followed by:
                     [A-Z0-9'-]*            #     more uppercase letters and digits, and punctuation characters,
                     [A-Z0-9]               #     where the last character may not be punctuation
                   )?
                   (?:                      # and is optionally followed by:
                     \.                     #   a trailing . (e.g., ETC.)
                   |                        # or
                     :[0-9]                 #   a trailing colon and digit (e.g. COUNT-ON-FINGERS:2)
                   )?
                 """
lookahead_regexp = r"""(?x)                 # a word may not be followed by:
                   (?:
                     (?! [a-z] )            #   any lowercase letters (to prevent e.g. AGE-1p from matching as AGE-1),              
                   |                        # with the exception of:
                     (?= wg )               #   wg, describing a wiggle sign (e.g., MOTHERwg)
                   )
                 """
# The combined pattern carries a single (?x) at its start; the components are
# embedded without their own flag (see _embeddable).
word_all_regexp = r"""(?x)                  # all put together, a word may be:
                   (?: %s | %s )            #   either a number or an alphanumeric word,
                   %s                       #   not followed by lowercase letters, except for wg
                 """    % tuple(_embeddable(part) for part in (number_regexp, alpha_regexp, lookahead_regexp))

In [None]:
# Elementary matchers from which the gloss grammar is assembled.
# Conventions used throughout (LEPL operators):
#   matcher > 'name'   labels the match as a named (name, value) pair, which
#                      AttrNode records in its attributes dictionary
#   matcher > NodeCls  passes the match results to the given node class
word = l.Regexp(word_all_regexp)
wiggle = (l.Literal('wg') > 'wiggle') > NonNuclearNode
cl_prefix = l.Or(*[l.Literal(s) for s in ["CL", "DCL", "LCL", "SCL", "BCL", "BPCL", "PCL", "ICL"]]) > 'classifier'
uncertain = l.Literal("?")
# The uppercase variants of the ns/fs prefixes are matched only in order to
# attach an error message to them.
# NOTE(review): presumably the resulting error is raised via `>> l.throw`
# where these prefixes are used in the grammar -- confirm against the LEPL manual.
ns_prefix = l.Literal("ns") | (l.Literal("NS") ** l.make_error("{stream_in}: Name-sign prefix must be lowercase"))
fs_prefix = l.Literal("fs") | (l.Literal("FS") ** l.make_error("{stream_in}: Fingerspelling prefix must be lowercase"))
lex_exceptions = l.Literal('"WHAT"') | l.Literal("part:indef")
# `|` binds tighter than `>` in Python, so the label applies to either alternative
handedness = l.Literal("1h") | l.Literal("2h") > 'handedness'
alternate = l.Literal('alt') > 'alternate'
handshape = l.Word(l.Letter() | l.Digit(), l.Letter() | l.Digit() | l.Any(" -/")) > 'handshape'
# Free text: one or more characters other than CR, LF and the double quote
text = l.AnyBut('\r\n"')[1:,...]
aspect_text = l.Word(l.Lower()) > 'aspect'
thumb = l.Literal("THUMB")
index_core_ix = l.Literal("IX")
other_index_core =  l.Literal("POSS") | l.Literal("SELF")
# Same surface form ("1p", "2p", "3p"), labelled differently for subject and object
subject_person = l.Any("123") + "p" > 'subject_person'
object_person = l.Any("123") + "p" > 'object_person'
plural = l.Literal('pl') > 'plural'
arc = l.Literal('arc') > 'arc'
loc = l.Literal('loc') > 'loc'
# A single lowercase letter is a location index; the node class decides
# whether it is stored as 'subject_loc' or 'object_loc'
simple_subject_location = l.Lower() > SubjectLocationIndex
simple_object_location = l.Lower() > ObjectLocationIndex
plural_number = l.Digit()
plural_location = l.Lower() > ObjectLocationIndex
plus = l.Literal('+')
neu = l.Literal('neu') > 'neutral_space'
# NonTextualNode keeps the 'stress' attribute but suppresses the '!' text
stress = (l.Literal('!') > 'stress') > NonTextualNode
choice = l.Literal('/') > 'choice'
caret = l.Literal('^') > 'contraction'
compound_plus = l.Literal('+') > 'compound'

In [None]:
# Grammar rules for a complete gloss, built bottom-up from the token matchers.
# `&` sequences matchers, `|` chooses between them, `[0:1]` makes a matcher
# optional and `[0:]` / `[1:]` repeat it zero / one or more times.
# The rules are order-dependent: each name must be bound before it is used.

# --- Handshape, handedness, location indices, plural and aspect modifiers ---
HANDCONFIG = handshape & (">" & handshape)[0:1]
HANDEDNESS = "(" & handedness & ")" & (alternate & ('.' | l.Lookahead("(")))[0:1]
# The lookahead disambiguates handedness and hand configuration. HC triggers only
# if the relevant text does not also match handedness
SIGN_HS = ~l.Lookahead(HANDEDNESS) & "(" & HANDCONFIG & ")"
CLASSIFIER_HS = ":" & HANDCONFIG
SUBJ_INDEX = neu | subject_person | simple_subject_location
OBJ_INDEX = neu | simple_object_location
DEFINITE_PLURAL = plural_number
PL_QUANTIFIER = plural_location & ("/" & plural_location)[0:]
PL_ARC = ('-' & DEFINITE_PLURAL)[0:1] & '-' & arc
PL_LOC = ('-' & DEFINITE_PLURAL)[0:1] & ':' & PL_QUANTIFIER
PLURAL = plural & ((PL_ARC & (':' & OBJ_INDEX)[0:1]) | PL_LOC)
# The lookahead disambiguates aspect and plural. Aspect triggers only if
# the relevant text does not also match a plural
ASPECT = ~l.Lookahead(PLURAL) & aspect_text & ('-' & aspect_text)[0:]
REPETITION = plus[1:] > 'reduplication'

# --- Lexical, loan, fingerspelled and name signs; classifiers ---
BASE = word & ("-" & word)[0:] & wiggle[0:1]
CHOICE = BASE & (choice & BASE)[1:]
LOAN = "#" & BASE > LoanSign
FS = fs_prefix & "-" & BASE > Fingerspelled
LEXICAL = lex_exceptions | CHOICE | BASE > LexicalSign
NS = ns_prefix & "-" & (lex_exceptions | LOAN | FS | CHOICE | BASE) > NameSign
# NOTE(review): `>> l.throw` presumably raises the errors attached to the
# uppercase NS/FS prefixes -- confirm against the LEPL manual.
SIGN_CORE = (NS | LOAN | FS | LEXICAL) >> l.throw
CONTRACTION = caret & SIGN_CORE
SIGN_UNIT = SIGN_CORE & CONTRACTION[0:1] > SignComponent
COMPOUND = compound_plus & SIGN_UNIT
CLASSIFIER = cl_prefix & (~uncertain)[0:1] & CLASSIFIER_HS[0:1] & '"' & text  & '"' > Classifier
CLASSIFIER_NUCLEUS = CLASSIFIER > Nucleus
SIGN_NUCLEUS = SIGN_UNIT & COMPOUND[0:] > Nucleus
SIGN_WITH_OPT_HS = CLASSIFIER_NUCLEUS | (SIGN_HS[0:1] & SIGN_NUCLEUS)
SIGN_MODIFIERS = ('-' & ASPECT)[0:1] & ('-' & object_person)[0:1] & (('-' & PLURAL) | (':' & OBJ_INDEX))[0:1]
# The repetition marker may come either before the modifiers (optionally
# followed by a plural location) or after them.
SIGN = (SUBJ_INDEX & ':')[0:1] & HANDEDNESS[0:1] & SIGN_WITH_OPT_HS & ((REPETITION[1] & SIGN_MODIFIERS & PL_LOC[0:1]) | (SIGN_MODIFIERS & REPETITION[0:1]))

# --- Index signs (IX / POSS / SELF followed by a person) ---
# NOTE(review): because `thumb[0:1]` is itself optional, the prefix group also
# accepts a bare leading "-" without THUMB -- confirm whether
# `(thumb & "-")[0:1]` was intended.
INDEX_IX = (thumb[0:1] & "-")[0:1] & index_core_ix

INDEX_CORE = INDEX_IX | other_index_core
INDEX_NUCLEUS = INDEX_CORE & '-' & object_person > Nucleus
INDEX_LOCATION = ':' & OBJ_INDEX
INDEX_PL = '-' & PLURAL
INDEX_WITH_OPT_HS = SIGN_HS[0:1] & INDEX_NUCLEUS 
INDEX = HANDEDNESS[0:1] & INDEX_WITH_OPT_HS & REPETITION[0:1] & (INDEX_PL | INDEX_LOCATION)[0:1] > IndexSign

# --- Locative index signs (IX-loc) ---
INDEX_LOC_NUCLEUS = INDEX_IX & '-' & loc > Nucleus
INDEX_LOC_PL = '-' & arc
INDEX_LOC_LOCATION = ':' & OBJ_INDEX
INDEX_LOC_WITH_OPT_HS = SIGN_HS[0:1] & INDEX_LOC_NUCLEUS 
INDEX_LOC = HANDEDNESS[0:1] & INDEX_LOC_WITH_OPT_HS & INDEX_LOC_PL[0:1] & ('"' & text & '"')[0:1] & REPETITION[0:1] & INDEX_LOC_LOCATION[0:1] > IndexSign

# --- Gestures: quoted free text with optional handedness / hand configuration ---
GESTURE_NUCLEUS = HANDEDNESS[0:1] & (SIGN_HS | HANDCONFIG)[0:1] & '"' & text & '"' > Nucleus
GESTURE = GESTURE_NUCLEUS & REPETITION[0:1] & INDEX_LOC_LOCATION[0:1] > Gesture

# --- Top level: exactly one sign followed by the end of the input ---
ALLSIGNS = INDEX_LOC | INDEX | SIGN | GESTURE
GLOSS = ALLSIGNS & l.Eos() > AttrNode

NameError: name 'SIGN_MODIFIERS' is not defined