In [None]:
!pip install lark rapidfuzz

In [None]:
from lark import Lark, LarkError
from rapidfuzz import process

In [None]:
# Lark grammar for the morpheme language parsed in this notebook.
#
# Layout of the grammar text below:
#   * `start` wraps a single `sentence`, which is an imperative (bare vp),
#     a prohibitive (NOT + vp), a declarative (np + predicate) or the lone
#     GREET token.
#   * A `vp` is one or more verbs, each optionally followed by an np object,
#     which is what permits serial-verb chains.
#   * Multi-token verbs, nouns and plural pronouns are given aliases with `->`
#     (e.g. GO DEAD -> die, NOT ME -> stranger, ME PLURAL -> us), so the alias
#     is the node name that shows up in the parse tree.
#   * Every terminal is a fixed lowercase string literal; whitespace is
#     imported from `common.WS` and ignored.
grammar = r'''
start: sentence

sentence: imperative
        | prohibitive
        | declarative
        | GREET

// Clauses

imperative: vp
prohibitive: NOT vp
declarative: np (equative_predicate | verbal_predicate)

equative_predicate: NOT? np
verbal_predicate: NOT? vp

relclause: np verbal_predicate

// Verbs and VP

verb: verb_simple | verb_compound

?verb_simple: GO
            | SEEK
            | HELP
            | OPEN
            | TALK
            | FEAR
            | MAKE
            | WAIT

?verb_compound: GO DEAD                 -> die
              | GO NOT                  -> stop
              | GO NOT DEAD             -> revive
              | SEEK NOT                -> disregard
              | GO ME                   -> come
              | GO UP                   -> ascend

// allow serial verbs and optional NP objects
vp: (verb np?)+

// Nouns, pronouns, and NP

?noun_uncountable: EXILE
                 | TOWER
                 | PEOPLE ME -> anchorites

?noun_countable: PEOPLE
               | DOOR
               | HUMAN
               | KEY
               | NOT ME -> stranger

noun: noun_countable PLURAL? | noun_uncountable

?pronoun_singular: ME | YOU

?pronoun_plural: ME PLURAL  -> us
               | YOU PLURAL -> you_all

pronoun: pronoun_singular | pronoun_plural

np: (pronoun | noun) noun* relclause?

// Terminals

NOT: "not"
GO: "go"
UP: "up"
ME: "me"
DEAD: "dead"
SEEK: "seek"
DOOR: "door"
EXILE: "exile"
FEAR: "fear"
HELP: "help"
KEY: "key"
MAKE: "make"
HUMAN: "human"
PEOPLE: "people"
OPEN: "open"
TALK: "talk"
TOWER: "tower"
WAIT: "wait"
YOU: "you"
GREET: "greet"

// Plural morpheme

PLURAL: "plural"

%import common.WS
%ignore WS
'''

In [None]:
# Earley parser rooted at the `start` rule; ambiguous parses are left to
# Lark's "resolve" strategy.
parser = Lark(
    grammar,
    parser="earley",
    ambiguity="resolve",
    start="start",
)

In [None]:
# Sample sentences for the grammar; the comment after each one is its
# intended English gloss.
tests = [
    "greet",  # "Greetings!"
    "me seek you",  # "I seek you."
    "me seek not not me plural",  # "I disregard strangers."
    "people plural go not talk people plural",  # "Peoples stop talking to peoples."
    "you help people me",  # "You help the Anchorites."
    "you human me wait",  # "You are the human I am waiting for."
    "open door plural",  # "Open the doors."
    "not go up",  # "Do not ascend!"
    "people exile not go dead",  # "People of Exile do not die."
    "me plural tower go not dead",  # "Our tower is revived!"
]

# Echo each input, then pretty-print the tree the parser builds for it.
for sentence in tests:
    print("\nINPUT:", sentence)
    print(parser.parse(sentence).pretty())


INPUT: greet
start
  sentence	greet


INPUT: me seek you
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        vp
          verb	seek
          np
            pronoun	you


INPUT: me seek not not me plural
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        vp
          verb
            disregard
              seek
              not
          np
            noun
              stranger
                not
                me
              plural


INPUT: people plural go not talk people plural
start
  sentence
    declarative
      np
        noun
          people
          plural
      verbal_predicate
        vp
          verb
            stop
              go
              not
          verb	talk
          np
            noun
              people
              plural


INPUT: you help people me
start
  sentence
    declarative
      np
        pronoun	you
      verbal_predicate
        vp
          verb	hel

In [None]:
# Negation combined with serial and compound verbs. The last sentence is
# expected to be rejected: the grammar has no "fear not" compound, so after
# "fear" the token "not" can only begin the noun "not me", and "help" is then
# unexpected.
tests = [
    "me not go help people me",          # "I do not go to help the Anchorites"
    "me not go not help people me",      # "I do not stop helping the Anchorites"
    "me not fear help people me",        # "I do not fear helping the Anchorites"
    "me not fear not help people me",    # error: "fear not" is not a compound verb
]

for s in tests:
    print("\nINPUT:", s)
    try:
        tree = parser.parse(s)
        print(tree.pretty())
    except LarkError as e:
        # Only grammar/lexer rejections are part of this demonstration; any
        # other exception is a genuine bug and is allowed to propagate.
        print(e)


INPUT: me not go help people me
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        not
        vp
          verb	go
          verb	help
          np
            noun
              anchorites
                people
                me


INPUT: me not go not help people me
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        not
        vp
          verb
            stop
              go
              not
          verb	help
          np
            noun
              anchorites
                people
                me


INPUT: me not fear help people me
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        not
        vp
          verb	fear
          verb	help
          np
            noun
              anchorites
                people
                me


INPUT: me not fear not help people me
No terminal matches 'h' in the current parser context, at line 1 col 17

me 

In [None]:
# Vocabulary that fuzzy matching corrects towards. It mirrors the terminal
# literals of the grammar defined earlier in this notebook and has to be kept
# in sync with it by hand.
VALID_MORPHEMES = [
    "me", "you", "go", "up", "dead", "seek", "door", "exile",
    "fear", "help", "key", "make", "human", "people", "open",
    "talk", "tower", "wait", "plural", "greet", "not"
]

def fuzzy_tokenize(sentence, threshold=70):
    """
    Normalize a whitespace-separated morpheme sequence.

    Each word is lowercased (the vocabulary is lowercase-only) and then:
      * kept as-is when it is already a valid morpheme, without consulting
        the fuzzy matcher;
      * otherwise replaced by the closest entry of VALID_MORPHEMES when the
        rapidfuzz similarity score is >= threshold;
      * otherwise passed through unchanged, in its original spelling.

    Args:
        sentence: Raw input string; words are split on any whitespace.
        threshold: Minimum similarity score required to accept a correction.

    Returns:
        The normalized words joined by single spaces ("" for blank input).
    """
    corrected = []
    for w in sentence.split():
        key = w.lower()
        if key in VALID_MORPHEMES:
            # Exact hit: nothing to correct, and no fuzzy search is needed.
            corrected.append(key)
            continue
        # With score_cutoff, extractOne returns None when no candidate
        # reaches the threshold, instead of a low-scoring best guess.
        match = process.extractOne(key, VALID_MORPHEMES, score_cutoff=threshold)
        corrected.append(match[0] if match is not None else w)
    return " ".join(corrected)

In [None]:
# Misspelled inputs used to demonstrate fuzzy normalization ahead of parsing.
raw_sentences = [
    "mee sek go up",
    "peopl exile not go dea",
    "opne door plural",
]

# Show each raw sentence beside its corrected form, followed by the parse
# tree of the corrected text.
for raw in raw_sentences:
    normalized = fuzzy_tokenize(raw)
    print("RAW INPUT:  ", raw)
    print("NORMALIZED: ", normalized)
    print(parser.parse(normalized).pretty())

RAW INPUT:   mee sek go up
NORMALIZED:  me seek go up
start
  sentence
    declarative
      np
        pronoun	me
      verbal_predicate
        vp
          verb	seek
          verb
            ascend
              go
              up

RAW INPUT:   peopl exile not go dea
NORMALIZED:  people exile not go dead
start
  sentence
    declarative
      np
        noun	people
        noun	exile
      verbal_predicate
        not
        vp
          verb
            die
              go
              dead

RAW INPUT:   opne door plural
NORMALIZED:  open door plural
start
  sentence
    imperative
      vp
        verb	open
        np
          noun
            door
            plural

