In [1]:
import re

In [2]:
str.strip?

[0;31mSignature:[0m [0mstr[0m[0;34m.[0m[0mstrip[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mchars[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0;34m/[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a copy of the string with leading and trailing whitespace removed.

If chars is given and not None, remove characters in chars instead.
[0;31mType:[0m      method_descriptor


In [22]:
# Characters that have a special meaning in a regex and must be preceded by a
# backslash to be matched literally.
# Backslash is listed first on purpose: when the characters are escaped one at a
# time in this order, the backslashes inserted as escapes are not escaped again.
SPECIAL_CHARS = r"\[](){}*+?|^$."
print(SPECIAL_CHARS)

\[](){}*+?|^$.


In [23]:
# Sample text containing every character of SPECIAL_CHARS except "+", used to try out escaping.
s = r"t\his^ i[s] (a) {t}es|t. $or*ry?"

In [24]:
# Escape s one special character at a time, printing the character being handled
# and the running result after each step.
# NOTE(review): this rebinds s, so running the cell a second time escapes the
# already-escaped text again; re-run the cell that defines s first.
for c in SPECIAL_CHARS:
    print(c)
    s=s.replace(c, "\\"+c)
    print(s)

\
t\\his^ i[s] (a) {t}es|t. $or*ry?
[
t\\his^ i\[s] (a) {t}es|t. $or*ry?
]
t\\his^ i\[s\] (a) {t}es|t. $or*ry?
(
t\\his^ i\[s\] \(a) {t}es|t. $or*ry?
)
t\\his^ i\[s\] \(a\) {t}es|t. $or*ry?
{
t\\his^ i\[s\] \(a\) \{t}es|t. $or*ry?
}
t\\his^ i\[s\] \(a\) \{t\}es|t. $or*ry?
*
t\\his^ i\[s\] \(a\) \{t\}es|t. $or\*ry?
+
t\\his^ i\[s\] \(a\) \{t\}es|t. $or\*ry?
?
t\\his^ i\[s\] \(a\) \{t\}es|t. $or\*ry\?
|
t\\his^ i\[s\] \(a\) \{t\}es\|t. $or\*ry\?
^
t\\his\^ i\[s\] \(a\) \{t\}es\|t. $or\*ry\?
$
t\\his\^ i\[s\] \(a\) \{t\}es\|t. \$or\*ry\?
.
t\\his\^ i\[s\] \(a\) \{t\}es\|t\. \$or\*ry\?


In [16]:
# NOTE(review): the output recorded for this cell has an earlier execution count
# (16) than the cells above and does not match their final result -- presumably
# stale; re-run to refresh.
print(s)

t\\his\\^ i\\[s\\] \\(a\\) \\{t\\}es\\|t\\. \\$or\\*ry\\?


In [19]:
# Sanity check: "\\" in a non-raw string literal is one backslash character.
len("\\")

1

In [None]:
# NOTE(review): this looks like an early draft of the rule syntax using
# %type:name% placeholders, which is not the %name:type form that
# InstructionParser splits on. The cell has no execution count -- presumably it
# was never run; confirm whether it can be deleted.
instructions = """
move: move to %n:x%,%n:y% and turn %w:dir%
plant: plant a %w:color% flower
"""

In [35]:
class InstructionParser:
    """String parsing minilanguage for AoC instructions.
    
    Theoretically faster and more intutive than writing your
    own regexes every time??
    
    The language consists of a newline-delimited set of strings like
    
    move: move to %x:i,%y:i and turn %direc:w
    plant: plant %num:n %color:w flowers
    
    The first word ([a-z_]+) before the colon (and space!) is the name of the instruction,
    and the % delimited sections are variable spots.
    
    A variable spot is denoted by %name:t% where name is [a-z_]+ and t is a type character.
    
    A string will be matched against each rule in sequence, returning the first
    one it matches. If it doesn't match anything, an exception is thrown.
    
    When the parser is run on a string, it returns a (str, dict) tuple.
    The string is the name if the instruction matched,
    and the dict is a dictionary of variable names and their matched values.
    
    Type characters:
    w: lowercase word [a-z]+ (returns string)
    p: phrase of lowercase words [a-z ]+ (returns string)
    n: nonnegative integer [0-9]+ (returns int)
    i: integer -?[0-9]+ (returns int)
    
    For example, with the string example above,
    
    "move to -13,34 and turn north"
    would get parsed to
    ("move", {x:-13, y:34, direc:"north"})
    
    "plant 4 green flowers"
    would get parsed to
    ("plant", {num:4, color:"green"})
    
    Attributes:
    
    
    Methods:
    parse(str): Parse a single string into (instname, dict)
    """
    
    def __init__(self, code):
        """
        code: the input source code.
        
        Will throw an error if not properly formatted.
        """
        lines = code.strip().split("\n")
        self.rules = []
        for l in lines:
            colon_sep=l.split(": ")
            if len(colon_sep)==1:
                raise ValueError(f"Incorrectly formatted rule {line}")
            name = colon_sep[0]
            template = ": ".join(colon_sep[1:])
            
            #escape any special characters before we tinker
            for c in SPECIAL_CHARS:
                template=template.replace(c, "\\"+c)
            tokens = re.split(r"(%[a-z_]+:[wpni])", template) #split out the variable decs
            variables = []
            pattern = ["^"]
            
            for t in tokens:
                if not t:
                    continue
                elif t[0]!="%":
                    pattern.append(t)
                else:
                    print(t)
                    name, mode =  re.fullmatch(r"%([a-z_]+):([wpni])", t).groups()
                    if mode == "p": #phrase, with possible spaces
                        pattern.append(r"([a-z]+(?: [a-z]+)*)")
                    elif mode == "w": #word, no spaces
                        pattern.append(r"([a-z]+)")
                    elif mode == "n":
                        pattern.append(r"([0-9]+)")
                    elif mode == "i":
                        pattern.append(r"(-?[0-9]+)")
                    else:
                        raise ValueError(f"the mode {mode} is not supported")
                    variables.append((name, mode))
            pattern.append("$")
            self.rules.append((name, variables, re.compile("".join(pattern))))
            
        def match(self, string):
            for name, variables, pattern in self.rules:
                match = pattern.fullmatch(string)
                if not match:
                    continue
                else:
                    out = {}
                    for ((var_name, mode), var_value) in zip(variables, match.groups()):
                        if mode == "p" or mode == "w":
                            out[var_name] = var_value
                        elif mode == "n" or mode == "i":
                            out[var_name] = int(var_value)
                        else:
                            raise ValueError(f"the mode {mode} is not supported")
                    return out
            raise ValueError(f"The line {string} isn't matched by any pattern")
            
                    
                

In [36]:
# Check: dropping the first ": "-separated field and re-joining keeps any later ": " intact.
": ".join("name: line: stuff".split(": ")[1:])

'line: stuff'

In [37]:
# Check: because the pattern is a capturing group, re.split keeps each
# %name:type placeholder as its own list item between the literal pieces.
re.split(r"(%[a-z_]+:[wpni])", "move to %x:i,%y:i and turn %direc:w")

['move to ', '%x:i', ',', '%y:i', ' and turn ', '%direc:w', '']

In [38]:
# Build a parser from two sample rules (a "move" rule and a "plant" rule).
rules="""
move: move to %x:i,%y:i and turn %direc:w
plant: plant %num:n %color:w flowers
"""
P=InstructionParser(rules)

%x:i


AttributeError: 'NoneType' object has no attribute 'groups'