# Requirements

In [20]:
from dataclasses import dataclass
import math
import operator
import re
import sys
from types import SimpleNamespace

# Problem setting

Structural pattern matching is a very powerful feature that has the potential to make intricate code much easier to write, read and maintain.

# Simple example: RPN calculator

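Consider the example of an evaluator for arithmetic expressions given as nested tuples in which the operator comes first (strictly speaking, this is prefix or Polish notation rather than reverse Polish notation).  It evaluates expressions such as `('+', ('*', 3, 7), 3)` (equivalent to `3*7 + 3`), and raises an exception if an expression can't be matched.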

In [2]:
class RpnEvaluator:
    
    def __init__(self):
        self._ops = {
            '+': operator.add,
            '-': operator.sub,
            '*': operator.mul,
            '/': operator.truediv,
            '^': operator.pow,
        }
        self._funcs = {
            'sqrt': math.sqrt,
        }
        
    def is_operator(self, op):
        return op in self._ops
    
    def is_function(self, func):
        return func in self._funcs
    
    def eval(self, expr):
        match expr:
            case (func, arg) if self.is_function(func):
                return self._funcs[func](self.eval(arg))
            case (op, lhs, rhs) if self.is_operator(op):
                return self._ops[op](self.eval(lhs), self.eval(rhs))
            case value if isinstance(value, int) or isinstance(value, float):
                return float(value)
            case _:
                raise ValueError(f'{expr} can not be matched')

For convenience, we use the functions defined in the `operator` module; it would of course be possible to implement this using lambda functions as well.

Now you can instantiate an evaluator.

In [3]:
# A single evaluator instance is reused by the example cells that follow.
evaluator = RpnEvaluator()

## Examples

In [4]:
# Nested expression: (3 * 7) + 3, which evaluates to 24.0.
evaluator.eval(('+', ('*', 3, 7), 3))

24.0

In [5]:
# sqrt(5) * sqrt(5): function calls as operands; floating-point rounding
# makes the result slightly larger than 5.
evaluator.eval(('*', ('sqrt', 5), ('sqrt', 5)))

5.000000000000001

In [6]:
# '^' is mapped to exponentiation: 2 to the power 3.
evaluator.eval(('^', 2, 3))

8.0

## Error handling

When you try to evaluate an expression that contains errors, the matching fails and a `ValueError` is raised.

In [8]:
# The operand 'a' is neither a number nor a tuple, so evaluating that
# sub-expression fails and the error message names 'a' only.
try:
    evaluator.eval(('+', 'a', 5))
except ValueError as e:
    print(e)

a can not be matched


In [9]:
# '**' is not one of the registered operators, so the guard on the
# three-element case rejects the whole expression.
try:
    evaluator.eval(('**', 3, 5))
except ValueError as e:
    print(e)

('**', 3, 5) can not be matched


In [10]:
# A two-element tuple is only accepted when its head is a function name;
# '+' is an operator, so an expression with a missing operand is rejected.
try:
    evaluator.eval(('+', 3))
except ValueError as e:
    print(e)

('+', 3) can not be matched


# Matching regular expressions

There is no direct support for matching regular expressions using Python's `match` statement.  However, literal and value patterns are compared with the subject using `==`, so defining a class whose `__eq__` method performs the regular expression match will do the trick.

In [14]:
@dataclass
class ReEqual(str):
    
    text: str
    match: re.Match = None
    
    def __eq__(self, pattern):
        self.match = re.search(pattern, self.text)
        return self.match is not None

In [15]:
# 'E5' consists of a letter A-H followed by a digit 1-8, so the search succeeds.
ReEqual('E5') == '^[A-H][1-8]$'

True

In [16]:
# The digit 9 lies outside the 1-8 range, so the search fails.
ReEqual('E9') == '^[A-H][1-8]$'

False

In [19]:
for pos in ('A3', 'B1', 'C8', 'D7', 'A9', 'B11', 'AA5'):
    match ReEqual(pos):
        case r'^[A-H][1-2]$':
            print(f'{pos}: white home row')
        case r'^[A-H][7-8]$':
            print(f'{pos}: black home row')
        case r'^[A-H][1-8]$':
            print(f'{pos}: mid field')
        case _:
            print(f'{pos}: invalid position', file=sys.stderr)

A3: mid field
B1: white home row
C8: black home row
D7: black home row


A9: invalid position
B11: invalid position
AA5: invalid position


## Alternative

An alternative approach would be to create a class that represents the pattern to match, rather than the string to match.

In [35]:
@dataclass
class ReMatcher:
    """Regular expression that compares equal to every string it matches.

    Referenced as a value pattern in a ``match`` statement (e.g.
    ``case ns.name:``), an instance is compared with the subject using
    ``==``, so the case is selected when the subject matches `pattern`.

    Attributes:
        pattern: the regular expression, applied with ``re.match`` (i.e.
            anchored at the start of the string).
    """

    pattern: str

    def __eq__(self, string):
        """Return True if `string` is a str that matches `pattern`."""
        if not isinstance(string, str):
            # A non-string subject can never match.  Returning NotImplemented
            # makes `==` evaluate to False (unless both operands are the very
            # same object) instead of re.match raising TypeError.
            return NotImplemented
        return re.match(self.pattern, string) is not None

In [36]:
# Group the matchers in a namespace so that they can be referenced with
# dotted names (e.g. `categories.white_home`) in `case` clauses.
categories = SimpleNamespace(
    white_home=ReMatcher('^[A-H][1-2]$'),
    black_home=ReMatcher('^[A-H][7-8]$'),
    midfield=ReMatcher('^[A-H][3-6]$'),
)

In [38]:
# Here the subject is the plain string; each dotted name is a value pattern
# that is compared with it using ==, which ends up in ReMatcher.__eq__.
for pos in ('A3', 'B1', 'C8', 'D7', 'A9', 'B11', 'AA5'):
    match pos:
        case categories.white_home:
            print(f'{pos}: white home row')
        case categories.black_home:
            print(f'{pos}: black home row')
        case categories.midfield:
            print(f'{pos}: mid field')
        case _:
            # None of the three regular expressions matched.
            print(f'{pos}: invalid position', file=sys.stderr)

A3: mid field
B1: white home row
C8: black home row
D7: black home row


A9: invalid position
B11: invalid position
AA5: invalid position


The `SimpleNamespace` is required because a bare name such as `white_home`, `black_home` or `midfield` in a `case` is a capture pattern: it would bind the subject to that name instead of comparing against it.

# Set membership

Just like for regular expressions, set membership is also not supported by Python's `match` statement, but a similar trick can be applied.

In [29]:
class SetEqual(set):
    """Set that compares equal to each of its own elements.

    Referenced as a value pattern in a ``match`` statement (e.g.
    ``case ns.name:``), an instance is compared with the subject using
    ``==``, so the case is selected when the subject is a member of the set.
    """

    def __eq__(self, text):
        """Return True if `text` is an element of this set."""
        try:
            return text in self
        except TypeError:
            # Unhashable values (lists, dicts, ...) cannot be elements of a
            # set, so they simply do not match.
            return False

    def __ne__(self, text):
        """Return the negation of ``==``."""
        # Without this override `!=` would be inherited from set and would
        # disagree with the membership-based `==`.
        return not self == text

In [30]:
# 'apple' is an element of the set, so the comparison is true.
SetEqual({'apple', 'pear', 'cherry'}) == 'apple'

True

In [31]:
# 'nut' is not an element of the set, so the comparison is false.
SetEqual({'apple', 'pear', 'cherry'}) == 'nut'

False

In [33]:
# Group the sets in a namespace so that they can be referenced with dotted
# names in `case` clauses.
# NOTE(review): `vegatable` is presumably a misspelling of `vegetable`;
# renaming it requires updating every `food.vegatable` reference as well.
food = SimpleNamespace(
    fruit=SetEqual({'apple', 'pear'}),
    vegatable=SetEqual({'sprout', 'salad'}),
)

In [34]:
# Each dotted name is a value pattern that is compared with the subject
# using ==, which ends up in the membership test of SetEqual.__eq__.
for stuff in ['apple', 'nut', 'sprout', 'salad', 'pear', 'parsnip']:
    match stuff:
        case food.fruit:
            print(f'{stuff} is fruit')
        case food.vegatable:
            print(f'{stuff} is vegetable')
        case _:
            # The item is in neither of the two sets.
            print(f'{stuff} is unknown', file=sys.stderr)

apple is fruit
sprout is vegetable
salad is vegetable
pear is fruit


nut is unknown
parsnip is unknown


The `SimpleNamespace` is required because a bare name such as `fruit` or `vegetable` in a `case` is a capture pattern: it would bind the subject to that name instead of comparing against it.

# Matching types

Matching against built-in types is supported by the `match` statement as well, although the syntax is not entirely obvious: you use a class pattern such as `str()` rather than the bare type name.

In [40]:
# A class pattern such as `str()` succeeds when the subject is an instance
# of that class; the empty parentheses mean no attributes are inspected.
for item in ('abc', 3, '', 5.2, -8, {}):
    match item:
        case str():
            print(f'"{item}" is a string')
        case int():
            print(f'{item} is an integer')
        case float():
            print(f'{item} is a float')
        case _:
            # The dict {} matches none of the class patterns above.
            print(f'no idea what {item} is', file=sys.stderr)

"abc" is a string
3 is an integer
"" is a string
5.2 is a float
-8 is an integer


no idea what {} is
