https://www.hackerrank.com/challenges/count-strings/problem
    
    
A regular expression is used to describe a set of strings. For this problem the alphabet is limited to 'a' and 'b'.

We define *R* to be a valid regular expression if: ...

In [19]:
from typing import List
from collections import namedtuple

In [101]:
def countStrings(r: str, l: int) -> int:
    """
    Given a regular expression and an integer, *L*, count how many strings of length *L* are recognized by it.

    The expression is scanned left to right with a stack. Ordinary characters
    are pushed as-is; every time a `)` is read, the operands sitting on top of
    the stack (back to the matching `(`) are combined into the explicit set of
    strings they match, truncated to strings no longer than `l`, and that set
    is pushed back as a single operand.

    Args:
        r: Fully parenthesized regular expression using concatenation `(XY)`,
            union `(X|Y)` and Kleene star `(X*)`.
        l: Exact length of the strings to count.

    Returns:
        The number of distinct strings of length `l` matched by `r`.

    Raises:
        ValueError: If a `)` is reached and the stack does not hold a
            well-formed `(X Y)`, `(X|Y)` or `(X*)` group.

    Note:
        Every matching string is materialised in memory, so this is only
        practical for small `l`. Progress is traced to stdout via `print`.
    """
    OPEN_PAREN = '('
    CLOSE_PAREN = ')'
    UNION_CHAR = '|'
    STAR_CHAR = '*'
    OPERATORS = (OPEN_PAREN, CLOSE_PAREN, UNION_CHAR, STAR_CHAR)

    # a stack of single chars (symbols/operators) and sets of matched strings
    stack = []

    def setize(str_or_set) -> set:
        """Return the operand as a fresh set of strings (a char becomes a 1-element set)."""
        if isinstance(str_or_set, str):
            return {str_or_set}
        return set(str_or_set)

    def within_limit(strings) -> set:
        """Keep only the strings that can still be a prefix/whole of a length-`l` match."""
        return {s for s in strings if len(s) <= l}

    def trunc_str(s) -> str:
        """Render `s` for the trace output, capped at 100 characters."""
        trunc = f'{s}'
        trunc = trunc[:100] + ('....' if len(trunc) > 100 else '')
        return trunc

    def build_node(first_elem, second_elem, union: bool) -> set:
        """Combine 2 elements (chars or sets of strings) into the set of strings they match."""
        print(f'build_node({trunc_str(first_elem)}, {trunc_str(second_elem)}, {union})')

        # union of 2 sets or 2 chars or a char and a set
        if union:
            return setize(first_elem) | setize(second_elem)

        # starred char or starred set
        if second_elem == STAR_CHAR:
            one_count = setize(first_elem)
            all_counts = {""} | one_count
            print(f'one_count: {one_count}')

            # Repeating the empty string adds nothing new, and keeping it in
            # the base would make every product non-empty forever (e.g. for
            # `((a*)*)`), so the loop below would never terminate.
            base = one_count - {""}
            next_count = base
            while next_count:
                # Each round appends one more non-empty repetition, so the
                # shortest string grows and the set eventually empties out.
                next_count = within_limit(a + b for a in next_count for b in base)
                all_counts.update(next_count)

            print(f'len(all_counts) = {len(all_counts)}; max(len(all_counts)) = {max(len(x) for x in all_counts)}')
            return all_counts

        # concatenation of any other pair (cartesian product of strings)
        return set(a + b for a in setize(first_elem) for b in setize(second_elem))

    possible_strings = set()

    for i, ch in enumerate(r):

        if ch != CLOSE_PAREN:
            stack.append(ch)
            continue

        # pop top 2 (or 3 if middle is a `|`) elements off the stack and combine them
        malformed = f'Malformed regex at index {i}, char \'{ch}\': {r}'
        try:
            second_elem = stack.pop()
            first_elem = stack.pop()
            union = first_elem == UNION_CHAR
            if union:
                first_elem = stack.pop()
            # also pop off the open parentheses, just as a sanity check
            opener = stack.pop()
        except IndexError:
            # ran out of operands before reaching the matching `(`
            raise ValueError(malformed) from None
        if opener != OPEN_PAREN:
            raise ValueError(malformed)

        possible_strings = build_node(first_elem, second_elem, union)
        print(f'Built new node (len={len(possible_strings)}): {trunc_str(sorted(possible_strings))}')

        # strings longer than `l` can never contribute to a length-`l` match
        possible_strings = within_limit(possible_strings)
        print(f'Filtered node (len={len(possible_strings)}): {trunc_str(sorted(possible_strings))}')

        stack.append(possible_strings)

    # A bare symbol such as `a` is a complete expression on its own but never
    # triggers a `)` reduction, so pick it up from the stack here.
    if len(stack) == 1 and isinstance(stack[0], str) and stack[0] not in OPERATORS:
        possible_strings = {stack[0]}

    print(f'Total strings <= len={l} = {len(possible_strings)}')
    possible_strings = {s for s in possible_strings if len(s) == l}
    print(f'Total strings == len={l} = {len(possible_strings)}\n\n')

    return len(possible_strings)

In [104]:
# Union of two fixed words: exactly "ab" and "ba" have length 2.
assert countStrings('((ab)|(ba))', 2) == 2
# Any string over {a, b} matches, so there are 2**5 strings of length 5.
assert countStrings('((a|b)*)', 5) == 32
# a*ba*: a single 'b' placed at any of the 100 positions.
assert countStrings('((a*)(b(a*)))', 100) == 100

build_node(a, b, False)
Built new node (len=1): ['ab']
Filtered node (len=1): ['ab']
build_node(b, a, False)
Built new node (len=1): ['ba']
Filtered node (len=1): ['ba']
build_node({'ab'}, {'ba'}, True)
Built new node (len=2): ['ab', 'ba']
Filtered node (len=2): ['ab', 'ba']
Total strings <= len=2 = 2
Total strings == len=2 = 2


build_node(a, b, True)
Built new node (len=2): ['a', 'b']
Filtered node (len=2): ['a', 'b']
build_node({'b', 'a'}, *, False)
one_count: {'b', 'a'}
len(all_counts) = 63; max(len(all_counts)) = 5
Built new node (len=63): ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaab', 'aaab', 'aaaba', 'aaabb', 'aab', 'aaba', 'aabaa', '....
Filtered node (len=63): ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa', 'aaaab', 'aaab', 'aaaba', 'aaabb', 'aab', 'aaba', 'aabaa', '....
Total strings <= len=5 = 63
Total strings == len=5 = 32


build_node(a, *, False)
one_count: {'a'}
len(all_counts) = 101; max(len(all_counts)) = 100
Built new node (len=101): ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa', 