## Requirements

In [1]:
import enum
import re
import typing
import parser

## Implementation

The parser is implemented in the module `parser.py`.

In [4]:
# Show the syntax-highlighted source of parser.py so the implementation is visible in the notebook.
%pycat parser.py

[38;5;66;03m#!/usr/bin/env python[39;00m

[38;5;28;01mimport[39;00m enum
[38;5;28;01mimport[39;00m re
[38;5;28;01mimport[39;00m typing


[38;5;28;01mclass[39;00m State(enum.Enum):
    start = enum.auto()
    in_block = enum.auto()
    not_in_block = enum.auto()


Item: typing.TypeAlias = str
Block: typing.TypeAlias = list[Item]
Blocks: typing.TypeAlias = dict[str, Block]


[38;5;28;01mclass[39;00m Parser:

    [38;5;28;01mdef[39;00m __init__(self):
        self._state = State.start
        self._comment_str = [33m'#'[39m
        self._current_block_name = [38;5;28;01mNone[39;00m
        self._block_begin_re = re.compile([33mr'begin\s+block\s+(\w+)\s*$'[39m)
        self._block_end_re = re.compile([33mr'end\s+block\s+(\w+)\s*$'[39m)
        self._item_re = re.compile([33mr'item\s+(\w+)\s*$'[39m)
        self._blocks = [38;5;28;01mNone[39;00m

    [38;5;28;01mdef[39;00m _is_comment(self, line: str) -> bool:
        [38;5;28;01mreturn[39;00m line.startswith(s

## Usage

Create a new parser.

In [None]:
parser = parser.Parser()

Parse a file.

In [3]:
%cat data/valid_blcok_data.txt

begin block b1
  item v1
  item v2
end block b1

begin block b2
  item v3
  # comment on v4
  item v4
  item v5
end block b2

# comment on b3, empty block
begin block b3
end block b3

In [None]:
blocks = parser.parse('data/valid_blcok_data.txt')

Check whether the parser worked as expected.

In [2]:
# Sanity checks against the known contents of the sample file:
# three blocks named b1..b3 holding 2, 3 and 0 items respectively.
assert len(blocks) == 3, f'3 blocks expected, got {len(blocks)}'
assert set(blocks.keys()) == set(('b1', 'b2', 'b3')), f'unexpected block names {set(blocks.keys())}'
for i, nr_items in enumerate((2, 3, 0)):
    # The message must be an f-string, otherwise the placeholders are printed literally.
    assert len(blocks[f'b{i + 1}']) == nr_items, f'expected {nr_items} for b{i + 1}'

See the `test_parser.py` file for more comprehensive tests.