In [1]:
from pathlib import Path

In [3]:
def count_terminals_per_nonterminal(parse_str):
    # 結果を格納する辞書
    counts = {}

    def process_node(node_str):
        # カッコの中身を取り出す
        content = node_str.strip('()').strip()
        parts = content.split(None, 1)

        if not parts:
            return 0

        nonterminal = parts[0]

        # 末端記号(terminal)の場合
        if not parts[1].startswith('('):
            counts[nonterminal] = counts.get(nonterminal, 0) + 1
            return 1

        # 非終端記号(nonterminal)の場合
        rest = parts[1]
        terminal_count = 0

        # 子ノードを処理
        stack = []
        current = ''
        for char in rest:
            if char == '(':
                stack.append(char)
            elif char == ')':
                stack.pop()
            current += char

            if not stack and current.strip():
                terminal_count += process_node(current)
                current = ''

        counts[nonterminal] = counts.get(nonterminal, 0) + terminal_count
        return terminal_count

    # 入力文字列の各導出木を処理
    for tree in parse_str.strip().split('\n'):
        if tree:
            process_node(tree)

    return counts

# 使用例
parse_trees = """( ROOT ( 1S ( NP_Subj_P ( 5NP_P ( Adj gaunch ) ( Noun_P strogessors ) ) ( Subj sub ) ) ( VP_P ( VP_Comp_P ( 2VP_Comp_Pres_P ( 3S_Comp ( 1S ( NP_Subj_S ( NP_S ( Noun_S flingerician ) ) ( Subj sub ) ) ..."""

result = count_terminals_per_nonterminal(parse_trees)

# 結果を表示
for nonterminal, count in sorted(result.items()):
    print(f"{nonterminal}: {count}")

ROOT: 0
