In [3]:
import yaml
import pandas as pd
from collections import defaultdict

logs_path = 'data/342_table_statistics/logs.csv'
yaml_path = 'data/342_table_statistics/sample.yaml'
target_prefix = '/home/statistics'

def traverse(node_id, node, current_path):
    """Record a folder node and all of its descendants in the lookup tables.

    Stores ``node`` in the module-level ``id_to_node`` dict and its full path
    in ``id_to_path``, then recurses into every entry of ``node['values']``.

    Args:
        node_id: Key under which the node is registered.
        node: Mapping describing the node; the keys read here are ``name``
            and ``values`` (a mapping of child id -> child node, or empty).
        current_path: Path of the parent node (``''`` for a root).
    """
    name = node.get('name')
    # YAML loads unquoted scalars such as ``2024`` or ``0`` as numbers, so the
    # name is coerced to str instead of being concatenated directly (which
    # raised TypeError) or dropped by a truthiness test (which lost ``0``).
    # Only a missing or empty name leaves the path unchanged.
    if name is None or name == '':
        path = current_path
    else:
        path = current_path + '/' + str(name)
    id_to_node[node_id] = node
    id_to_path[node_id] = path
    # ``values`` may be absent or explicitly null; both mean "no children".
    values = node.get('values') or {}
    for child_id, child in values.items():
        traverse(child_id, child, path)

def resolve_final_id(node_id):
    """Follow ``link`` references from ``node_id`` to the node they end at.

    Looks each id up in the module-level ``id_to_node`` dict and keeps
    following the node's ``link`` value while it is set.

    Returns:
        The id of the first node on the chain that has no (truthy) ``link``,
        or ``None`` when the starting id is empty, an id on the chain is
        unknown, or the chain loops back onto itself.
    """
    seen = set()
    current = node_id
    while True:
        # An empty id or an id already visited (a cycle) cannot be resolved.
        if not current or current in seen:
            return None
        seen.add(current)

        entry = id_to_node.get(current)
        if entry is None:
            return None

        target = entry.get('link')
        if not target:
            return current
        current = target


# Lookup tables filled in by traverse(): node id -> node / node id -> path.
id_to_node = {}
id_to_path = {}

# Load the folder tree and index every node reachable from each root.
with open(yaml_path, 'r', encoding='utf-8') as f:
    folders = yaml.safe_load(f)

for root_id, root in folders.items():
    traverse(root_id, root, '')

# Read the log and keep only the rows that carry a node id.
df = pd.read_csv(logs_path, dtype=str)
node_series = df['node_id'].dropna().astype(str)

# Resolve every logged id through its link chain to a path
# (None when the id cannot be resolved).
resolved_paths = [
    id_to_path.get(resolve_final_id(raw_id.strip()))
    for raw_id in node_series
]

# Tally hits per path for the target folder itself and everything beneath it.
counts = defaultdict(int)
for resolved in resolved_paths:
    if not resolved:
        continue
    if resolved == target_prefix or resolved.startswith(target_prefix + '/'):
        counts[resolved] += 1
count_total = sum(counts.values())

print(f"Всего обращений к '{target_prefix}': {count_total}")

Всего обращений к '/home/statistics': 118
