In [14]:
import json
from pathlib import Path
import pandas as pd

# Folder containing the json files
saves_path = Path("processed")

# Column order of the summary table. Passing it to the DataFrame constructor
# keeps the "score" column present even when no JSON file is found, so the
# sort below cannot fail with a KeyError on an empty folder.
SUMMARY_COLUMNS = ["file_name", "num_nodes", "num_lines", "num_measures", "score"]


def summarize_level(file_name, data):
    """Build one summary row from the parsed content of a level file.

    Parameters
    ----------
    file_name : str
        Value stored in the ``file_name`` column.
    data : dict
        Parsed JSON object. The optional ``"nodes"`` and ``"lines"`` entries
        are read as mappings; a missing or null entry counts as empty.

    Returns
    -------
    dict
        ``file_name``; ``num_nodes`` (node ids without the letter "b");
        ``num_lines`` (total lines divided by ``num_nodes``, i.e. lines per
        node, not a raw count); ``num_measures`` (lines whose ``from_node`` or
        ``to_node`` contains "b"); and
        ``score = num_nodes + 2 * num_measures + num_lines``.
    """
    nodes = data.get("nodes") or {}
    lines = data.get("lines") or {}

    # NOTE(review): ids containing "b" are presumably measurement/auxiliary
    # nodes -- confirm against the level file format.
    num_nodes = sum(1 for node_id in nodes if "b" not in node_id)

    # Count lines that include 'b' in from_node or to_node
    num_measures = sum(
        1
        for line in lines.values()
        if "b" in line.get("from_node", "") or "b" in line.get("to_node", "")
    )

    # A file without any regular node has no meaningful lines-per-node ratio;
    # use 0.0 instead of raising ZeroDivisionError and losing the whole table.
    lines_per_node = len(lines) / num_nodes if num_nodes else 0.0

    return {
        "file_name": file_name,
        "num_nodes": num_nodes,
        # Key name kept for compatibility; the value is lines per node.
        "num_lines": lines_per_node,
        "num_measures": num_measures,
        "score": num_nodes + 2 * num_measures + lines_per_node,
    }


rows = []

# Shortlex order (shorter names first, then alphabetical) makes the processing
# order independent of the filesystem and puts e.g. Level9 before Level10.
for json_file in sorted(
    saves_path.glob("*.json"), key=lambda p: (len(p.name), p.name)
):
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(json_file, "r", encoding="utf-8") as f:
        rows.append(summarize_level(json_file.name, json.load(f)))

# Create the DataFrame, sort by score (lowest to highest; the stable sort keeps
# tied rows in file order), round numeric columns to 2 decimal places
# (non-numeric columns are left untouched) and reset the index for display.
df = (
    pd.DataFrame(rows, columns=SUMMARY_COLUMNS)
    .sort_values(by="score", ascending=True, kind="stable")
    .round(2)
    .reset_index(drop=True)
)

df


Unnamed: 0,file_name,num_nodes,num_lines,num_measures,score
0,Level1.json,4,1.5,2,9.5
1,Level2.json,6,1.67,1,9.67
2,Level3.json,5,1.6,2,10.6
3,Level4.json,8,1.75,1,11.75
4,Level5.json,5,1.8,3,12.8
5,Level6.json,6,1.67,3,13.67
6,Level7.json,8,1.75,2,13.75
7,Level8.json,8,1.75,3,15.75
8,Level9.json,10,1.8,2,15.8
9,Level10.json,10,1.8,2,15.8
