In [None]:
import os
import json

from lammps_ast.parser import parse_to_AST

In [None]:
# Root folder of the scripts to parse. The cells below read it as
# <scripts_dir>/<task>/<model>/. Relative to the current working directory.
scripts_dir = "generated_scripts/"
# Root folder that receives the serialized ASTs, using the same
# <task>/<model>/ layout. Relative to the current working directory.
parsed_trees_dir = "parsed_trees/"

# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(parsed_trees_dir, exist_ok=True)

In [None]:
def _list_subdirs(path):
    """Return the sorted names of the directories directly inside *path*.

    Raises:
        OSError: if *path* is missing, unreadable or not a directory
            (``FileNotFoundError``, ``PermissionError``,
            ``NotADirectoryError``).
    """
    with os.scandir(path) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())


# First level below scripts_dir: one folder per task.
# os.scandir is used rather than next(os.walk(...)) because os.walk ignores
# listing errors by default and yields nothing for a missing root, which would
# surface here as an opaque StopIteration instead of a descriptive OSError.
tasks = _list_subdirs(scripts_dir)
task_model_map = {}

# Second level: one folder per model inside each task folder.
for task in tasks:
    task_dir = os.path.join(scripts_dir, task)
    task_model_map[task] = _list_subdirs(task_dir)

# Display detected structure
for task, models in task_model_map.items():
    print(f"📂 {task}: {', '.join(models)}")

In [None]:
def save_ast(ast, output_path):
    """Serialize a parse tree to *output_path* as indented JSON.

    Tree nodes (objects exposing ``data`` and ``children``, such as a Lark
    ``Tree``) are written as ``{"type": <data>, "children": [...]}``. Every
    other child is treated as a leaf and handed to ``json.dump`` unchanged,
    so string tokens are written as strings and ``None`` placeholders as
    ``null``.

    Args:
        ast: Root of the tree, or ``None``, in which case nothing is written.
        output_path: Destination file; overwritten if it already exists.

    Raises:
        TypeError: If a leaf is not JSON-serializable.
        OSError: If *output_path* cannot be opened for writing.
    """
    if ast is None:
        return

    def tree_to_dict(node):
        """Convert a tree node (or leaf) into JSON-compatible data."""
        # Strings are checked first: token leaves are str instances.
        if isinstance(node, str):
            return node
        # Anything that is not tree-shaped (e.g. a None placeholder for an
        # absent optional item) is a leaf; accessing .data on it would fail.
        if not (hasattr(node, "data") and hasattr(node, "children")):
            return node
        return {
            "type": node.data,
            "children": [tree_to_dict(child) for child in node.children],
        }

    # Build the whole structure before opening the file so a conversion
    # error does not leave an empty file behind.
    parsed_tree_dict = tree_to_dict(ast)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(parsed_tree_dict, f, indent=4)

# Parse every ".in" script found under <scripts_dir>/<task>/<model>/ and write
# its AST to the same relative location under parsed_trees_dir.
for task, models in task_model_map.items():
    for model in models:
        script_dir = os.path.join(scripts_dir, task, model)
        output_dir = os.path.join(parsed_trees_dir, task, model)

        os.makedirs(output_dir, exist_ok=True)  # Ensure output folder exists

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # processing order (and therefore the printed log) reproducible.
        for script_file in sorted(os.listdir(script_dir)):
            if not script_file.endswith(".in"):
                continue  # Only ".in" input scripts are parsed

            script_path = os.path.join(script_dir, script_file)
            # The full script name is kept: "run.in" -> "run.in_ast.json".
            output_path = os.path.join(output_dir, f"{script_file}_ast.json")
            print("-------------------------------------")
            print(f"Parsing: {script_path}")
            ast = parse_to_AST(script_path)
            save_ast(ast, output_path)

# Report the directory that was actually written to rather than a fixed path.
print(f"\nAll scripts parsed. ASTs saved in `{parsed_trees_dir}`")
