In [13]:
import os
import json
from pathlib import Path

# Configuration
BASE_DIR = Path(r"C:\python\erpnext\erpnext")  # Base folder to crawl
OUTPUT_FILE = "erpnext_doctype_tree.json"

def crawl_json_files(base_dir):
    """Find all doctype JSON files"""
    json_files = []
    base_path = Path(base_dir)
    
    for json_file in base_path.rglob("*.json"):
        try:
            # Skip non-doctype directories
            if any(skip in str(json_file) for skip in [
                'node_modules', '.git', '__pycache__', 'builds', 'logs', 'public', 'assets'
            ]):
                continue
                
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
                
            if data.get("doctype") == "DocType" and data.get("name"):
                json_files.append({
                    "name": data.get("name"),
                    "file": str(json_file),
                    "data": data
                })
        except:
            continue
    
    return json_files

def find_true_children(parent_name, all_doctypes):
    """Find only TRUE children - Table fields and parent field references"""
    children = []
    
    # Find parent data
    parent_data = None
    for dt in all_doctypes:
        if dt["name"] == parent_name:
            parent_data = dt["data"]
            break
    
    if not parent_data:
        return children
    
    # 1. Table fields in parent -> true children
    for field in parent_data.get("fields", []):
        if field.get("fieldtype") == "Table" and field.get("options"):
            child_name = field["options"]
            
            # Find child doctype
            for dt in all_doctypes:
                if dt["name"] == child_name:
                    children.append({
                        "name": child_name,
                        "file": dt["file"],
                        "type": "table_child",
                        "field": field.get("fieldname"),
                        "children": find_true_children(child_name, all_doctypes)
                    })
                    break
    
    # 2. Find doctypes that have "parent" field pointing to this parent
    for dt in all_doctypes:
        for field in dt["data"].get("fields", []):
            if (field.get("fieldname") == "parent" and 
                field.get("options") == parent_name):
                
                children.append({
                    "name": dt["name"],
                    "file": dt["file"],
                    "type": "parent_child",
                    "field": "parent",
                    "children": find_true_children(dt["name"], all_doctypes)
                })
                break
    
    # 3. Find doctypes with naming pattern suggesting parent-child (like "User Group Member" -> "User Group")
    for dt in all_doctypes:
        dt_name = dt["name"]
        # Skip if already processed or is the parent itself
        if any(child["name"] == dt_name for child in children) or dt_name == parent_name:
            continue
            
        # Check if child name contains parent name and has a Link field to parent
        if parent_name in dt_name and dt_name != parent_name:
            for field in dt["data"].get("fields", []):
                if (field.get("fieldtype") == "Link" and 
                    field.get("options") == parent_name):
                    
                    children.append({
                        "name": dt_name,
                        "file": dt["file"],
                        "type": "named_child",
                        "field": field.get("fieldname"),
                        "children": find_true_children(dt_name, all_doctypes)
                    })
                    break
    
    # 4. Add lookups at the end (Link fields from this parent to other doctypes)
    for field in parent_data.get("fields", []):
        if field.get("fieldtype") == "Link" and field.get("options"):
            lookup_name = field["options"]
            
            # Skip if already added as a true child
            if any(child["name"] == lookup_name for child in children):
                continue
            
            # Verify lookup doctype exists
            for dt in all_doctypes:
                if dt["name"] == lookup_name:
                    children.append({
                        "name": lookup_name,
                        "file": dt["file"],
                        "type": "lookup",
                        "field": field.get("fieldname"),
                        "children": []
                    })
                    break
    
    return children

def find_root_doctypes(all_doctypes):
    """Find root doctypes - those not referenced as true children"""
    all_names = {dt["name"] for dt in all_doctypes}
    true_children = set()
    
    for dt in all_doctypes:
        # Table field options are true children
        for field in dt["data"].get("fields", []):
            if field.get("fieldtype") == "Table" and field.get("options"):
                true_children.add(field["options"])
        
        # Doctypes with "parent" field are true children
        for field in dt["data"].get("fields", []):
            if field.get("fieldname") == "parent" and field.get("options"):
                true_children.add(dt["name"])
                break
    
    # Also check for naming pattern children (like "User Group Member" -> "User Group")
    for dt in all_doctypes:
        dt_name = dt["name"]
        for other_dt in all_doctypes:
            other_name = other_dt["name"]
            # If this doctype's name contains another doctype's name
            if other_name in dt_name and dt_name != other_name:
                # And has a Link field to that doctype
                for field in dt["data"].get("fields", []):
                    if (field.get("fieldtype") == "Link" and 
                        field.get("options") == other_name):
                        true_children.add(dt_name)
                        break
    
    return all_names - true_children

def build_tree(all_doctypes):
    """Build the tree structure"""
    root_names = find_root_doctypes(all_doctypes)
    tree = []
    
    print(f"Found {len(root_names)} root doctypes: {sorted(list(root_names))}")
    
    for dt in all_doctypes:
        if dt["name"] in root_names:
            children = find_true_children(dt["name"], all_doctypes)
            
            tree.append({
                "name": dt["name"],
                "file": dt["file"],
                "type": "root",
                "module": dt["data"].get("module"),
                "children": children
            })
    
    return tree

def print_tree(tree, indent=0):
    """Print tree structure"""
    for node in tree:
        prefix = "  " * indent
        type_info = f" [{node['type']}]"
        if node.get('field') and node['type'] != 'root':
            type_info += f" (field: {node['field']})"
        
        print(f"{prefix}{node['name']}{type_info}")
        if node.get("children"):
            print_tree(node["children"], indent + 1)

# Main execution
if __name__ == "__main__":
    print("Starting Frappe DocType crawler...")
    
    # Get all doctypes
    all_doctypes = crawl_json_files(BASE_DIR)
    print(f"Found {len(all_doctypes)} doctype files")
    
    if not all_doctypes:
        print("No doctype files found. Check BASE_DIR path.")
        exit(1)
    
    # Build tree
    tree = build_tree(all_doctypes)
    
    # Save to JSON
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(tree, f, indent=2, ensure_ascii=False)
    
    print(f"\nTree saved to {OUTPUT_FILE}")
    print("\nTree structure:")
    print_tree(tree)

Starting Frappe DocType crawler...
Found 468 doctype files
Found 241 root doctypes: ['Account', 'Accounting Dimension', 'Accounting Dimension Filter', 'Accounting Period', 'Accounts Settings', 'Activity Cost', 'Activity Type', 'Advance Payment Ledger Entry', 'Appointment', 'Appointment Booking Settings', 'Authorization Control', 'Authorization Rule', 'Availability Of Slots', 'BOM', 'BOM Creator', 'Bank', 'Bank Account Subtype', 'Bank Account Type', 'Bank Clearance', 'Bank Reconciliation Tool', 'Bank Transaction', 'Batch', 'Bin', 'Bisect Accounting Statements', 'Bisect Nodes', 'Blanket Order', 'Branch', 'Brand', 'Budget', 'Bulk Transaction Log', 'Bulk Transaction Log Detail', 'Buying Settings', 'CRM Settings', 'Call Log', 'Campaign', 'Campaign Item', 'Cashier Closing', 'Chart of Accounts Importer', 'Cheque Print Template', 'Code List', 'Common Code', 'Communication Medium', 'Company', 'Competitor', 'Contract', 'Contract Template', 'Cost Center', 'Coupon Code', 'Currency Exchange', 'Curr

In [19]:
import os
import json
from pathlib import Path

# Configuration
BASE_DIR = Path(r"C:\python\erpnext\erpnext")
OUTPUT_FILE = "erpnext_doctype_permissions.json"

def crawl_json_files(base_dir):
    """Find all doctype JSON files and group by permissions"""
    without_permissions = []
    with_permissions = []
    
    for json_file in Path(base_dir).rglob("*.json"):
        try:
            if any(skip in str(json_file) for skip in [
                'node_modules', '.git', '__pycache__', 'builds', 'logs', 'public', 'assets'
            ]):
                continue
                
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
                
            if data.get("doctype") == "DocType" and data.get("name"):
                doctype_info = {"name": data["name"], "file": str(json_file)}
                
                permissions = data.get("permissions", [])
                if permissions and len(permissions) > 0:
                    with_permissions.append(doctype_info)
                else:
                    without_permissions.append(doctype_info)
        except:
            continue
    
    return without_permissions, with_permissions

if __name__ == "__main__":
    without_perms, with_perms = crawl_json_files(BASE_DIR)
    
    results = {
        "doctypes_without_permissions": sorted(without_perms, key=lambda x: x["name"]),
        "doctypes_with_permissions": sorted(with_perms, key=lambda x: x["name"])
    }
    
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    
    print(f"WITHOUT permissions: {len(without_perms)}")
    print(f"WITH permissions: {len(with_perms)}")
    print(f"Total: {len(without_perms) + len(with_perms)}")
    print(f"Results saved to: {OUTPUT_FILE}")
    
    # Debug: print structure
    print(f"Keys in results: {list(results.keys())}")
    print(f"Items in without_permissions: {len(results['doctypes_without_permissions'])}")
    print(f"Items in with_permissions: {len(results['doctypes_with_permissions'])}")

WITHOUT permissions: 218
WITH permissions: 250
Total: 468
Results saved to: erpnext_doctype_permissions.json
Keys in results: ['doctypes_without_permissions', 'doctypes_with_permissions']
Items in without_permissions: 218
Items in with_permissions: 250


In [21]:
import os
import json
from pathlib import Path

# Configuration
BASE_DIR = Path(r"C:\python\frappe\frappe")  # Base folder to crawl
OUTPUT_FILE = "frappe_doctype_tree.json"
TREE_TXT_FILE = "frappe_doctype_tree_with_permissions.txt"

def crawl_json_files(base_dir):
    """Find all doctype JSON files"""
    json_files = []
    base_path = Path(base_dir)
    
    for json_file in base_path.rglob("*.json"):
        try:
            # Skip non-doctype directories
            if any(skip in str(json_file) for skip in [
                'node_modules', '.git', '__pycache__', 'builds', 'logs', 'public', 'assets'
            ]):
                continue
                
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
                
            if data.get("doctype") == "DocType" and data.get("name"):
                json_files.append({
                    "name": data.get("name"),
                    "file": str(json_file),
                    "data": data
                })
        except:
            continue
    
    return json_files

def has_permissions(doctype_data):
    """Check if doctype has any permissions defined"""
    permissions = doctype_data.get("permissions", [])
    return permissions and len(permissions) > 0

def find_true_children(parent_name, all_doctypes):
    """Find only TRUE children - Table fields and parent field references"""
    children = []
    
    # Find parent data
    parent_data = None
    for dt in all_doctypes:
        if dt["name"] == parent_name:
            parent_data = dt["data"]
            break
    
    if not parent_data:
        return children
    
    # 1. Table fields in parent -> true children
    for field in parent_data.get("fields", []):
        if field.get("fieldtype") == "Table" and field.get("options"):
            child_name = field["options"]
            
            # Find child doctype
            for dt in all_doctypes:
                if dt["name"] == child_name:
                    children.append({
                        "name": child_name,
                        "file": dt["file"],
                        "type": "table_child",
                        "field": field.get("fieldname"),
                        "children": find_true_children(child_name, all_doctypes)
                    })
                    break
    
    # 2. Find doctypes that have "parent" field pointing to this parent
    for dt in all_doctypes:
        for field in dt["data"].get("fields", []):
            if (field.get("fieldname") == "parent" and 
                field.get("options") == parent_name):
                
                children.append({
                    "name": dt["name"],
                    "file": dt["file"],
                    "type": "parent_child",
                    "field": "parent",
                    "children": find_true_children(dt["name"], all_doctypes)
                })
                break
    
    # 3. Find doctypes with naming pattern suggesting parent-child (like "User Group Member" -> "User Group")
    for dt in all_doctypes:
        dt_name = dt["name"]
        # Skip if already processed or is the parent itself
        if any(child["name"] == dt_name for child in children) or dt_name == parent_name:
            continue
            
        # Check if child name contains parent name and has a Link field to parent
        if parent_name in dt_name and dt_name != parent_name:
            for field in dt["data"].get("fields", []):
                if (field.get("fieldtype") == "Link" and 
                    field.get("options") == parent_name):
                    
                    children.append({
                        "name": dt_name,
                        "file": dt["file"],
                        "type": "named_child",
                        "field": field.get("fieldname"),
                        "children": find_true_children(dt_name, all_doctypes)
                    })
                    break
    
    # 4. Add lookups at the end (Link fields from this parent to other doctypes)
    for field in parent_data.get("fields", []):
        if field.get("fieldtype") == "Link" and field.get("options"):
            lookup_name = field["options"]
            
            # Skip if already added as a true child
            if any(child["name"] == lookup_name for child in children):
                continue
            
            # Verify lookup doctype exists
            for dt in all_doctypes:
                if dt["name"] == lookup_name:
                    children.append({
                        "name": lookup_name,
                        "file": dt["file"],
                        "type": "lookup",
                        "field": field.get("fieldname"),
                        "children": []
                    })
                    break
    
    return children

def find_root_doctypes(all_doctypes):
    """Find root doctypes - those not referenced as true children"""
    all_names = {dt["name"] for dt in all_doctypes}
    true_children = set()
    
    for dt in all_doctypes:
        # Table field options are true children
        for field in dt["data"].get("fields", []):
            if field.get("fieldtype") == "Table" and field.get("options"):
                true_children.add(field["options"])
        
        # Doctypes with "parent" field are true children
        for field in dt["data"].get("fields", []):
            if field.get("fieldname") == "parent" and field.get("options"):
                true_children.add(dt["name"])
                break
    
    # Also check for naming pattern children (like "User Group Member" -> "User Group")
    for dt in all_doctypes:
        dt_name = dt["name"]
        for other_dt in all_doctypes:
            other_name = other_dt["name"]
            # If this doctype's name contains another doctype's name
            if other_name in dt_name and dt_name != other_name:
                # And has a Link field to that doctype
                for field in dt["data"].get("fields", []):
                    if (field.get("fieldtype") == "Link" and 
                        field.get("options") == other_name):
                        true_children.add(dt_name)
                        break
    
    return all_names - true_children

def build_tree(all_doctypes):
    """Build the tree structure"""
    root_names = find_root_doctypes(all_doctypes)
    tree = []
    
    print(f"Found {len(root_names)} root doctypes: {sorted(list(root_names))}")
    
    for dt in all_doctypes:
        if dt["name"] in root_names:
            children = find_true_children(dt["name"], all_doctypes)
            
            tree.append({
                "name": dt["name"],
                "file": dt["file"],
                "type": "root",
                "module": dt["data"].get("module"),
                "children": children
            })
    
    return tree

def print_tree(tree, indent=0):
    """Print tree structure"""
    for node in tree:
        prefix = "  " * indent
        type_info = f" [{node['type']}]"
        if node.get('field') and node['type'] != 'root':
            type_info += f" (field: {node['field']})"
        
        print(f"{prefix}{node['name']}{type_info}")
        if node.get("children"):
            print_tree(node["children"], indent + 1)

def print_tree_with_permissions(tree, all_doctypes, file_handle, indent=0):
    """Print tree structure with permissions tags to file"""
    for node in tree:
        prefix = "  " * indent
        type_info = f" [{node['type']}]"
        if node.get('field') and node['type'] != 'root':
            type_info += f" (field: {node['field']})"
        
        # Find doctype data and check for permissions
        permissions_tag = ""
        for dt in all_doctypes:
            if dt["name"] == node["name"]:
                if has_permissions(dt["data"]):
                    permissions_tag = " <PERMISSIONS>"
                break
        
        line = f"{prefix}{node['name']}{type_info}{permissions_tag}\n"
        file_handle.write(line)
        
        if node.get("children"):
            print_tree_with_permissions(node["children"], all_doctypes, file_handle, indent + 1)

# Main execution
if __name__ == "__main__":
    print("Starting Frappe DocType crawler...")
    
    # Get all doctypes
    all_doctypes = crawl_json_files(BASE_DIR)
    print(f"Found {len(all_doctypes)} doctype files")
    
    if not all_doctypes:
        print("No doctype files found. Check BASE_DIR path.")
        exit(1)
    
    # Build tree
    tree = build_tree(all_doctypes)
    
    # Save to JSON
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(tree, f, indent=2, ensure_ascii=False)
    
    # Save tree with permissions to TXT
    with open(TREE_TXT_FILE, "w", encoding="utf-8") as f:
        print_tree_with_permissions(tree, all_doctypes, f)
    
    print(f"\nTree saved to {OUTPUT_FILE}")
    print(f"Tree with permissions saved to {TREE_TXT_FILE}")
    print("\nTree structure:")
    print_tree(tree)

Starting Frappe DocType crawler...
Found 272 doctype files
Found 177 root doctypes: ['About Us Settings', 'Access Log', 'Activity Log', 'Address', 'Address Template', 'Assignment Rule', 'Audit Trail', 'Auto Repeat', 'Blog Category', 'Blog Post', 'Blog Settings', 'Blogger', 'Bulk Update', 'Calendar View', 'Changelog Feed', 'Client Script', 'Color', 'Comment', 'Communication', 'Connected App', 'Console Log', 'Contact', 'Contact Us Settings', 'Country', 'Currency', 'Custom DocPerm', 'Custom Field', 'Custom HTML Block', 'Custom Role', 'Customize Form', 'Dashboard', 'Dashboard Chart', 'Dashboard Chart Source', 'Dashboard Settings', 'Data Export', 'Data Import', 'Deleted Document', 'Desktop Icon', 'Discussion Reply', 'Discussion Topic', 'DocShare', 'DocType', 'Document Follow', 'Document Naming Rule', 'Document Naming Settings', 'Document Share Key', 'Domain', 'Domain Settings', 'Dropbox Settings', 'Email Account', 'Email Domain', 'Email Flag Queue', 'Email Group', 'Email Queue', 'Email Rule