In [9]:
from neo4j import GraphDatabase

uri = "neo4j://127.0.0.1:7687"
username = "neo4j"
password = "1234567890"

driver = GraphDatabase.driver(uri, auth=(username, password))
neo4j_session = driver.session(database="ontology")

In [10]:
def count_graph_elements(tx):
    node_count_query = "MATCH (n) RETURN count(n) AS node_count"
    rel_count_query = "MATCH ()-[r]->() RETURN count(r) AS rel_count"

    node_result = tx.run(node_count_query).single()
    rel_result = tx.run(rel_count_query).single()

    node_count = node_result["node_count"]
    rel_count = rel_result["rel_count"]

    return node_count, rel_count

In [11]:
with neo4j_session.begin_transaction() as tx:
    node_count, rel_count = count_graph_elements(tx)
    print(f"Total concepts (nodes): {node_count}")
    print(f"Total relations (edges): {rel_count}")


Total concepts (nodes): 4023
Total relations (edges): 6918


In [12]:
import os

def export_relation_names_to_txt(session, output_path="relations.txt"):
    """
    Export all unique relation names (r.name) from Neo4j to a text file.
    Each line in the file will contain one relation name.
    """
    with session.begin_transaction() as tx:
        result = tx.run("""
            MATCH ()-[r:RELATION]->()
            WHERE r.name IS NOT NULL
            RETURN DISTINCT r.name AS relation_name
            ORDER BY r.name
        """)
        relation_names = [record["relation_name"] for record in result]

    # Write to file
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        for name in relation_names:
            f.write(name.strip() + "\n")

    print(f"✅ Exported {len(relation_names)} unique relation names to {output_path}")

    return relation_names

In [13]:
export_relation_names_to_txt(neo4j_session, "relation_names.txt")

✅ Exported 841 unique relation names to relation_names.txt


['A36',
 'B4',
 'C115',
 'C184',
 'C212',
 'C225',
 'C281',
 'C304',
 'C310',
 'D101',
 'D60',
 'D83',
 'D96',
 'E100',
 'F140',
 'F263',
 'F280',
 'F66',
 'G111',
 'G115',
 'G117',
 'G143',
 'G154',
 'G190',
 'G207',
 'G25',
 'G289',
 'G30',
 'G32',
 'G4',
 'G60',
 'G63',
 'G72',
 'H17',
 'H184',
 'H245',
 'H25',
 'H26',
 'I103',
 'I12',
 'I26',
 'I3',
 'I42',
 'I43',
 'I7',
 'I81',
 'J130',
 'J136',
 'J137',
 'J144',
 'J153',
 'J211',
 'J215',
 'J217',
 'J227',
 'J236',
 'J33',
 'J36',
 'J40',
 'J47',
 'J60',
 'J88',
 'K144',
 'K59',
 'K79',
 'L174',
 'L226',
 'L273',
 'L37',
 'M1',
 'M105',
 'M223',
 'M26',
 'M4',
 'M50',
 'M51',
 'M52',
 'M53',
 'M54',
 'M57',
 'M64',
 'M7',
 'M72',
 'M8',
 'M9',
 'N1',
 'N10',
 'N107',
 'N123',
 'N126',
 'N130',
 'N131',
 'N134',
 'N135',
 'N138',
 'N139',
 'N150',
 'N151',
 'N154',
 'N157',
 'N158',
 'N160',
 'N163',
 'N165',
 'N167',
 'N193',
 'N194',
 'N210',
 'N211',
 'N214',
 'N215',
 'N223',
 'N226',
 'N256',
 'N257',
 'N258',
 'N268',
 'N29

In [14]:
import os

def export_concept_names_to_txt(session, output_path="concepts.txt"):
    """
    Export all unique concept node names (c.name) from Neo4j to a text file.
    Each line in the file will contain one concept name.
    """
    with session.begin_transaction() as tx:
        result = tx.run("""
            MATCH (c:Concept)
            WHERE c.name IS NOT NULL
            RETURN DISTINCT c.name AS concept_name
            ORDER BY c.name
        """)
        concept_names = [record["concept_name"] for record in result]

    # Write to file
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        for name in concept_names:
            f.write(name.strip() + "\n")

    print(f"✅ Exported {len(concept_names)} unique concept names to {output_path}")

    return concept_names

In [15]:
export_concept_names_to_txt(neo4j_session, "concept_names.txt")

✅ Exported 4023 unique concept names to concept_names.txt


['A153',
 'A19',
 'A32',
 'B167',
 'B169',
 'B177',
 'B179',
 'C10',
 'C100',
 'C105',
 'C160',
 'C181',
 'C182',
 'C188',
 'C194',
 'C2',
 'C228',
 'C293',
 'C71',
 'C78',
 'C79',
 'C8',
 'C83',
 'C84',
 'C85',
 'D108',
 'D28',
 'D3',
 'D65',
 'E134',
 'F120',
 'F14',
 'F207',
 'F263',
 'F273',
 'F275',
 'F278',
 'F66',
 'F8',
 'G126',
 'G148',
 'G154',
 'G163',
 'G164',
 'G179',
 'G191',
 'G194',
 'G196',
 'G230',
 'G267',
 'G281',
 'G285',
 'G293',
 'G43',
 'G63',
 'G71',
 'G72',
 'G89',
 'G9',
 'H112',
 'H113',
 'H185',
 'H213',
 'H230',
 'H30',
 'H38',
 'H4',
 'H42',
 'H43',
 'H5',
 'H67',
 'H73',
 'H83',
 'H90',
 'I10',
 'I101',
 'I133',
 'I136',
 'I140',
 'I141',
 'I2',
 'I3',
 'I42',
 'I43',
 'I45',
 'I7',
 'I70',
 'I73',
 'J114',
 'J138',
 'J157',
 'J182',
 'J210',
 'J219',
 'J220',
 'J225',
 'J226',
 'J227',
 'J228',
 'J229',
 'J231',
 'J235',
 'J33',
 'J35',
 'J41',
 'J47',
 'J60',
 'J80',
 'J82',
 'K205',
 'K32',
 'K59',
 'K71',
 'K79',
 'K84',
 'L10',
 'L106',
 'L144',
 'L

In [16]:
import random

def get_random_triplets(session, limit=10):
    """
    Extract random triplets (concept1 -[relation]-> concept2) from Neo4j.
    Returns a list of (concept1, relation, concept2).
    """
    with session.begin_transaction() as tx:
        result = tx.run("""
            MATCH (c1:Concept)-[r:RELATION]->(c2:Concept)
            WHERE r.name IS NOT NULL
            RETURN c1.name AS concept1, r.name AS relation, c2.name AS concept2
        """)
        triplets = [(rec["concept1"], rec["relation"], rec["concept2"]) for rec in result]

    if not triplets:
        print("No triplets found in Neo4j.")
        return []

    # Pick random subset
    sample = random.sample(triplets, min(limit, len(triplets)))

    print(f"\n🎲 Showing {len(sample)} random triplets out of {len(triplets)} total:\n")
    for i, (c1, rel, c2) in enumerate(sample, start=1):
        print(f"{i:02d}. {c1} —[{rel}]→ {c2}")

    return sample

In [17]:
get_random_triplets(neo4j_session, limit=5)


🎲 Showing 5 random triplets out of 6918 total:

01. trường hợp xe quá khổ giới hạn —[NEW01]→ quá khổ giới hạn công trình đường bộ
02. báo cáo kết quả —[xử lý]→ vi phạm địa phương
03. sở giao thông vận tải —[NEW01]→ G71
04. J220 —[N167]→ định kỳ hàng năm hạng mục công trình dân dụng
05. hợp đồng dự án thực hiện —[N167]→ công trình đường bộ đầu tư


[('trường hợp xe quá khổ giới hạn',
  'NEW01',
  'quá khổ giới hạn công trình đường bộ'),
 ('báo cáo kết quả', 'xử lý', 'vi phạm địa phương'),
 ('sở giao thông vận tải', 'NEW01', 'G71'),
 ('J220', 'N167', 'định kỳ hàng năm hạng mục công trình dân dụng'),
 ('hợp đồng dự án thực hiện', 'N167', 'công trình đường bộ đầu tư')]

In [28]:
def extract_triplets_for_relations(session, relation_names, output_path="triplets_by_relation.txt"):
    """
    Extract all triplets for a list of relation names and export them to a text file.

    Args:
        session: Active Neo4j session.
        relation_names (list[str]): List of relation names to extract.
        output_path (str): File path to save the results.

    Returns:
        dict: {relation_name: [(subject, relation, object), ...]}
    """
    all_triplets = {}

    with open(output_path, "w", encoding="utf-8") as f:
        for rel_name in relation_names:
            with session.begin_transaction() as tx:
                result = tx.run("""
                    MATCH (s)-[r:RELATION {name: $relation_name}]->(o)
                    RETURN s.name AS subject, r.name AS relation, o.name AS object
                    ORDER BY s.name, o.name
                """, relation_name=rel_name)
                triplets = [
                    (record["subject"], record["relation"], record["object"])
                    for record in result
                ]

            all_triplets[rel_name] = triplets

            # Write section header and triplets to file
            f.write(f"=== Relation: {rel_name} ===\n")
            for s, r, o in triplets:
                f.write(f"{s}\t{r}\t{o}\n")
            f.write("\n")

            print(f"✅ Found {len(triplets)} triplets for relation '{rel_name}'")

    print(f"\n📄 All triplets saved to: {output_path}")
    return all_triplets

In [29]:
relations_to_find = [
    "cấp đăng ký"
]
results = extract_triplets_for_relations(neo4j_session, relations_to_find)
for result in results:
    print(result)

✅ Found 1 triplets for relation 'cấp đăng ký'

📄 All triplets saved to: triplets_by_relation.txt
cấp đăng ký
