In [None]:
from neo4j import GraphDatabase
 
# Define correct URI and AUTH arguments (no AUTH by default)
URI = "bolt://localhost:7687"
AUTH = ("", "")
 
with GraphDatabase.driver(URI, auth=AUTH) as client:
    # Check the connection
    client.verify_connectivity()
 
    # Find a user John in the database
    records, summary, keys = client.execute_query(
        "CALL export_util.json('/usr/lib/memgraph/query_modules/export.json') YIELD objects return objects;",
        database_="memgraph",
    )
 
    # Get the result
    for record in records:
        print(record["name"])
 
    # Print the query
    print(summary.query)

In [68]:
def restructure_json_list(data_list):
    """
    Restructures a list of JSON-like dictionaries to group relationships by the 'relationship' key.

    Args:
        data_list: A list of dictionaries, where each dictionary contains a 'relationship' key.

    Returns:
        A dictionary where the 'relationship' key maps to a list of relationship dictionaries.
    """
    if not isinstance(data_list, list):
      return data_list #raise TypeError("Input must be a list")

    nodes = []
    for item in data_list:
        if isinstance(item, dict) and 'node' in item and isinstance(item['node'], dict):
            node = item['node'].copy()  # Create a copy to avoid modifying the original
            if 'name' in node:
                node['id'] = node.pop('name')  # Rename 'name' to 'id'
            nodes.append(node)

    #return {"nodes": nodes}

    relationships = []
    for item in data_list:
        if isinstance(item, dict) and 'relationship' in item and isinstance(item['relationship'], dict):
            relationships.append(item['relationship'])
        #if you have other keys in the original list, you can add more elif statements here.

    return {"nodes": nodes,
        "relationships": relationships}

In [83]:
def transform_nodes(nodes_list):
    """
    Transforms a list of node dictionaries from the given format to {'id': '...', 'type': '...'}.

    Args:
        nodes_list: A list of node dictionaries.

    Returns:
        A list of transformed node dictionaries.
    """
    if not isinstance(nodes_list, list):
      raise TypeError("Input must be a list")

    transformed_nodes = []
    for node in nodes_list:
        if isinstance(node, dict) and 'id' in node and 'labels' in node and isinstance(node['labels'], list) and len(node['labels']) > 0:
            transformed_node = {'id': node['id'], 'type': node['labels'][0].replace(" ", "")} #remove spaces in the type name.
            transformed_nodes.append(transformed_node)
        #if you have other node formats, add elif statements here.
        else:
            transformed_nodes.append(node)
    return transformed_nodes

In [85]:
from neo4j import GraphDatabase
import json

# Define connection details
URI = "bolt://localhost:7687"
AUTH = ("", "")  # No authentication for Memgraph

# Load JSON data from a file
def load_json_from_file(filename):
    with open(filename, "r", encoding="utf-8") as file:
        print("Opening file: ", filename)
        return json.load(file)

# Function to load data into Memgraph
def load_graph_from_json(json_data):
    with GraphDatabase.driver(URI, auth=AUTH) as client:
        client.verify_connectivity()
        json_data = restructure_json_list(json_data)

        

        # Create Nodes
        for node in transform_nodes(json_data["nodes"]):
            query = """
            MERGE (n:$type {id: $id})
            RETURN n.id AS name;
            """
            
            records, _, _ = client.execute_query(query, type=node["type"], id=node["id"], database_="memgraph")

            # Print created nodes
            for record in records:
                print("Created node:", record["name"])

        # Create Relationships
        for rel in transform_nodes(json_data["relationships"]):
            source = rel["source"]
            target = rel["target"]
            relation = rel["relation"]

            query = f"""
            MATCH (s {{id: $source}}), (t {{id: $target}})
            MERGE (s)-[r:{relation}]->(t)
            RETURN type(r) AS relation;
            """
            
            print("Executing Query:", query)  # Debugging output
            
            records, _, _ = client.execute_query(
                query, source=source, target=target, database_="memgraph"
            )

            # Print created relationships
            for record in records:
                print("Created relationship:", record["relation"])

# Load JSON from file and insert it into Memgraph
#json_filename = "answer_from_LLM.json"  # Replace with your actual file path
#json_data = load_json_from_file(json_filename)
#load_graph_from_json(json_data)


In [86]:
import os

llm_answers_path = r"LLM_answers"
json_files = [f for f in os.listdir(llm_answers_path) if f.endswith(".json")]


for filename in json_files:
    llm_json_path = os.path.join(llm_answers_path, filename)
    json_data = load_json_from_file(llm_json_path)
    load_graph_from_json(json_data)

    

Opening file:  LLM_answers\answer_20250305_022711.json
Created node: ProspectIncremental
Created node: wh_db_stage
Created node: agencyid
Created node: lastname
Created node: firstname
Created node: middleinitial
Created node: gender
Created node: addressline1
Created node: addressline2
Created node: postalcode
Created node: city
Created node: state
Created node: country
Created node: phone
Created node: income
Created node: numbercars
Created node: numberchildren
Created node: maritalstatus
Created node: age
Created node: creditrating
Created node: ownorrentflag
Created node: employer
Created node: numbercreditcards
Created node: networth
Created node: marketingnameplate
Created node: recordbatchid
Created node: Unique identifier from agency
Created node: Last name
Created node: First name
Created node: Middle initial
Created node: â€˜Mâ€™ or â€˜Fâ€™ or â€˜Uâ€™
Created node: Postal address
Created node: Postal address
Created node: Postal code
Created node: City
Created node: State or

In [71]:
asd = load_json_from_file(r"LLM_answers\answer_20250305_022936.json")
asd2 = load_json_from_file(r"LLM_answers\answer_20250305_022947.json")

Opening file:  LLM_answers\answer_20250305_022936.json
Opening file:  LLM_answers\answer_20250305_022947.json


In [73]:
asd["nodes"]

[{'id': 'wh_db.FactWatches',
  'labels': ['TableName'],
  'properties': {'name': 'wh_db.FactWatches'}},
 {'id': 'sk_customerid',
  'labels': ['ColumnName'],
  'properties': {'name': 'sk_customerid'}},
 {'id': 'sk_securityid',
  'labels': ['ColumnName'],
  'properties': {'name': 'sk_securityid'}},
 {'id': 'sk_dateid_dateplaced',
  'labels': ['ColumnName'],
  'properties': {'name': 'sk_dateid_dateplaced'}},
 {'id': 'sk_dateid_dateremoved',
  'labels': ['ColumnName'],
  'properties': {'name': 'sk_dateid_dateremoved'}},
 {'id': 'batchid',
  'labels': ['ColumnName'],
  'properties': {'name': 'batchid'}},
 {'id': 'BIGINT', 'labels': ['Data Type'], 'properties': {'name': 'BIGINT'}},
 {'id': 'INT', 'labels': ['Data Type'], 'properties': {'name': 'INT'}},
 {'id': 'wh_db', 'labels': ['SchemaName'], 'properties': {'name': 'wh_db'}},
 {'id': 'Customer associated with watch list',
  'labels': ['Comment'],
  'properties': {'text': 'Customer associated with watch list'}},
 {'id': 'Security listed on 

In [79]:
def transform_nodes(nodes_list):
    """
    Transforms a list of node dictionaries from the given format to {'id': '...', 'type': '...'}.

    Args:
        nodes_list: A list of node dictionaries.

    Returns:
        A list of transformed node dictionaries.
    """
    if not isinstance(nodes_list, list):
      raise TypeError("Input must be a list")

    transformed_nodes = []
    for node in nodes_list:
        if isinstance(node, dict) and 'id' in node and 'labels' in node and isinstance(node['labels'], list) and len(node['labels']) > 0:
            transformed_node = {'id': node['id'], 'type': node['labels'][0].replace(" ", "")} #remove spaces in the type name.
            transformed_nodes.append(transformed_node)
        #if you have other node formats, add elif statements here.
        else:
            transformed_nodes.append(node)
    return transformed_nodes

In [81]:
transform_nodes(asd2["nodes"])

[{'id': 'sk_companyid', 'type': 'ColumnName'},
 {'id': 'Financial', 'type': 'TableName'},
 {'id': 'BIGINT', 'type': 'Data Type'},
 {'id': 'wh_db', 'type': 'SchemaName'},
 {'id': 'Company SK.', 'type': 'Comment'},
 {'id': 'fi_year', 'type': 'ColumnName'},
 {'id': 'INT', 'type': 'Data Type'},
 {'id': 'Year of the quarter end.', 'type': 'Comment'},
 {'id': 'fi_qtr', 'type': 'ColumnName'},
 {'id': 'Quarter number that the financial information is for: valid values 1, 2, 3, 4.',
  'type': 'Comment'},
 {'id': 'fi_qtr_start_date', 'type': 'ColumnName'},
 {'id': 'DATE', 'type': 'Data Type'},
 {'id': 'Start date of quarter.', 'type': 'Comment'},
 {'id': 'fi_revenue', 'type': 'ColumnName'},
 {'id': 'DOUBLE', 'type': 'Data Type'},
 {'id': 'Reported revenue for the quarter.', 'type': 'Comment'},
 {'id': 'fi_net_earn', 'type': 'ColumnName'},
 {'id': 'Net earnings reported for the quarter.', 'type': 'Comment'},
 {'id': 'fi_basic_eps', 'type': 'ColumnName'},
 {'id': 'Basic earnings per share for the 

In [39]:
def restructure_and_rename_nodes(data_list):
    """
    Restructures a list of JSON-like dictionaries to group nodes by the 'node' key,
    renames 'name' to 'id', and returns a dictionary with 'nodes' as the key.

    Args:
        data_list: A list of dictionaries, where each dictionary contains a 'node' key.

    Returns:
        A dictionary where the 'nodes' key maps to a list of node dictionaries with 'id' instead of 'name'.
    """
    if not isinstance(data_list, list):
      return data_list # raise TypeError("Input must be a list")

    nodes = []
    for item in data_list:
        if isinstance(item, dict) and 'node' in item and isinstance(item['node'], dict):
            node = item['node'].copy()  # Create a copy to avoid modifying the original
            if 'name' in node:
                node['id'] = node.pop('name')  # Rename 'name' to 'id'
            nodes.append(node)

    return {"nodes": nodes}

restructure_and_rename_nodes(asd)

{'nodes': [{'type': 'TableName', 'id': 'ProspectIncremental'},
  {'type': 'SchemaName', 'id': 'wh_db_stage'},
  {'type': 'ColumnName', 'id': 'agencyid'},
  {'type': 'ColumnName', 'id': 'lastname'},
  {'type': 'ColumnName', 'id': 'firstname'},
  {'type': 'ColumnName', 'id': 'middleinitial'},
  {'type': 'ColumnName', 'id': 'gender'},
  {'type': 'ColumnName', 'id': 'addressline1'},
  {'type': 'ColumnName', 'id': 'addressline2'},
  {'type': 'ColumnName', 'id': 'postalcode'},
  {'type': 'ColumnName', 'id': 'city'},
  {'type': 'ColumnName', 'id': 'state'},
  {'type': 'ColumnName', 'id': 'country'},
  {'type': 'ColumnName', 'id': 'phone'},
  {'type': 'ColumnName', 'id': 'income'},
  {'type': 'ColumnName', 'id': 'numbercars'},
  {'type': 'ColumnName', 'id': 'numberchildren'},
  {'type': 'ColumnName', 'id': 'maritalstatus'},
  {'type': 'ColumnName', 'id': 'age'},
  {'type': 'ColumnName', 'id': 'creditrating'},
  {'type': 'ColumnName', 'id': 'ownorrentflag'},
  {'type': 'ColumnName', 'id': 'empl

In [65]:
def restructure_json_list(data_list):
    """
    Restructures a list of JSON-like dictionaries to group relationships by the 'relationship' key.

    Args:
        data_list: A list of dictionaries, where each dictionary contains a 'relationship' key.

    Returns:
        A dictionary where the 'relationship' key maps to a list of relationship dictionaries.
    """
    if not isinstance(data_list, list):
      return data_list #raise TypeError("Input must be a list")

    nodes = []
    for item in data_list:
        if isinstance(item, dict) and 'node' in item and isinstance(item['node'], dict):
            node = item['node'].copy()  # Create a copy to avoid modifying the original
            if 'name' in node:
                node['id'] = node.pop('name')  # Rename 'name' to 'id'
            nodes.append(node)

    #return {"nodes": nodes}

    relationships = []
    for item in data_list:
        if isinstance(item, dict) and 'relationship' in item and isinstance(item['relationship'], dict):
            relationships.append(item['relationship'])
        #if you have other keys in the original list, you can add more elif statements here.

    return {"nodes": nodes,
        "relationships": relationships}


restructured_data = restructure_json_list(asd2)

restructured_data


{'nodes': [{'id': 'sk_companyid', 'type': 'ColumnName'},
  {'id': 'Financial', 'type': 'TableName'},
  {'id': 'BIGINT', 'type': 'Data Type'},
  {'id': 'wh_db', 'type': 'SchemaName'},
  {'id': 'Company SK.', 'type': 'Comment'},
  {'id': 'fi_year', 'type': 'ColumnName'},
  {'id': 'INT', 'type': 'Data Type'},
  {'id': 'Year of the quarter end.', 'type': 'Comment'},
  {'id': 'fi_qtr', 'type': 'ColumnName'},
  {'id': 'Quarter number that the financial information is for: valid values 1, 2, 3, 4.',
   'type': 'Comment'},
  {'id': 'fi_qtr_start_date', 'type': 'ColumnName'},
  {'id': 'DATE', 'type': 'Data Type'},
  {'id': 'Start date of quarter.', 'type': 'Comment'},
  {'id': 'fi_revenue', 'type': 'ColumnName'},
  {'id': 'DOUBLE', 'type': 'Data Type'},
  {'id': 'Reported revenue for the quarter.', 'type': 'Comment'},
  {'id': 'fi_net_earn', 'type': 'ColumnName'},
  {'id': 'Net earnings reported for the quarter.', 'type': 'Comment'},
  {'id': 'fi_basic_eps', 'type': 'ColumnName'},
  {'id': 'Bas

In [67]:
restructured_data = restructure_json_list(asd2)

restructured_data["nodes"]

[{'id': 'sk_companyid', 'type': 'ColumnName'},
 {'id': 'Financial', 'type': 'TableName'},
 {'id': 'BIGINT', 'type': 'Data Type'},
 {'id': 'wh_db', 'type': 'SchemaName'},
 {'id': 'Company SK.', 'type': 'Comment'},
 {'id': 'fi_year', 'type': 'ColumnName'},
 {'id': 'INT', 'type': 'Data Type'},
 {'id': 'Year of the quarter end.', 'type': 'Comment'},
 {'id': 'fi_qtr', 'type': 'ColumnName'},
 {'id': 'Quarter number that the financial information is for: valid values 1, 2, 3, 4.',
  'type': 'Comment'},
 {'id': 'fi_qtr_start_date', 'type': 'ColumnName'},
 {'id': 'DATE', 'type': 'Data Type'},
 {'id': 'Start date of quarter.', 'type': 'Comment'},
 {'id': 'fi_revenue', 'type': 'ColumnName'},
 {'id': 'DOUBLE', 'type': 'Data Type'},
 {'id': 'Reported revenue for the quarter.', 'type': 'Comment'},
 {'id': 'fi_net_earn', 'type': 'ColumnName'},
 {'id': 'Net earnings reported for the quarter.', 'type': 'Comment'},
 {'id': 'fi_basic_eps', 'type': 'ColumnName'},
 {'id': 'Basic earnings per share for the 

In [7]:
for rel in json_data["relationships"]:
    source = rel["source"]
    target = rel["target"]
    relation = rel["relation"]

    query = f"""
    MATCH (s {{id: $source}}), (t {{id: $target}})
    MERGE (s)-[:{relation}]->(t)
    RETURN type(r) AS relation;
    """

    print(query)


    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:PART_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:PART_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:TYPE_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:PART_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:TYPE_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:PART_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:TYPE_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:PART_OF]->(t)
    RETURN type(r) AS relation;
    

    MATCH (s {id: $source}), (t {id: $target})
    MERGE (s)-[:TYPE_OF]->(t)
  