In [92]:
import rdflib
from neo4j import GraphDatabase
import csv
import os
import json
import glob

## Load JSON files to create nodes and relationships in Neo4j

In [93]:
# Connection settings for the Neo4j instance used by the cells below.
# Every value can be overridden through an environment variable so that
# credentials do not have to be edited (or committed) in the notebook.
# NOTE(review): the literal fallbacks keep the notebook working exactly as
# before; drop the password fallback once NEO4J_PASSWORD is set externally.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jkngbq")
driver = GraphDatabase.driver(uri, auth=(username, password))

def get_all_json_paths(folder_path):
    """
    Collect every file ending in '.json' that lives in a folder or in any of its subfolders.

    Args:
      folder_path (str): Directory at which the recursive search starts.

    Returns:
      list: The matching paths exactly as glob reports them, i.e. each one is
        prefixed with ``folder_path`` as it was passed in. The list is empty
        when nothing matches.
    """
    # With recursive=True the "**" segment also matches zero directories, so
    # JSON files located directly inside folder_path are part of the result.
    return glob.glob(os.path.join(folder_path, "**/*.json"), recursive=True)

def load_json_data_from_file(file_path):
    """
    Read the JSON document stored at ``file_path`` and return the parsed value.

    The file is always decoded as UTF-8 (a leading byte-order mark is
    tolerated) instead of the platform's locale encoding, so the same file
    parses identically on every machine.

    Args:
      file_path (str): Path of the JSON file to read.

    Returns:
      The decoded JSON value; a dict when the document's top level is an object.

    Raises:
      OSError: If the file cannot be opened.
      json.JSONDecodeError: If the content is not valid JSON.
    """
    # "utf-8-sig" behaves like "utf-8" but strips a BOM when one is present;
    # json would otherwise reject the BOM character.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        return json.load(f)

In [94]:

def create_interface_node(tx, interface_data):
    """
    Upsert one Interface node.

    The node is matched (or created) on ``interface_data["id"]`` and its
    properties are then replaced by the content of ``interface_data``.

    Args:
      tx: Object exposing ``run(query, **parameters)``.
      interface_data (dict): Properties to store on the node; ``id`` is the merge key.
    """
    merge_interface = "MERGE (n:Interface {id:$unique_id}) SET n=$data"
    tx.run(merge_interface, unique_id=interface_data.get("id"), data=interface_data)
    
def create_property_node(tx, property_data):
    """
    Upsert one Property node.

    The node is matched (or created) on ``property_data["id"]`` and its
    properties are then replaced by the content of ``property_data``.

    Args:
      tx: Object exposing ``run(query, **parameters)``.
      property_data (dict): Properties to store on the node; ``id`` is the merge key.
    """
    merge_property = "MERGE (n:Property {id:$unique_id}) SET n=$data"
    tx.run(merge_property, unique_id=property_data.get("id"), data=property_data)
    
def create_nodes(tx,data):
    """
    Write one interface document into Neo4j: the Interface node itself plus the
    nodes and relationships derived from its ``extends`` and ``contents`` entries.

    Graph written:
      * ``(parent:Interface)-[:has_child]->(interface)`` for every id in ``extends``
      * ``(interface)-[:has_property]->(:Property)`` for every ``Property`` content
      * ``(interface)-[:connected_to]->(target:Interface)`` for every
        ``Relationship`` content that names a ``target``

    All relationships are written with MERGE, so loading the same document
    twice does not duplicate them.

    Args:
      tx: Object exposing ``run(query, **parameters)``; the notebook passes the
        transaction handed over by the session.
      data (dict): Parsed interface document. Keys read: ``@id``, ``@type``,
        ``displayName``, ``description``, ``comment``, ``extends``, ``contents``.
    """
    interface_node_data = {
        "id": data.get("@id"),
        "type": data.get("@type"),
        "name": data.get("displayName"),
        "description": data.get("description"),
        "comment": data.get("comment"),
    }
    create_interface_node(tx, interface_node_data)
    interface_id = interface_node_data["id"]

    # `extends` may hold a single id or a list of ids. A bare string must not
    # be iterated directly, otherwise every character becomes a parent node.
    for super_id in _as_list(data.get("extends")):
        tx.run("MERGE (n:Interface {id:$super_id}) ON CREATE SET n = {id:$super_id}", super_id=super_id)
        tx.run(
            "MATCH (parent:Interface {id:$super_id}), (child:Interface {id:$interface_id}) "
            "MERGE (parent)-[:has_child]->(child)",
            super_id=super_id,
            interface_id=interface_id,
        )

    # A document without `contents` simply has no properties or relationships.
    for content in _as_list(data.get("contents")):
        if _has_type(content, "Property"):
            schema = content.get("schema")
            property_node_data = {
                # Property nodes are keyed by the property name alone.
                "id": content.get("name"),
                "name": content.get("name"),
                "type": content.get("@type"),
                "comment": content.get("comment"),
                # A nested schema object cannot be stored as a node property,
                # so every dict-valued schema is recorded as "enum".
                "schema": "enum" if isinstance(schema, dict) else schema,
            }
            create_property_node(tx, property_node_data)

            tx.run(
                "MATCH (interface:Interface {id:$interface_id}), (property:Property {id:$property_id}) "
                "MERGE (interface)-[:has_property]->(property)",
                interface_id=interface_id,
                property_id=property_node_data["id"],
            )

        elif _has_type(content, "Relationship"):
            relation_node_id = content.get("target")
            if relation_node_id is None:
                # Without a target there is no id to key the node on, and
                # MERGE rejects a null property value.
                continue

            # Make sure the target exists, but only fill in properties it does
            # not have yet: the target may already have been loaded from its
            # own document, and files are processed in arbitrary order.
            tx.run(
                "MERGE (n:Interface {id:$unique_id}) "
                "SET n.name = coalesce(n.name, $name), "
                "n.comment = coalesce(n.comment, $comment), "
                "n.type = coalesce(n.type, $node_type)",
                unique_id=relation_node_id,
                name=content.get("displayName"),
                comment=content.get("comment"),
                node_type="Interface",
            )

            tx.run(
                "MATCH (interface:Interface {id:$interface_id}), (property:Interface {id:$relation_node_id}) "
                "MERGE (interface)-[:connected_to]->(property)",
                interface_id=interface_id,
                relation_node_id=relation_node_id,
            )


def _as_list(value):
    """Normalise an optional scalar-or-list value to a list (None becomes [])."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _has_type(content, type_name):
    """Tell whether a content entry declares ``type_name`` in ``@type`` (string or list form)."""
    declared = content.get("@type")
    if isinstance(declared, (list, tuple)):
        return type_name in declared
    return declared == type_name
              

    
    

# Load the Digital Twin Ontology to Neo4j

In [95]:
folder_path = "./Ontology"
json_file_paths = get_all_json_paths(folder_path)

# One session serves the whole import. The driver is deliberately NOT closed
# here: closing it inside the loop made every file after the first run against
# a closed driver, and the text-export cell further down still reads through
# the same `driver` object.
with driver.session() as session:
    # Prefer the current transaction API; fall back for driver versions that
    # only provide write_transaction.
    run_write = getattr(session, "execute_write", None) or session.write_transaction
    for each_json in json_file_paths:
        data = load_json_data_from_file(each_json)
        run_write(create_nodes, data)
    

  session.write_transaction(create_nodes, data)
  with driver.session() as session:


# Create the node-text CSV for a given graph

In [108]:
def get_connected_nodes_and_relations(tx):
    """
    Fetch every Interface node together with the nodes it reaches through an
    outgoing ``has_child`` or ``connected_to`` relationship.

    Args:
      tx: Object exposing ``run(query)`` whose result can be iterated and
        yields records indexable by ``"n"`` and ``"connectedInfo"``.

    Returns:
      list: One dict per record, shaped
        ``{"node": record["n"], "connectedInfo": record["connectedInfo"]}``.
    """
    query = """
      MATCH (n:Interface)
      WITH n, n.id AS id, n.comment as comment,n.name as name, n.description as description, n.type as type 
      OPTIONAL MATCH (n)-[r:has_child|connected_to]->(connected)
      RETURN n, collect(DISTINCT {node: connected, rel_type: type(r)}) AS connectedInfo
    """
    return [
        {"node": row["n"], "connectedInfo": row["connectedInfo"]}
        for row in tx.run(query)
    ]
def is_unwanted_data(data, start_with = "Adapted from CIM"):
    """
    Tell whether ``data`` begins with the ``start_with`` marker.

    Text carrying the marker (by default "Adapted from CIM") is treated as
    unwanted and is left out of the text that gets embedded.
    """
    return data.startswith(start_with)

def get_node_text_format(node_properties, node_dict):
    """
    Return the textual form of a node, building and caching it on first use.

    Layout: ``name(comment)(description)``. A missing or empty part is left
    out, and a comment/description flagged by ``is_unwanted_data`` is skipped.

    Args:
      node_properties: Object with a ``get(key)`` method giving access to
        ``id``, ``name``, ``comment`` and ``description``.
      node_dict (dict): Cache mapping node id to the text already built;
        updated in place.

    Returns:
      str: The text for the node.
    """
    node_id = node_properties.get('id')

    # Serve from the cache whenever this id has been formatted before.
    cached_text = node_dict.get(node_id)
    if cached_text is not None:
        return cached_text

    pieces = []
    name = node_properties.get('name')
    if name:
        pieces.append(name)
    for field in ('comment', 'description'):
        value = node_properties.get(field)
        if value and not is_unwanted_data(value):
            pieces.append("(" + value + ")")

    formatted = "".join(pieces)
    node_dict[node_id] = formatted
    return formatted

def get_nodes_and_text_to_embed():
    """
    Build the text to embed for every Interface node stored in Neo4j.

    For each node the text starts with the node's own textual form (see
    ``get_node_text_format``) and is extended with the texts of the nodes it
    reaches through ``has_child`` and ``connected_to`` relationships.
    A node without any connection is still returned, with its own text only.

    Side effect: the module-level ``driver`` is closed once the data has been
    read, so it has to be re-created before this function is called again.

    Returns:
      list: One ``{"id": ..., "text_to_embed": ...}`` dict per Interface node.
    """
    with driver.session() as session:
        # This is a read-only query, so use the read transaction API; fall
        # back for driver versions that only provide read_transaction.
        run_read = getattr(session, "execute_read", None) or session.read_transaction
        node_list = run_read(get_connected_nodes_and_relations)
    # Close the driver only after the session has been released.
    driver.close()

    # Result list of {id:, text_to_embed:}.
    nodes_to_embed = []
    # Cache shared by all get_node_text_format calls: {node id: node text}.
    node_dict = {}

    for data in node_list:
        # Nodes are read through `.get`, the same way the connected nodes are,
        # instead of going through a private attribute.
        node = data["node"]
        text_to_embed = get_node_text_format(node, node_dict)

        # Texts of the connected nodes, split by relationship type.
        has_child = []
        connected_to = []
        for info in data["connectedInfo"] or []:
            connected_node = info["node"]
            rel_type = info["rel_type"]
            # Entries holding no node/relationship (both None) carry nothing to add.
            if connected_node is None or rel_type is None:
                continue

            node_text = get_node_text_format(connected_node, node_dict)
            if not node_text:
                continue
            if rel_type == "has_child":
                has_child.append(node_text)
            elif rel_type == "connected_to":
                connected_to.append(node_text)

        if has_child:
            text_to_embed += " has the following children:" + ", ".join(has_child)
        if connected_to:
            text_to_embed += ", is connected to:" + ", ".join(connected_to)

        nodes_to_embed.append({"id": node.get("id"), "text_to_embed": text_to_embed})

    return nodes_to_embed

In [111]:
# One-off export: write the id/text pairs to node_texts.csv.
# Nothing is written when no node text was returned.
import pandas as pd

nodes_to_embed_dict = get_nodes_and_text_to_embed()
if nodes_to_embed_dict:
    df = pd.DataFrame(nodes_to_embed_dict)
    df.to_csv("./node_texts.csv", index=False)

  with driver.session() as session:
  node_list = session.write_transaction(get_connected_nodes_and_relations)


# Load Node Embeddings to Neo4j

In [118]:
import pandas as pd
import os

# Connection settings for loading the embeddings. Every value can be
# overridden through an environment variable so that credentials do not have
# to be edited (or committed) in the notebook.
# NOTE(review): the literal fallbacks keep the notebook working exactly as
# before; drop the password fallback once NEO4J_PASSWORD is set externally.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jkngbq")
database = os.environ.get("NEO4J_DATABASE", "neo4j")
driver = GraphDatabase.driver(uri, auth=(username, password))
def get_dicts_from_csv_file(file_name):
    """
    Read a CSV file and return its rows as a list of dicts.

    Args:
      file_name (str): Path of the CSV file, relative to the working
        directory or absolute.

    Returns:
      list | None: One ``{column: value}`` dict per row, or ``None`` (after
        printing a message) when the file does not exist.
    """
    if not os.path.isfile(file_name):
        print(f"File '{file_name}' does not exist.")
        return None
    # Read exactly the path that was validated above. Prefixing it with "./"
    # turned an absolute path into a different, relative one.
    return pd.read_csv(file_name).to_dict("records")

def set_embedding_to_node(tx, node_data):
    """
    Store ``node_data["embeddings"]`` on the Interface node whose id is ``node_data["id"]``.

    Args:
      tx: Object exposing ``run(query, **parameters)``.
      node_data: Mapping with the keys ``id`` and ``embeddings``.
    """
    # NOTE(review): the value is written as received; when it comes straight
    # from a CSV cell it is presumably a string rather than a list of numbers
    # - confirm this is what the consumers of `embeddings` expect.
    update_query = "MATCH (node:Interface {id: $nodeId}) SET node.embeddings = $embeddings RETURN node"
    tx.run(update_query, embeddings=node_data.get('embeddings'), nodeId=node_data.get('id'))
    

def load_embeddings_from_file_to_neo4j(file_name):
    """
    Copy the embeddings listed in a CSV file onto the matching Interface nodes.

    Rows without an ``embeddings`` value are skipped. A row whose update fails
    is reported and does not stop the remaining rows. The success message is
    printed only when the file was found and every update went through.

    Side effect: the module-level ``driver`` is closed once the rows have been
    processed (it is left untouched when the file does not exist).

    Args:
      file_name (str): Path of the CSV file with ``id`` and ``embeddings`` columns.
    """
    node_emb_dict_list = get_dicts_from_csv_file(file_name)
    if node_emb_dict_list is None:
        # The missing file has already been reported by get_dicts_from_csv_file.
        return

    failed = 0
    try:
        # One session for all rows instead of a new session per row.
        with driver.session() as session:
            # Prefer the current transaction API; fall back for driver
            # versions that only provide write_transaction.
            run_write = getattr(session, "execute_write", None) or session.write_transaction
            for node_dict in node_emb_dict_list:
                embeddings = node_dict.get("embeddings")
                # An empty CSV cell is read as NaN.
                if embeddings is None or pd.isna(embeddings):
                    continue
                try:
                    run_write(set_embedding_to_node, node_dict)
                except Exception as e:
                    # Best effort: report the row and carry on with the rest.
                    failed += 1
                    print(f"Error: {e}")
    finally:
        driver.close()

    if failed:
        print(f"Loaded with {failed} failed update(s)")
    else:
        print("Loaded successsfully")


In [119]:
# Push the embeddings stored in node_texts_with_embeddings.csv onto the matching Interface nodes.
load_embeddings_from_file_to_neo4j("node_texts_with_embeddings.csv")

  session.write_transaction(set_embedding_to_node, node_dict)


Loaded successsfully


# *DELETE ALL RECORDS*

In [91]:
# DELETE ALL RECORDS
# Do not run this until the experiment is complete
import os

from neo4j import GraphDatabase

# Connection settings; every value can be overridden through an environment
# variable so that credentials do not have to be edited in the notebook.
# NOTE(review): the literal fallbacks keep the cell working exactly as before;
# drop the password fallback once NEO4J_PASSWORD is set externally.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4jkngbq")
database = os.environ.get("NEO4J_DATABASE", "neo4j")

def clear_database(tx):
    """Delete every node in the database together with all of its relationships."""
    tx.run("MATCH (n) DETACH DELETE n")

try:
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        # The target database is a session-level setting, so it is passed to
        # the session rather than to the driver constructor.
        with driver.session(database=database) as session:
            # Prefer the current transaction API; fall back for driver
            # versions that only provide write_transaction.
            run_write = getattr(session, "execute_write", None) or session.write_transaction
            run_write(clear_database)

except Exception as e:
    print(f"Error: {e}")


  session.write_transaction(clear_database)
