In [77]:
import rdflib
from neo4j import GraphDatabase
import csv
import os
import json
import glob

## Load Json file to Create nodes and relations in Neo4j

In [88]:
uri = "bolt://localhost:7687"
username = "neo4j"
password = "neo4jkngbq"
driver = GraphDatabase.driver(uri, auth=(username, password))

def get_all_json_paths(folder_path):
  """
  Retrieves all file paths ending with the '.json' extension within a folder and its subfolders using glob.

  Args:
      folder_path (str): The path to the folder where the search starts.

  Returns:
      list: A list containing absolute paths to all JSON files found.
  """

  # Construct a pattern to match all JSON files recursively
  json_pattern = os.path.join(folder_path, "**/*.json")

  # Use glob.glob to find all matching files
  json_paths = glob.glob(json_pattern, recursive=True)

  return json_paths

"""
    This methods loads the json data from the given file path and return the json data as dictionary
"""
def load_json_data_from_file(file_path):
    with open(file_path, "r") as f:
        data = f.read()
        json_data = json.loads(data)
    return json_data

In [89]:

def create_interface_node(tx, interface_data):
    unique_id = interface_data.get("id")
    tx.run("MERGE (n:Interface {id:$unique_id}) SET n=$data", data=interface_data, unique_id=unique_id)
    
def create_property_node(tx, property_data):
    unique_id = property_data.get("id")
    tx.run("MERGE (n:Property {id:$unique_id}) SET n=$data", data = property_data, unique_id=unique_id)
    
def create_nodes(tx,data):
    interface_node_data = {}
    interface_node_data["id"] = data.get("@id")
    interface_node_data["type"] = data.get("@type")
    interface_node_data["name"] = data.get("displayName")
    interface_node_data["description"] = data.get("description")
    interface_node_data["comment"] = data.get("comment")
    
    create_interface_node(tx, interface_node_data)
    
    if(None != data.get('extends')):
        super_ids = data.get('extends')
        interface_id = interface_node_data["id"]
        for super_id in super_ids:
            tx.run("MERGE (n:Interface {id:$super_id}) ON CREATE SET n = {id:$super_id}", super_id=super_id)
            
            tx.run("MATCH (parent:Interface {id:$super_id}), (child:Interface {id:$interface_id}) CREATE (parent)-[:has_child]->(child)", super_id=super_id, interface_id=interface_id)

    if(None != data.get('contents') or (len(data.get('contents'))>0)):
        for content in data.get('contents'):
            if("Property" == content.get("@type")):
                property_node_data = {}
                property_node_data['id'] = content.get('name')
                property_node_data['name'] = content.get('name')
                property_node_data['type'] = content.get('@type')
                property_node_data['comment'] = content.get('comment')
                if(isinstance(content.get('schema'), dict)):
                    property_node_data['schema'] = "enum"
                else:
                    property_node_data['schema'] = content.get('schema')
                
                create_property_node(tx, property_node_data)
                
                interface_id = interface_node_data["id"]
                property_id = property_node_data['id']
                
                #query = """ MATCH (interface:Interface {{id:$interface_id})
                 #           MATCH (property:Property {{id:$property_id}})
                  #          CREATE (interface)-[:has_property]->(property)
                   #     """
            
                tx.run("MATCH (interface:Interface {id:$interface_id}), (property:Property {id:$property_id}) CREATE (interface)-[:has_property]->(property)", interface_id=interface_id, property_id=property_id)
                       
            elif("Relationship" == content.get("@type")):
                relation_node_data = {}
                relation_node_data['id'] = content.get('target')
                relation_node_data['name'] = content.get('displayName')
                relation_node_data['comment'] = content.get('comment')
                relation_node_data['@type'] = "Interface"
                create_interface_node(tx, relation_node_data)
                
                
                interface_id = interface_node_data["id"]
                relation_node_id = relation_node_data['id']
                
                tx.run("MATCH (interface:Interface {id:$interface_id}), (property:Interface {id:$relation_node_id}) CREATE (interface)-[:connected_to]->(property)",interface_id=interface_id, relation_node_id=relation_node_id)
              

    
    

# Load the Digital Twin Ontology to Neo4j

In [90]:
folder_path = "./Ontology" 
json_file_paths = get_all_json_paths(folder_path)

for each_json in json_file_paths:
    data = load_json_data_from_file(each_json)

    with driver.session() as session:
        session.write_transaction(create_nodes, data)
        
    driver.close() 
    

  session.write_transaction(create_nodes, data)
  with driver.session() as session:


['dtmi:digitaltwins:ngsi_ld:cim:energy:ConnectivityNodeContainer;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1', 'dtmi:digitaltwins:ngsi_ld:cim:energy:Location;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1', 'dtmi:digitaltwins:ngsi_ld:cim:energy:Location;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:EquipmentContainer;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:BasicIntervalSchedule;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:EquipmentContainer;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:IdentifiedObject;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:ACDCTerminal;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:PowerSystemResource;1']
['dtmi:digitaltwins:ngsi_ld:cim:energy:Po

# Create Text Embeddings for a given graph

In [86]:
def get_connected_nodes_and_relations(tx):
    results = []
    interface_label = "Interface"
    rel_types=("has_child","connected_to")

    cursor = tx.run("""
      MATCH (n:Interface)
      WITH n, n.id AS id, n.comment as comment,n.name as name, n.description as description, n.type as type 
      OPTIONAL MATCH (n)-[r:has_child|connected_to]->(connected)
      RETURN n, collect(DISTINCT {node: connected, rel_type: type(r)}) AS connectedInfo
    """)

    for record in cursor:

        node = record["n"]

        connected_info = record["connectedInfo"]

        results.append({"node": node, "connectedInfo": connected_info})

    return results
#check if the data start with Adapted from CIM this is unwanted data so we do not want to embed them 
def is_unwanted_data(data, start_with = "Adapted from CIM"):
    if(data.startswith(start_with)):
        return True
    return False


def get_node_text_format(node_id, node_properties, node_dict):

    if(None != node_dict.get(node_id)):
        return node_dict.get('text')
    else:
        text_to_embed = ""
        if(node_properties.get('name')):
            text_to_embed+=node_properties.get('name')
        if(node_properties.get('comment') and not is_unwanted_data(node_properties.get('comment'))):
            comment_to_add = node_properties.get('comment')
            text_to_embed+=  "(" + comment_to_add + ")"
        if(node_properties.get('description') and not is_unwanted_data(node_properties.get('description'))):
            comment_to_add = node_properties.get('description')
            text_to_embed+=  "(" + comment_to_add + ")"
        node_dict[node_id] = text_to_embed
        return text_to_embed

def get_node_id = 


node_list = []
with driver.session() as session:
    node_list = session.write_transaction(get_connected_nodes_and_relations)
    driver.close() 
    
nodes_to_embed = []
node_dict = {}
for data in node_list:
    #this gets the node details form the result
    node = data["node"]
    #this stores both the connection node as its relation with the Node as a list
    connected_info = data["connectedInfo"]
    #get node properties like id, name, comment, description as a dictionary
    node_properties = node._properties
    
    node_details = {}
    
    node_details['id'] =node_properties.get('id')

    text_to_embed = get_node_text_format(node_properties, node_dict)
    
    
    if (connected_info):
        has_child = []
        connected_to = []
        for info in connected_info:
            if(None != info["node"] and None != info["rel_type"]):
                
                connected_node = info["node"]
                rel_type = info["rel_type"]
                node_text = get_node_text_format(connected_node,node_dict)
                if(rel_type == "has_child"):
                    has_child.append(node_text)
                elif(rel_type == "connected_to"):
                    connected_to.append(node_text)
            
        if(has_child):
            child_text = ", ".join(has_child)
            text_to_embed += " has the following children:" + child_text
        if(connected_to):
            connected_text = ", ".join(connected_to)
            text_to_embed += ", is connected to:" + connected_text

        node_details['text_to_embed'] = text_to_embed 
        nodes_to_embed.append(node_details)           

    else:
        print("No connected nodes found.")
if(nodes_to_embed):
    print(nodes_to_embed)

dtmi:digitaltwins:ngsi_ld:cim:energy:EquipmentContainer;1
dtmi:digitaltwins:ngsi_ld:cim:energy:Equipment;1
['dtmi:digitaltwins:ngsi_ld:cim:energy:ConnectivityNodeContainer;1']


  with driver.session() as session:
  node_list = session.write_transaction(get_connected_nodes_and_relations)


TypeError: unhashable type: 'list'

# DELETE ALL RECORDS

In [87]:
# DELETE ALL RECORDS
#Do not run this until experiment is compelete
from neo4j import GraphDatabase

uri = "bolt://localhost:7687"
username = "neo4j"
password = "neo4jkngbq"
database = "neo4j"

def clear_database(tx):
    tx.run("MATCH (n) DETACH DELETE n")

try:
    with GraphDatabase.driver(uri, auth=(username, password), database=database) as driver:
        with driver.session() as session:
            session.write_transaction(clear_database)

except Exception as e:
    print(f"Error: {e}")


  session.write_transaction(clear_database)
