In [9]:
import os
import pandas as pd
from py2neo import Graph, Node, Relationship

In [2]:
# Connect to the Neo4j database.
# Connection settings are taken from the environment so that real credentials
# do not have to be stored in the notebook. The literals are only fallbacks,
# used when the corresponding variable is not set.
NEO4J_URL = os.environ.get('NEO4J_URL', 'bolt://localhost:7687')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'your_password_here')
graph = Graph(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

In [3]:
# Load the main data.
# The workbook is opened once and all three sheets are parsed from the same
# handle, instead of re-opening the file for every sheet.
file_path = "Neo4j_mapping_data.xlsx"
with pd.ExcelFile(file_path) as workbook:
    # No sheet_name given: pandas reads the first sheet of the workbook.
    data = pd.read_excel(workbook, header=0)
    data1 = pd.read_excel(workbook, sheet_name="Inventory_data", header=0)
    data2 = pd.read_excel(workbook, sheet_name="LCIA_result_data", header=0)

In [4]:
# Populate the graph from the main sheet, one row at a time.

# The relationship classes are identical for every row, so build them once.
Has_Reference_Product = Relationship.type('Produces')
Has_Sector = Relationship.type('Has_Sector')
Has_System_Boundary = Relationship.type("Has_System_Boundary")
Has_FU_Quantity = Relationship.type("Has_Functional_Unit_Quantity")
Has_FU_Unit = Relationship.type("Has_Functional_Unit_Unit")
Has_FU_Product = Relationship.type("Has_Functional_Unit_Product")
Has_Geography = Relationship.type("Has_Geographical_Location")
Reports_LCI = Relationship.type("Reports_LCI")

for _, row in data.iterrows():
    new_id = row['New_ID']
    pathway = row['LCI inventory pathway']

    # Paper-level nodes.
    paper = Node(
        "Paper_Title",
        name=str(row['Title']),
        author=str(row['Author']),
        published_year=row['Published year'],
        doi=str(row['Doi']),
    )
    boundary = Node("System_Boundary", name=str(row['System boundary']))
    geography = Node("Geography", name=str(row['Geography']))

    # Product / sector nodes.
    sector = Node("Sector", name=str(row['Sector']))
    ref_product = Node("Reference_Product", name=str(row['Reference product']))

    # Functional-unit nodes are keyed on the row's New_ID rather than on
    # their name/value.
    fu_product = Node("FunctionalUnit_Product", name=str(row['FunctionalUnit Product']), merge_key=new_id)
    fu_quantity = Node("FunctionalUnit_Quantity", value=float(row['FunctionalUnit Quantity']), merge_key=new_id)
    fu_unit = Node("FunctionalUnit_Unit", name=str(row['FunctionalUnit Unit']), merge_key=new_id)

    # NOTE(review): the activity carries a merge_key but is not merged in this
    # loop; it is created together with the relationships below. Confirm that
    # creating it per row is intended.
    merge_key = f"{new_id}|{pathway}"
    activity = Node(
        "Activity",
        name=str(row['Inventory activity']),
        merge_key=merge_key,
        technical_pathway=str(pathway),
        table_location=str(row['LCI inventory location']),
        page_number=row['Page number'],
    )

    # Relationships; each one also stores its type as a `name` property.
    r1 = Has_System_Boundary(paper, boundary, name='Has_System_Boundary')
    r2 = Has_FU_Quantity(paper, fu_quantity, name='Has_Functional_Unit_Quantity')
    r3 = Has_FU_Unit(fu_quantity, fu_unit, name='Has_Functional_Unit_Unit')
    r4 = Has_FU_Product(fu_unit, fu_product, name='Has_Functional_Unit_Product')
    r5 = Has_Geography(paper, geography, name='Has_Geographical_Location')
    r6 = Reports_LCI(paper, activity, name='Reports_LCI')
    r7 = Has_Reference_Product(activity, ref_product, name='Produces')
    r8 = Has_Sector(ref_product, sector, name='Has_Sector')

    # Merge the shared nodes on (label, property key) before the
    # relationships are written.
    merge_plan = (
        (paper, "Paper_Title", "name"),
        (boundary, "System_Boundary", "name"),
        (geography, "Geography", "name"),
        (sector, "Sector", "name"),
        (ref_product, "Reference_Product", "name"),
        (fu_product, "FunctionalUnit_Product", "merge_key"),
        (fu_quantity, "FunctionalUnit_Quantity", "merge_key"),
        (fu_unit, "FunctionalUnit_Unit", "merge_key"),
    )
    for node, label, key in merge_plan:
        graph.merge(node, label, key)

    # Write all eight relationships as a single subgraph.
    graph.create(r1 | r2 | r3 | r4 | r5 | r6 | r7 | r8)

# Attach inventory flows (with their amount and unit) to activities,
# one row of the "Inventory_data" sheet at a time.

# The relationship classes are identical for every row, so build them once.
Has_Input_Flow = Relationship.type("Has_Input_Flow")
Has_Output_Flow = Relationship.type("Has_Output_Flow")
Has_Value = Relationship.type("Has_Value")
Has_Unit = Relationship.type("Has_Unit")

for index, row in data1.iterrows():
    # Create nodes with properties
    merge_key = f"{row['New_ID']}|{row['LCI inventory pathway']}"
    Activity = Node("Activity", name=str(row['Inventory activity']), merge_key=merge_key, inventory_id=row['New_ID'], technical_pathway=str(row['LCI inventory pathway']))

    Flow = Node("Flow", name=str(row['Flow name']), is_reference_flow=row['Is_reference_flow'])
    Flow_Amount = Node("Amount", value=float(row['Amount']))
    Flow_Unit = Node("Unit", name=str(row['Unit']))

    # Collect only the relationships that belong to THIS row. Combining fixed
    # names (r1 | r2 | ...) would pick up a relationship left over from an
    # earlier row whenever the current row did not assign it, or fail with a
    # NameError if it was never assigned at all.
    row_relationships = [
        Has_Value(Flow, Flow_Amount, name="Has_Value"),
        Has_Unit(Flow_Amount, Flow_Unit, name="Has_Unit"),
    ]
    if row['Type'] == 'Has_Input':
        row_relationships.append(Has_Input_Flow(Activity, Flow, name="Has_Input_Flow"))
    elif row['Type'] == 'Has_Output':
        row_relationships.append(Has_Output_Flow(Activity, Flow, name="Has_Output_Flow"))
    # Any other Type value leaves the flow unattached to the activity.

    # Merge nodes based on properties
    graph.merge(Activity, "Activity", "merge_key")

    # Create relationships: union the per-row relationships into one subgraph.
    subgraph = row_relationships[0]
    for relationship in row_relationships[1:]:
        subgraph = subgraph | relationship
    graph.create(subgraph)

# Load LCIA results, one row of the "LCIA_result_data" sheet at a time.

# The relationship classes are identical for every row, so build them once.
Has_LCIA_Result = Relationship.type("Has_LCIA_Result")
Has_Impact_Category = Relationship.type("Has_Impact_Category")
Has_LCIA_Method = Relationship.type("Has_LCIA_Method")
Has_Impact_Category_Value = Relationship.type("Has_Impact_Category_Value")
Has_Impact_Category_Unit = Relationship.type("Has_Impact_Category_Unit")
Reports_LCIA_Results = Relationship.type("Reports_LCIA_Result")

for index, row in data2.iterrows():
    # Create nodes with properties
    Reference_Product = Node("Reference_Product", name=str(row['Reference product']))

    # A result node exists only for rows flagged "yes". It is reset to None
    # otherwise, so a row without a result can never be linked to the result
    # node of a previous row (or hit a NameError on the first row).
    if str(row['has_impact_result']).strip().lower() == 'yes':
        LCIA_Result = Node("LCIA_Result", name='LCIA result', technical_pathway=str(row['Technical pathway']))
    else:
        LCIA_Result = None

    Impact_Category = Node("Impact_Category", name=str(row['Impact category']))
    Impact_Category_Value = Node("Impact_Category_Value", value=float(row['Impact category value']))
    Impact_Category_Unit = Node("Impact_Category_Unit", name=str(row['Impact category unit']))
    LCIA_Method = Node("LCIA_Method", name=str(row['LCIA Method']), merge_key=row['New_ID'])
    Title = Node("Paper_Title", name=str(row['Title']))

    # Relationships that do not involve the result node are always built.
    # NOTE(review): rows without a result still get their category/value/unit
    # chain created; confirm whether such rows should be skipped entirely.
    row_relationships = [
        Has_LCIA_Method(Impact_Category, LCIA_Method, name="Has_LCIA_Method"),
        Has_Impact_Category_Value(Impact_Category, Impact_Category_Value, name="Has_Impact_Category_Value"),
        Has_Impact_Category_Unit(Impact_Category_Value, Impact_Category_Unit, name="Has_Impact_Category_Unit"),
    ]
    if LCIA_Result is not None:
        # NOTE(review): the `name` property below is plural while the
        # relationship type is "Reports_LCIA_Result"; confirm which is intended.
        row_relationships.extend([
            Has_LCIA_Result(Reference_Product, LCIA_Result, name='Has_LCIA_Result'),
            Reports_LCIA_Results(Title, LCIA_Result, name='Reports_LCIA_Results'),
            Has_Impact_Category(LCIA_Result, Impact_Category, name='Has_Impact_Category'),
        ])

    # Merge nodes based on properties
    graph.merge(Reference_Product, "Reference_Product", "name")
    graph.merge(Title, "Paper_Title", "name")
    graph.merge(LCIA_Method, "LCIA_Method", "merge_key")

    # Create relationships: union the per-row relationships into one subgraph.
    subgraph = row_relationships[0]
    for relationship in row_relationships[1:]:
        subgraph = subgraph | relationship
    graph.create(subgraph)

# Post-processing, run once all sheets have been loaded.

# Delete every node whose name is '/', together with its relationships
# (DETACH DELETE). The returned count is not read here.
query = """
MATCH (n)
WHERE n.name = '/'
DETACH DELETE n
RETURN COUNT(n) AS DeletedNodes
"""

# Remove the merge_key property from every label that was given one.
remove_merge_keys_query = """
MATCH (n)
WHERE n:Activity
   OR n:LCIA_Method
   OR n:FunctionalUnit_Product
   OR n:FunctionalUnit_Quantity
   OR n:FunctionalUnit_Unit
REMOVE n.merge_key
"""

# Order matters only in that both statements run before the final message.
for statement in (query, remove_merge_keys_query):
    graph.run(statement)

print("Finished!")

Finished!
