In [13]:
from neo4j import GraphDatabase
import json

In [14]:
import os
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Read the credentials from the environment
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASS")

# Check that all values are loaded correctly
assert uri is not None, "NEO4J_URI not found in .env"
assert username is not None, "NEO4J_USER not found in .env"
assert password is not None, "NEO4J_PASS not found in .env"

# Create a connection to Neo4j
driver = GraphDatabase.driver(uri, auth=(username, password))

# Test connection
try:
    with driver.session() as session:
        result = session.run("RETURN 'Connected to Neo4J!' AS message")
        print(result.single()["message"])
except Exception as e:
    print("Connection failed:", e)
finally:
    driver.close()

Connected to Neo4J!


In [4]:
import json
import os

folder_path = "/Users/wbm/Documents/BIT/Research Topics/few-shot-prompting"

# File paths
file_20_cases = os.path.join(folder_path, "baml_extracted_20_cases.json")
file_remaining = os.path.join(folder_path, "baml_extracted_remaining_cases.json")

# Load both JSON files
with open(file_20_cases, "r") as f:
    data_20_cases = json.load(f)

with open(file_remaining, "r") as f:
    data_remaining_cases = json.load(f)

# Optional: print summary
print(f"Loaded {len(data_20_cases)} cases from 20_cases file.")
print(f"Loaded {len(data_remaining_cases)} cases from remaining_cases file.")

Loaded 20 cases from 20_cases file.
Loaded 87 cases from remaining_cases file.


In [7]:
def create_fault_graph(tx, data):
    # 1. Merge FaultLocation and add machine info to a list
    tx.run("""
        MERGE (fl:FaultLocation {name: $loc_name})
        SET fl.machines = apoc.coll.toSet(coalesce(fl.machines, []) + [$machine])
        """,
        loc_name=data["fault_location"]["name"],
        machine=data["fault_location"]["machine"]
    )

    # 2. Create FaultSymptoms and link to FaultLocation
    for symptom in data["fault_symptoms"]:
        tx.run("""
            MERGE (fs:FaultSymptom {description: $symptom})
            WITH fs
            MATCH (fl:FaultLocation {name: $loc_name})
            MERGE (fl)-[:HAS_FAULT]->(fs)
            """,
            symptom=symptom,
            loc_name=data["fault_location"]["name"]
        )

        # 3. Link FaultReasons to FaultSymptom
        for reason in data["fault_reason"]:
            tx.run("""
                MERGE (fr:FaultReason {name: $reason_name})
                WITH fr
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[:CAUSED_BY]->(fr)
                """,
                reason_name=reason["name"],
                symptom=symptom
            )

        # 4. Link FaultMeasures to FaultSymptom, include resolution status
        for measure in data["fault_measures"]:
            tx.run("""
                MERGE (fm:FaultMeasure {description: $measure_desc})
                WITH fm
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[r:MITIGATED_BY]->(fm)
                SET r.resolution_status = $status
                """,
                measure_desc=measure["description"],
                symptom=symptom,
                status=data["resolution_status"]
            )

# Run it
with driver.session() as session:
    session.write_transaction(create_fault_graph, data)

driver.close()

  with driver.session() as session:
  session.write_transaction(create_fault_graph, data)


In [None]:
from neo4j import GraphDatabase
import json
import os

# -------------------------
# Load the JSON files
# -------------------------
folder_path = "/Users/wbm/Documents/BIT/Research Topics/few-shot-prompting"
file_20_cases = os.path.join(folder_path, "baml_extracted_20_cases.json")
file_remaining = os.path.join(folder_path, "baml_extracted_remaining_cases.json")

with open(file_20_cases, "r") as f:
    data_20_cases = json.load(f)

with open(file_remaining, "r") as f:
    data_remaining_cases = json.load(f)

print(f"Loaded {len(data_20_cases)} cases from 20_cases file.")
print(f"Loaded {len(data_remaining_cases)} cases from remaining_cases file.")

# -------------------------
# Neo4j Connection Setup
# -------------------------
# Load environment variables from .env
load_dotenv()

# Read connection info from environment
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASS")

assert uri, "Missing NEO4J_URI"
assert username, "Missing NEO4J_USER"
assert password, "Missing NEO4J_PASS"

driver = GraphDatabase.driver(uri, auth=(username, password))

# -------------------------
# Define how to create nodes and relationships
# -------------------------
def create_fault_graph(tx, data):
    loc_name = data["fault_location"]["name"]
    machine = data["fault_location"]["machine"]

    if machine:  # Only add machine if it's not None or empty
        tx.run("""
            MERGE (fl:FaultLocation {name: $loc_name})
            SET fl.machines = apoc.coll.toSet(coalesce(fl.machines, []) + [$machine])
            """,
            loc_name=loc_name,
            machine=machine
        )
    else:
        tx.run("""
            MERGE (fl:FaultLocation {name: $loc_name})
            """,
            loc_name=loc_name
        )

    for symptom in data["fault_symptoms"]:
        tx.run("""
            MERGE (fs:FaultSymptom {description: $symptom})
            WITH fs
            MATCH (fl:FaultLocation {name: $loc_name})
            MERGE (fl)-[:HAS_FAULT]->(fs)
            """,
            symptom=symptom,
            loc_name=data["fault_location"]["name"]
        )

        for reason in data["fault_reason"]:
            tx.run("""
                MERGE (fr:FaultReason {name: $reason_name})
                WITH fr
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[:CAUSED_BY]->(fr)
                """,
                reason_name=reason["name"],
                symptom=symptom
            )

        for measure in data["fault_measures"]:
            tx.run("""
                MERGE (fm:FaultMeasure {description: $measure_desc})
                WITH fm
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[r:MITIGATED_BY]->(fm)
                SET r.resolution_status = $status
                """,
                measure_desc=measure["description"],
                symptom=symptom,
                status=data["resolution_status"]
            )

# -------------------------
# Push both datasets to Neo4j
# -------------------------
with driver.session() as session:
    for entry in data_20_cases + data_remaining_cases:
        if "result" in entry:
            data = entry["result"]
            session.write_transaction(create_fault_graph, data)
        else:
            print("Skipping entry without 'result':", entry)

driver.close()
print("All data pushed to Neo4j successfully!")


Loaded 20 cases from 20_cases file.
Loaded 87 cases from remaining_cases file.


  session.write_transaction(create_fault_graph, data)


All data pushed to Neo4j successfully!


In [9]:
for i, entry in enumerate(data_20_cases + data_remaining_cases):
    if "fault_location" not in entry:
        print(f"Missing 'fault_location' in entry #{i}")
        print(json.dumps(entry, indent=2))

Missing 'fault_location' in entry #0
{
  "case_id": "IBM3_C15_23-Feb-15_23-Feb-15",
  "result": {
    "fault_location": {
      "name": "cryocompressor",
      "machine": "IBM3"
    },
    "fault_symptoms": [
      "He-verlies/tekort",
      "Displacer maakt een wat hoog piepend/schrapend geluid"
    ],
    "fault_reason": [],
    "fault_measures": [],
    "resolution_status": "Unknown"
  }
}
Missing 'fault_location' in entry #1
{
  "case_id": "IBM3_C26_21-Sep-16_22-Sep-16",
  "result": {
    "fault_location": {
      "name": "Armkanteling",
      "machine": "IBM3"
    },
    "fault_symptoms": [
      "Armkanteling werkt niet",
      "Shutterpositie indicatie: tussenpositie"
    ],
    "fault_reason": [
      {
        "name": "Shutterpos. In tussen positie is, werkt armkanteling niet"
      }
    ],
    "fault_measures": [
      {
        "description": "JP6 los en pen 2 en 3 aan elkaar gesoldeerd"
      }
    ],
    "resolution_status": "Unknown"
  }
}
Missing 'fault_location' in ent

In [12]:
# IMPORT ENTITIES FROM ENGLISH MANUAL BOOK

# Load the manual_book_fault_reports.json
# -------------------------
file_manual_book = os.path.join(folder_path, "manual_book_fault_reports.json")

with open(file_manual_book, "r") as f:
    data_manual_book = json.load(f)

print(f"Loaded {len(data_manual_book)} cases from manual_book_fault_reports.json")

with driver.session() as session:
    for entry in data_manual_book:
        if "fault_location" in entry:  # because this file doesn't wrap each entry in {"result": ...}
            session.write_transaction(create_fault_graph, entry)
        else:
            print("⚠️ Skipped invalid entry:", entry)

print("✅ Manual book data imported into Neo4j successfully!")


Loaded 28 cases from manual_book_fault_reports.json


  with driver.session() as session:
  session.write_transaction(create_fault_graph, entry)


✅ Manual book data imported into Neo4j successfully!


CONNECTING TO LOCAL DB

In [None]:
from neo4j import GraphDatabase

# Local Neo4j settings
uri = "bolt://localhost:7687"
username = "neo4j"
password = "test12345"

# Connect to Neo4j
driver = GraphDatabase.driver(uri, auth=(username, password))

# Quick test: run a Cypher query
with driver.session() as session:
    result = session.run("RETURN 'Neo4j connection successful!' AS message")
    print(result.single()["message"])

driver.close()

Neo4j connection successful!


In [12]:
import os
from dotenv import load_dotenv
import json

def create_fault_graph(tx, data):
    loc_name = data["fault_location"]["name"]
    machine = data["fault_location"].get("machine", None)
    # Create or merge FaultLocation node
    if machine:
        tx.run(
            """
            MERGE (fl:FaultLocation {name: $loc_name})
            SET fl.machines = apoc.coll.toSet(coalesce(fl.machines, []) + [$machine])
            """,
            loc_name=loc_name,
            machine=machine
        )
    else:
        tx.run(
            """
            MERGE (fl:FaultLocation {name: $loc_name})
            """,
            loc_name=loc_name
        )

    # Fault Symptoms, Reasons, Measures
    for symptom in data.get("fault_symptoms", []):
        tx.run(
            """
            MERGE (fs:FaultSymptom {description: $symptom})
            WITH fs
            MATCH (fl:FaultLocation {name: $loc_name})
            MERGE (fl)-[:HAS_FAULT]->(fs)
            """,
            symptom=symptom,
            loc_name=loc_name
        )
        # Fault Reason
        for reason in data.get("fault_reason", []):
            tx.run(
                """
                MERGE (fr:FaultReason {name: $reason_name})
                WITH fr
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[:CAUSED_BY]->(fr)
                """,
                reason_name=reason["name"],
                symptom=symptom
            )
        # Fault Measure
        for measure in data.get("fault_measures", []):
            tx.run(
                """
                MERGE (fm:FaultMeasure {description: $measure_desc})
                WITH fm
                MATCH (fs:FaultSymptom {description: $symptom})
                MERGE (fs)-[r:MITIGATED_BY]->(fm)
                SET r.resolution_status = $status
                """,
                measure_desc=measure["description"],
                symptom=symptom,
                status=data.get("resolution_status", "Unknown")
            )

def load_json_file(filepath):
    with open(filepath, "r") as f:
        if filepath.endswith(".jsonl"):
            return [json.loads(line) for line in f]
        else:
            return json.load(f)

folder_path = "/Users/wbm/Documents/BIT/Thesis/graphRAG-industrial-fault-diagnosis/data/processed"
files = [
    "baml_extracted_20_cases.json",
    "baml_extracted_remaining_cases.json",
    "manual_book_fault_reports.json"
]
# Add more files as needed

all_cases = []

for file in files:
    path = os.path.join(folder_path, file)
    print(f"Loading {file} ...")
    data = load_json_file(path)
    all_cases.extend(data)

print(f"Loaded total cases: {len(all_cases)}")

with driver.session() as session:
    for i, entry in enumerate(all_cases):
        # Handle "result" or flat dict
        if "result" in entry:
            data = entry["result"]
        else:
            data = entry
        if "fault_location" in data:
            session.execute_write(create_fault_graph, data)
        else:
            print(f"Skipping case #{i}: missing fault_location")
print("All cases imported into local Neo4j!")
driver.close()


  with driver.session() as session:
[#E849]  _: <CONNECTION> error: Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687))): OSError('No data')
Transaction failed and will be retried in 1.0783075397400381s (Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687))))


Loading baml_extracted_20_cases.json ...
Loading baml_extracted_remaining_cases.json ...
Loading manual_book_fault_reports.json ...
Loaded total cases: 135
All cases imported into local Neo4j!


Pushing New Property "case_id" for each Nodes

This is done for traceability

In [18]:
from neo4j import GraphDatabase
import json
import os

# Load the JSON file (20 cases)
folder_path = "/Users/wbm/Documents/BIT/Thesis/graphRAG-industrial-fault-diagnosis/data/processed"
file_20_cases = os.path.join(folder_path, "baml_extracted_20_cases.json")
file_remaining_cases = os.path.join(folder_path, "baml_extracted_remaining_cases.json")

with open(file_20_cases, "r") as f:
    data_20_cases = json.load(f)

with open(file_remaining_cases, "r") as f:
    data_remaining_cases = json.load(f)

# Setup Neo4j driver
from dotenv import load_dotenv
load_dotenv()
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASS")
driver = GraphDatabase.driver(uri, auth=(username, password))

def update_case_ids(tx, case_id, data):
    # FaultLocation
    tx.run(
        """
        MATCH (n:FaultLocation {name: $loc_name})
        SET n.case_ids = coalesce(n.case_ids, []) + CASE WHEN $case_id IN coalesce(n.case_ids, []) THEN [] ELSE [$case_id] END
        """,
        loc_name=data["fault_location"]["name"],
        case_id=case_id
    )

    # FaultSymptoms
    for symptom in data["fault_symptoms"]:
        tx.run(
            """
            MATCH (n:FaultSymptom {description: $symptom})
            SET n.case_ids = coalesce(n.case_ids, []) + CASE WHEN $case_id IN coalesce(n.case_ids, []) THEN [] ELSE [$case_id] END
            """,
            symptom=symptom,
            case_id=case_id
        )

    # FaultReasons
    for reason in data["fault_reason"]:
        tx.run(
            """
            MATCH (n:FaultReason {name: $reason_name})
            SET n.case_ids = coalesce(n.case_ids, []) + CASE WHEN $case_id IN coalesce(n.case_ids, []) THEN [] ELSE [$case_id] END
            """,
            reason_name=reason["name"],
            case_id=case_id
        )

    # FaultMeasures
    for measure in data["fault_measures"]:
        tx.run(
            """
            MATCH (n:FaultMeasure {description: $measure_desc})
            SET n.case_ids = coalesce(n.case_ids, []) + CASE WHEN $case_id IN coalesce(n.case_ids, []) THEN [] ELSE [$case_id] END
            """,
            measure_desc=measure["description"],
            case_id=case_id
        )

# Now update the graph nodes for all 20 cases
with driver.session() as session:
    for entry in data_20_cases + data_remaining_cases:
        case_id = entry.get("case_id")
        data = entry["result"]
        session.write_transaction(update_case_ids, case_id, data)

driver.close()
print("Case IDs successfully added to all entities (20 cases + remaining cases)!")

  session.write_transaction(update_case_ids, case_id, data)


Case IDs successfully added to all entities (20 cases + remaining cases)!


Push new property "source_image" to all related nodes

In [21]:
import os
import json
from neo4j import GraphDatabase
from dotenv import load_dotenv

# Load data
folder_path = "/Users/wbm/Documents/BIT/Thesis/graphRAG-industrial-fault-diagnosis/data/processed"
file_manual_book = os.path.join(folder_path, "manual_book_fault_reports.json")
with open(file_manual_book, "r") as f:
    data_manual_book = json.load(f)

# Neo4j setup
load_dotenv()
uri = os.getenv("NEO4J_URI")
username = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASS")
driver = GraphDatabase.driver(uri, auth=(username, password))

def update_source_image(tx, source_image, data):
    # FaultLocation
    tx.run("""
        MATCH (n:FaultLocation {name: $loc_name})
        SET n.source_image = $source_image
        """,
        loc_name=data["fault_location"]["name"],
        source_image=source_image
    )
    # FaultSymptoms
    for symptom in data["fault_symptoms"]:
        tx.run("""
            MATCH (n:FaultSymptom {description: $symptom})
            SET n.source_image = $source_image
            """,
            symptom=symptom,
            source_image=source_image
        )
    # FaultReasons
    for reason in data["fault_reason"]:
        tx.run("""
            MATCH (n:FaultReason {name: $reason_name})
            SET n.source_image = $source_image
            """,
            reason_name=reason["name"],
            source_image=source_image
        )
    # FaultMeasures
    for measure in data["fault_measures"]:
        tx.run("""
            MATCH (n:FaultMeasure {description: $measure_desc})
            SET n.source_image = $source_image
            """,
            measure_desc=measure["description"],
            source_image=source_image
        )

with driver.session() as session:
    for entry in data_manual_book:
        source_image = entry.get("source_image")
        if source_image:
            session.write_transaction(update_source_image, source_image, entry)
        else:
            print("Skipped entry with no source_image:", entry)

driver.close()
print("source_image property successfully added to all entities from manual_book_fault_reports.json!")



  session.write_transaction(update_source_image, source_image, entry)


source_image property successfully added to all entities from manual_book_fault_reports.json!
