In [9]:
import os
from neo4j import GraphDatabase
from dotenv import load_dotenv

In [10]:
# Load .env variables
load_dotenv()

# Read environment variables
uri = os.getenv("NEO4J_URI")
user = os.getenv("NEO4J_USER")
pwd = os.getenv("NEO4J_PASS")

# Create Neo4j driver
driver = GraphDatabase.driver(uri, auth=(user, pwd))

# Test connection
try:
    with driver.session() as session:
        result = session.run("RETURN 1 AS test").single()
        print("Connected to Neo4j! Test query returned:", result["test"])
except Exception as e:
    print("Connection failed:", e)

Connected to Neo4j! Test query returned: 1


Install neo4j package for graphrag

pip install neo4j-graphrag


In [12]:
# Setting up OpenAI Embeddings

import os
from dotenv import load_dotenv

# Load the .env file
load_dotenv()

# Force-set the key into the environment in case it's still not picked up
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

from neo4j_graphrag.embeddings import OpenAIEmbeddings
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

STEP 1 : Tagging Nodes for Indexing

Add new label for all entities "TextChunk"

this is the cypher to do that in Neo4J

MATCH (n:FaultSymptom) SET n:TextChunk;
MATCH (n:FaultReason) SET n:TextChunk;
MATCH (n:FaultMeasure) SET n:TextChunk;
MATCH (n:FaultLocation) SET n:TextChunk;

it is not changing the existing label, but it add new ones "TextChunk" for each entity

In [7]:
# Step 2: Creating a Vector Index in Neo4j

from neo4j_graphrag.indexes import create_vector_index

driver = GraphDatabase.driver(uri, auth=(user, pwd))

INDEX_NAME = "content_index"
label_to_index = "TextChunk"
vector_prop = "embedding"
dim = 1536

# Create vector index (once only)
create_vector_index(
    driver,
    INDEX_NAME,
    label=label_to_index,
    embedding_property=vector_prop,
    dimensions=dim,
    similarity_fn="cosine"  # cosine is best for OpenAI embeddings
)

print(f"Vector index '{INDEX_NAME}' created successfully.")

NameError: name 'GraphDatabase' is not defined

In [5]:
# Step 3: Computing and Upserting Embeddings

from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.indexes import upsert_vectors
from neo4j_graphrag.types import EntityType

# Initialize embedder (you previously set OPENAI_API_KEY)
embedder = OpenAIEmbeddings(model="text-embedding-ada-002")

# Query adjusted for your property names
query = """
MATCH (n:TextChunk)
WHERE (n:FaultSymptom AND n.description IS NOT NULL)
   OR (n:FaultMeasure AND n.description IS NOT NULL)
   OR (n:FaultReason AND n.name IS NOT NULL)
   OR (n:FaultLocation AND n.name IS NOT NULL)
RETURN elementId(n) AS node_id,
       CASE
         WHEN n:FaultSymptom THEN n.description
         WHEN n:FaultMeasure THEN n.description
         WHEN n:FaultReason THEN n.name
         WHEN n:FaultLocation THEN n.name
       END AS content
"""

with driver.session() as session:
    result = session.run(query)
    records = result.data()

print(f"Found {len(records)} nodes to embed.")





Found 740 nodes to embed.


Example format of the output for records:

{
  "node_id": "some_id",
  "content": "The vacuum pump overheats when running continuously"
}

In [6]:
# Compute Embeddings

node_ids = []
embeddings = []

for record in records:
    text = record["content"]
    node_id = record["node_id"]

    #Safety check in case of empty texts
    if not text or not text.strip():
        continue

    vec = embedder.embed_query(text)

    node_ids.append(str(node_id))
    embeddings.append(vec)

print(f"Generated embeddings for {len(node_ids)} nodes.")


Generated embeddings for 740 nodes.


In [9]:
# Upsert embeddings to Neo4J

upsert_vectors(
    driver,
    ids=node_ids,
    embedding_property= "embedding",
    embeddings=embeddings,
    entity_type=EntityType.NODE
)

print(f" Indexed {len(node_ids)} text nodes with embeddings.")

 Indexed 740 text nodes with embeddings.


Implementing Retrieval Strategies (Vector Search and Text-to-Cypher)

In [4]:
from neo4j_graphrag.llm import OpenAILLM

# Instantiate the LLM for generating answers
llm = OpenAILLM(
    model_name="gpt-3.5-turbo",
    model_params={"temperature":0, "max_tokens": 1024}
)

Internally, Neo4J uses Langchain

In [7]:
# VectorRetriever - Semantic Vector Search

from neo4j_graphrag.retrievers import VectorRetriever

vector_retriever = VectorRetriever(driver, INDEX_NAME, embedder)

By default, this retriever will just retrieve the node and its similarity score
neo4j.com
. Often, you might want not just the raw node but some surrounding context. For example, if a log entry node is found, maybe you want to pull its related equipment or location. The GraphRAG library offers a VectorCypherRetriever which allows a follow-up Cypher query using the found node as a starting point
neo4j

Later, this might be useful for finding the FaultLocation is belong to which type_of components in Machine Ontology.

In [5]:
# Text2CypherRetriever - NL to Cypher Querying

from neo4j_graphrag.retrievers import Text2CypherRetriever

neo4j_schema = """
Graph schema:
- Nodes: [FaultSymptom, FaultReason, FaultMeasure, FaultLocation]
- Relationships:
  (FaultSymptom)-[:CAUSED_BY]->(FaultReason)
  (FaultSymptom)-[:MITIGATED_BY {resolution_status}]->(FaultMeasure)
  (FaultLocation)-[:HAS_FAULT]->(FaultSymptom)
- Properties:
  FaultSymptom{description}
  FaultReason{name}
  FaultMeasure{description}
  FaultLocation{name, machines}
"""

examples = [
        "Q: What fault symptoms are associated with cryopump?\nCypher: MATCH (fl:FaultLocation)-[:HAS_FAULT]->(fs:FaultSymptom) WHERE toLower(fl.name) = 'compressor' RETURN fs.description",
        "Q: De neutralizer heeft geen emissie. Wat zijn de mogelijke oorzaken?\n MATCH (fl:FaultLocation)-[hf:HAS_FAULT]->(fs:FaultSymptom)-[cb:CAUSED_BY]->(fr:FaultReason) WHERE fl.name = 'Neutralizer' AND toLower(fs.description) CONTAINS 'geen emissie' RETURN fr",
        "Q: Welke maatregel moet worden genomen als er lucht terugkomt in het koelcircuit en er lekkage is bij de glycolpomp?\n MATCH (fl:FaultLocation)-[hf:HAS_FAULT]->(fs:FaultSymptom)-[mb:MITIGATED_BY]->(fm:FaultMeasure) WHERE fl.name = 'Glycolpomp' AND (fs.description CONTAINS 'lekkage' OR fs.description CONTAINS 'lucht') RETURN fm"
    ]    

t2c_llm = OpenAILLM(model_name="gpt-3.5-turbo", model_params={"temperature": 0})
text2cypher_retriever = Text2CypherRetriever(
    driver=driver,
    llm=t2c_llm,
    neo4j_schema=neo4j_schema,
    examples= examples
)

CHATBOT PIPELINE

In [6]:
from langdetect import detect

def answer_question(query: str) -> str:
    # Automatically detect the language of the query
    try:
        language = detect(query)
        print(f"[DEBUG] Detected language: {language}")
    except Exception as e:
        print(f"[DEBUG] Language detection failed: {e}")
        language = "en"  # fallback if detection fails

    # Setup storage
    context_chunks = []
    answer_parts = []

    # Run text2cypher Retriever
    try:
        records = list(text2cypher_retriever.search(query))
    except Exception as e:
        records = []

    # Format Text2Cypher result
    if records:
        context_text = "Graph query results:\n"
        for rec in records:
            context_text += str(rec) + "\n"
        context_chunks.append(context_text)
        answer_parts.append(f"*Data retrieved from graph for the query.*")

    # Run vector retriever (semantic search)
    try:
        results = list(vector_retriever.search(query_text=query, top_k=10))
    except Exception as e:
         results = []

    # Format Vector Retriever Results
    if results:
        # Check if it's the ('items', [...]) pattern
        if results[0][0] == "items":
            retriever_items = results[0][1]  # this is the list of RetrieverResultItem
            for item in retriever_items:
                try:
                    # Parse the inner content
                    content = eval(item.content) if isinstance(item.content, str) else item.content
                    snippet = (
                        content.get("description") or
                        content.get("text") or
                        content.get("name") or
                        content.get("content") or
                        str(content)
                    )
                    score = item.metadata.get("score", 0.0)
                    context_chunks.append(f"Excerpt (score {score:.2f}): {snippet}")
                except Exception as parse_e:
                    context_chunks.append(f"(Parsing error: {parse_e}) Raw: {item}")
            answer_parts.append(f"*Found {len(retriever_items)} related documents in the knowledge base.*")
        else:
            # Fallback for unknown structure
            context_chunks.append("(Unknown result structure)")

    # Combine the context for LLM
    if context_chunks:
        context_str = "\n\n".join(context_chunks)
    else:
        context_str = "(No direct context found; rely on general knowledge.)"
    
    # LLM System Instruction (language-aware)
    system_msg = (
        "You are an assistant answering questions about maintenance and troubleshooting of machine."
        "Use the provided graph data to answer"
    )

    if language == "nl":
        system_msg += "Vertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands."
    else:
        system_msg += "Translate all retrieved technical descriptions into English. Respond entirely in English."

    # Final prompt for the LLM
    prompt = f"{system_msg}\n\nContext:\n{context_str}\n\nQuestion: {query}\nAnswer:"

    print("===== FINAL PROMPT TO LLM =====")
    print(prompt)
    print("================================")

    #Generate the final answer using the LLM
    final_answer = llm.invoke(prompt)
    return final_answer


In [67]:
question = "Wat zijn de mogelijke foutsymptomen bij PLC's?"
answer = answer_question(question)
print(answer)

[DEBUG] Detected language: nl
===== FINAL PROMPT TO LLM =====
You are an assistant answering questions about maintenance and troubleshooting of machine.Use the provided graph data to answerVertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands.

Context:
Graph query results:
('items', [])
('metadata', {'cypher': "MATCH (fl:FaultLocation)-[:HAS_FAULT]->(fs:FaultSymptom) WHERE toLower(fl.name) = 'PLC' RETURN fs.description", '__retriever': 'Text2CypherRetriever'})


Excerpt (score 0.93): problemen van de motor control lijken van de PLC/instellingen ARC-net kaart te komen

Excerpt (score 0.93): tekening komt niet overeen met de PLC

Excerpt (score 0.93): PLC / computer(?) Op tilt: tegenstrijdige foutmeldingen

Excerpt (score 0.92): hardware in orde, problemen in de PLC/Arc-net kaart

Excerpt (score 0.92): PLC kaart

Question: Wat zijn de mogelijke foutsymptomen bij PLC's?
Answer:
content="Mogelijke foutsymptomen bij PLC's kunnen zijn problemen met 

In [69]:
question = "Wat zijn de mogelijke foutsymptomen bij een bron?"
answer = answer_question(question)
print(answer)

[DEBUG] Detected language: nl
===== FINAL PROMPT TO LLM =====
You are an assistant answering questions about maintenance and troubleshooting of machine.Use the provided graph data to answerVertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands.

Context:
Graph query results:
('items', [RetrieverResultItem(content="<Record fs.description='Bron startte niet door'>", metadata=None), RetrieverResultItem(content="<Record fs.description='emissie blijft vaak beperkt tot 24mA'>", metadata=None), RetrieverResultItem(content="<Record fs.description='oude probleem, geen bundel uit de bron'>", metadata=None), RetrieverResultItem(content="<Record fs.description='Uitval tijdens proces'>", metadata=None), RetrieverResultItem(content="<Record fs.description='communicatie fout'>", metadata=None), RetrieverResultItem(content='<Record fs.description=\'start in stand "plasma" niet door\'>', metadata=None)])
('metadata', {'cypher': "MATCH (fl:FaultLocation)-[:HAS_FA

In [70]:
question = "Ik kan de plasma bron niet starten. Hoe komt dat en wat moet ik doen?"
answer = answer_question(question)
print(answer)

[DEBUG] Detected language: nl
===== FINAL PROMPT TO LLM =====
You are an assistant answering questions about maintenance and troubleshooting of machine.Use the provided graph data to answerVertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands.

Context:
Graph query results:
('items', [])
('metadata', {'cypher': "MATCH (fl:FaultLocation)-[:HAS_FAULT]->(fs:FaultSymptom)-[:CAUSED_BY]->(fr:FaultReason) WHERE toLower(fl.name) = 'plasma bron' AND toLower(fs.description) CONTAINS 'niet starten' RETURN fr", '__retriever': 'Text2CypherRetriever'})


Excerpt (score 0.95): Plasma bron starte niet op

Excerpt (score 0.92): start in stand "plasma" niet door

Excerpt (score 0.92): machine start niet goed op

Excerpt (score 0.92): blijkt hangen op status 'plasma'

Excerpt (score 0.92): Opstartproblemen

Excerpt (score 0.92): Neutralizer start niet goed op

Excerpt (score 0.92): Machine geeft problemen met het opstarten

Excerpt (score 0.92): Bij 135 mA viel b

In [71]:
question = "Ik heb een probleem met de RF-generator. De plasmabron start niet. Hoe komt dat en wat is de oplossing?"
answer = answer_question(question)
print(answer)

[DEBUG] Detected language: nl
===== FINAL PROMPT TO LLM =====
You are an assistant answering questions about maintenance and troubleshooting of machine.Use the provided graph data to answerVertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands.

Context:
Graph query results:
('items', [])
('metadata', {'cypher': "MATCH (fl:FaultLocation)-[:HAS_FAULT]->(fs:FaultSymptom)-[:CAUSED_BY]->(fr:FaultReason) WHERE toLower(fl.name) = 'RF-generator' AND toLower(fs.description) CONTAINS 'plasmabron start niet' RETURN fr", '__retriever': 'Text2CypherRetriever'})


Excerpt (score 0.93): Plasma bron starte niet op

Excerpt (score 0.92): Machine geeft problemen met het opstarten

Excerpt (score 0.92): Neutralizer start niet goed op

Excerpt (score 0.92): machine start niet goed op

Excerpt (score 0.92): Opstartproblemen

Question: Ik heb een probleem met de RF-generator. De plasmabron start niet. Hoe komt dat en wat is de oplossing?
Answer:
content='Het problee

In [77]:
question = "Ik heb een probleem met de compressor. het kan niet starten. Hoe komt dat en wat is de oplossing?"
answer = answer_question(question)
print(answer)

[DEBUG] Detected language: nl
===== FINAL PROMPT TO LLM =====
You are an assistant answering questions about maintenance and troubleshooting of machine.Use the provided graph data to answerVertaal alle technische beschrijvingen naar het Nederlands. Antwoord volledig in het Nederlands.

Context:
Graph query results:
('items', [])
('metadata', {'cypher': "MATCH (fl:FaultLocation)-[:HAS_FAULT]->(fs:FaultSymptom)-[:CAUSED_BY]->(fr:FaultReason), (fl)-[:HAS_FAULT]->(fs)-[:MITIGATED_BY]->(fm:FaultMeasure) \nWHERE toLower(fl.name) = 'compressor' AND fs.description CONTAINS 'kan niet starten' \nRETURN fr.name, fm.description", '__retriever': 'Text2CypherRetriever'})


Excerpt (score 0.93): Machine geeft problemen met het opstarten

Excerpt (score 0.92): machine start niet goed op

Excerpt (score 0.92): probleem met de koeling

Excerpt (score 0.92): compressor had moeite met hoog vacuum

Excerpt (score 0.92): Koelslangen omgewisseld en de compressor werkt weer

Excerpt (score 0.92): Neutralizer 

In [71]:
test_query = "Ik heb een probleem: de emissie van de Bron is beperkt tot 24mA. Wat zijn de mogelijke oorzaken van deze daling?"

items = text2cypher_retriever.search(test_query)

print(text2cypher_retriever.search(test_query))

print(items.metadata["cypher"])

items=[RetrieverResultItem(content="<Record fr=<Node element_id='4:27c1505a-061e-4c53-a17f-9cc7dc4d5c84:103' labels=frozenset({'TextChunk', 'FaultReason'}) properties={'name': 'Anode bleek een dikke harde isolerende laag te hebben', 'embedding': [0.008099883794784546, -0.010476883500814438, 0.0064237117767333984, -0.010793386027216911, -0.027826396748423576, 0.00803529191762209, 0.007286020088940859, -0.00323123368434608, -0.019028915092349052, -0.026508711278438568, 0.013809850439429283, 0.02808476611971855, 0.01924852840602398, 0.00775754451751709, -0.03841954469680786, -0.0008598860003985465, 0.04407783970236778, 0.02114754356443882, 0.0076219006441533566, 0.010722334496676922, -0.023731239140033722, 0.01213044859468937, -0.012886179611086845, -0.0076606557704508305, -0.003478299593552947, -0.017801659181714058, 0.007751085329800844, -0.026508711278438568, -0.012821586802601814, 0.0005215834244154394, 0.01330602914094925, -0.01834423467516899, -0.009960144758224487, -0.0107740079984

THIS IS THE REAL RETRIEVER (HYBRID + TEXT TO CYPHER (GRAPH TRAVERSAL))

I added fulltext-index on Neo4J Aura:

CREATE FULLTEXT INDEX `fulltext-index`
FOR (n:FaultLocation|FaultSymptom|FaultReason|FaultMeasure)
ON EACH [n.name, n.description]

In [13]:
# Setting up HybridCypherRetriever

from neo4j_graphrag.retrievers import HybridCypherRetriever

cypher_traversal_query = """
WITH node, score
OPTIONAL MATCH (node)<-[:CAUSED_BY]-(sym_from_reason:FaultSymptom)
OPTIONAL MATCH (node)<-[:MITIGATED_BY]-(sym_from_measure:FaultSymptom)
OPTIONAL MATCH (node)-[:HAS_FAULT]->(sym_from_loc:FaultSymptom)
WITH node, score,
    CASE 
       WHEN sym_from_reason IS NOT NULL THEN sym_from_reason
       WHEN sym_from_measure IS NOT NULL THEN sym_from_measure
       WHEN sym_from_loc IS NOT NULL THEN sym_from_loc
       WHEN node:FaultSymptom THEN node
       ELSE NULL 
    END AS symptom,
    node:FaultLocation AS isLocation
OPTIONAL MATCH (location:FaultLocation)-[:HAS_FAULT]->(symptom)
OPTIONAL MATCH (symptom)-[:CAUSED_BY]->(reason:FaultReason)
OPTIONAL MATCH (symptom)-[:MITIGATED_BY]->(measure:FaultMeasure)
WITH coalesce(location, CASE WHEN isLocation THEN node END) AS location,
     symptom, reason, measure, node, score
RETURN 
  coalesce(location.name, '') + ": " + coalesce(symptom.description, '') 
    + CASE WHEN reason IS NOT NULL THEN " Cause: " + reason.name ELSE "" END 
    + CASE WHEN measure IS NOT NULL THEN " Measure: " + measure.description ELSE "" END 
  AS text,
  score,
  {
    location: location.name, 
    symptom: symptom.description, 
    reason: reason.name, 
    measure: measure.description,
    location_id: elementId(location),
    symptom_id: elementId(symptom),
    reason_id: elementId(reason),
    measure_id: elementId(measure)
  } AS metadata
"""

retriever = HybridCypherRetriever(
    driver,
    vector_index_name="content_index",
    fulltext_index_name="fulltext-index",
    retrieval_query=cypher_traversal_query,
    embedder=embedder
)


In [2]:
# Language Detection

from langdetect import detect

def detect_language(text: str) -> str:
    try:
        lang_code = detect(text)
    except Exception:
        lang_code = "en"
    return "nl" if lang_code.startswith("nl") else "en"

the OpenAI embedding model can handle Dutch queries to some extent in the vector space. No need to translate the query since ADA is multilingual embeddings

In [3]:
from dotenv import load_dotenv
import os
import openai

# Load environment variables from .env file
load_dotenv()

# Read API key from environment
openai_api_key = os.getenv("OPENAI_API_KEY")

# Assign to OpenAI library
openai.api_key = openai_api_key

# Choose the model
OPENAI_MODEL = "gpt-3.5-turbo"

In [4]:
from openai import OpenAI

# Instantiate OpenAI client (will read OPENAI_API_KEY from env automatically)
client = OpenAI()

def generate_answer(user_query: str, graph_context: str, user_lang: str) -> str:

    #prompt for retriever behaviour and language (tuple)
    system_msg = (
        "You are an industrial maintenance assistant. "
        "If the user speaks Dutch, answer in Dutch; if English, answer in English. "
        "Use the provided graph context for factual information, and translate it if needed. "
        "Clearly mark any information taken from the graph context as [Graph], and any additional info from your general knowledge as [LLM]."
    )

    #user message with question and context
    user_msg = (
        f"**Graph context**:\n{graph_context}\n\n"
        f"**Question**:\n{user_query}\n\n"
        "**Answer** (please respond in the user's language and indicate [Graph] or [LLM] sources):"
    )

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg}
    ]

    # New SDK call using OpenAI client
    response = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages
    )

    answer = response.choices[0].message.content
    return answer

In [9]:
#Testing the logic

question = "Ik heb een probleem met de neutralizer. Hij produceert geen emissie. Hoe komt dat en wat is de oplossing?"

lang = detect_language(question)
print(f"[DEBUG] Detected language: {lang}")

retriever_result = retriever.search(query_text=question, top_k=5)

# Extract graph context into a readable string
context_chunks = []
for item in retriever_result.items:
    meta = item.metadata
    if not meta:
        continue

    snippet = ""
    if "symptom" in meta:
        snippet += f"Symptom: {meta['symptom']}\n"
    if "reason" in meta and meta['reason']:
        snippet += f"Reason: {meta['reason']}\n"
    if "measure" in meta and meta['measure']:
        snippet += f"Measure: {meta['measure']}\n"
    if "location" in meta and meta['location']:
        snippet = f"Location: {meta['location']}\n" + snippet

    context_chunks.append(snippet)

# Combine all context snippets
graph_context = "\n\n".join(context_chunks)
if not graph_context:
    graph_context = "(No graph context found — LLM will use general knowledge)"

print("\n==== Retrieved Graph Context ====")
print(graph_context)

# === Step 5: Generate the LLM answer ===
answer = generate_answer(user_query=question, graph_context=graph_context, user_lang=lang)

print("\n==== LLM Answer ====")
print(answer)

[DEBUG] Detected language: nl

==== Retrieved Graph Context ====
Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: Neutralizer schoongemaakt / gerepareerd


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: keramische binnenplaat vervangen


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: binnenkant en diafragma gestraald


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: Bron schoongemaakt


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: keramische buizen gestraald


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: schone kwartscilinder erin


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emissie
Measure: Dekplaat buitenkant bron gestraald/schoongemaakt


Location: Neutralizer
Symptom: Neutralizer starte wel gelijk maar geen emis

In [175]:
#Testing the logic

question = "Ik heb een probleem in de compressor gevonden, namelijk een lekkage van helium in de warmtewisselaar. Wat is de oorzaak en wat is de oplossing?"

lang = detect_language(question)
print(f"[DEBUG] Detected language: {lang}")

retriever_result = retriever.search(query_text=question, top_k=5)

# Extract graph context into a readable string
context_chunks = []
for item in retriever_result.items:
    meta = item.metadata
    if not meta:
        continue

    snippet = ""
    if "symptom" in meta:
        snippet += f"Symptom: {meta['symptom']}\n"
    if "reason" in meta and meta['reason']:
        snippet += f"Reason: {meta['reason']}\n"
    if "measure" in meta and meta['measure']:
        snippet += f"Measure: {meta['measure']}\n"
    if "location" in meta and meta['location']:
        snippet = f"Location: {meta['location']}\n" + snippet

    context_chunks.append(snippet)

# Combine all context snippets
graph_context = "\n\n".join(context_chunks)
if not graph_context:
    graph_context = "(No graph context found — LLM will use general knowledge)"

print("\n==== Retrieved Graph Context ====")
print(graph_context)

# === Step 5: Generate the LLM answer ===
answer = generate_answer(user_query=question, graph_context=graph_context, user_lang=lang)

print("\n==== LLM Answer ====")
print(answer)

[DEBUG] Detected language: nl

==== Retrieved Graph Context ====
Location: arm cover
Symptom: Lekkage glycol in de cover van de arm
Measure: Bestellen van keerringen en O-ringen via Oxford (ger-pt-office@oxinst.com)


Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason: Lekkage bij adsorber aansluiting
Measure: Nieuwe adsorber, Cryo en slangen gespoeld met He en nieuwe adsorber (actieve kool met He-vulling)


Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason: He-lekkage aan de heatexchanger
Measure: Nieuwe adsorber, Cryo en slangen gespoeld met He en nieuwe adsorber (actieve kool met He-vulling)


Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason: Lekkage bij adsorber aansluiting
Measure: Compressor afgevuld op 265 psi


Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason: He-lekkage aan de heatexchanger
Measure: Compressor afgevuld op 265 psi


Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason: 

In [17]:
#Testing the logic

from collections import defaultdict

question = "Ik heb een probleem in de compressor gevonden, namelijk een lekkage van helium in de warmtewisselaar. Wat is de oorzaak en wat is de oplossing?"

lang = detect_language(question)
print(f"[DEBUG] Detected language: {lang}")

retriever_result = retriever.search(query_text=question, top_k=5)

#Agregate all results
grouped = defaultdict(lambda: {"location": set(), "symptom": set(), "reason": set(), "measure": set()})

for item in retriever_result.items:
    meta = item.metadata or {}

    location = (meta.get("location") or "").strip()
    symptom = (meta.get("symptom") or "").strip()
    reason = (meta.get("reason") or "").strip()
    measure = (meta.get("measure") or "").strip()

    # Build a group key that combines fields that are the same
    key = (location, symptom, measure)  # fields you want to be the same across grouped items
    grouped[key]["location"].add(location)
    grouped[key]["symptom"].add(symptom)
    grouped[key]["reason"].add(reason)
    grouped[key]["measure"].add(measure)

# === Step 2: Build the context string ===
context_str = ""

def format_field(name, values):
    values = sorted(v for v in values if v)
    if not values:
        return ""
    if len(values) == 1:
        return f"{name}: {values[0]}\n"
    return f"{name}:\n" + "".join(f"- {v}\n" for v in values)

for entry in grouped.values():
    snippet = ""
    snippet += format_field("Location", entry["location"])
    snippet += format_field("Symptom", entry["symptom"])
    snippet += format_field("Reason", entry["reason"])
    snippet += format_field("Measure", entry["measure"])
    context_str += snippet + "\n"

# Use context_str in your prompt
graph_context = context_str.strip() if context_str.strip() else "(No graph context found — LLM will use general knowledge)"

print("\n==== Retrieved Graph Context ====")
print(graph_context)

# === Step 5: Generate the LLM answer ===
answer = generate_answer(user_query=question, graph_context=graph_context, user_lang=lang)

print("\n==== LLM Answer ====")
print(answer)

[DEBUG] Detected language: nl

==== Retrieved Graph Context ====
Location: arm cover
Symptom: Lekkage glycol in de cover van de arm
Measure: Bestellen van keerringen en O-ringen via Oxford (ger-pt-office@oxinst.com)

Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason:
- He-lekkage aan de heatexchanger
- Lekkage bij adsorber aansluiting
Measure: Nieuwe adsorber, Cryo en slangen gespoeld met He en nieuwe adsorber (actieve kool met He-vulling)

Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason:
- He-lekkage aan de heatexchanger
- Lekkage bij adsorber aansluiting
Measure: Compressor afgevuld op 265 psi

Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason:
- He-lekkage aan de heatexchanger
- Lekkage bij adsorber aansluiting
Measure: Compressor naar VS: warmte wisselaar vervangen

Location: Compressor
Symptom: He-lekkage aan de heatexchanger
Reason:
- He-lekkage aan de heatexchanger
- Lekkage bij adsorber aansluiting
Measure: Alle kleppen

In [14]:
#Testing the logic

question = "The Neutralizer in the IBM machine is not working. What would be the possible causes of it?"
lang = detect_language(question)
print(f"[DEBUG] Detected language: {lang}")

retriever_result = retriever.search(query_text=question, top_k=3)

# Extract graph context into a readable string
context_chunks = []
for item in retriever_result.items:
    meta = item.metadata
    if not meta:
        continue

    snippet = ""
    if "symptom" in meta:
        snippet += f"Symptom: {meta['symptom']}\n"
    if "reason" in meta and meta['reason']:
        snippet += f"Reason: {meta['reason']}\n"
    if "measure" in meta and meta['measure']:
        snippet += f"Measure: {meta['measure']}\n"
    if "location" in meta and meta['location']:
        snippet = f"Location: {meta['location']}\n" + snippet

    context_chunks.append(snippet)

# Combine all context snippets
graph_context = "\n\n".join(context_chunks)
if not graph_context:
    graph_context = "(No graph context found — LLM will use general knowledge)"
print(f"\n[DEBUG] Number of retrieved nodes: {len(retriever_result.items)}")
print(f"[DEBUG] Length of graph_context: {len(graph_context)} characters")
print(f"[DEBUG] Length of question: {len(question)} characters")

print("\n==== Retrieved Graph Context ====")
print(graph_context)

print("\n==== Sending to LLM ====")
print("USER QUERY:")
print(question)
print("\nGRAPH CONTEXT:")
print(graph_context)

# === Step 5: Generate the LLM answer ===
answer = generate_answer(user_query=question, graph_context=graph_context, user_lang=lang)

print("\n==== LLM Answer ====")
print(answer)

[DEBUG] Detected language: en

[DEBUG] Number of retrieved nodes: 226
[DEBUG] Length of graph_context: 41093 characters
[DEBUG] Length of question: 91 characters

==== Retrieved Graph Context ====
Location: Neutralizer
Symptom: Contactor closes but opens shortly thereafter
Reason: Cathode or neutralizer filaments issue
Measure: Check the cathode or neutralizer filaments


Location: Neutralizer
Symptom: Contactor closes but opens shortly thereafter
Reason: Fault switch settings
Measure: Check the cathode or neutralizer filaments


Location: Neutralizer
Symptom: Contactor closes but opens shortly thereafter
Reason: Neon not on
Measure: Check the cathode or neutralizer filaments


Location: Neutralizer
Symptom: Contactor closes but opens shortly thereafter
Reason: Blown fuse
Measure: Check the cathode or neutralizer filaments


Location: Neutralizer
Symptom: Contactor closes but opens shortly thereafter
Reason: Improper connector contact
Measure: Check the cathode or neutralizer filaments