## Preparation

In [1]:
import json
import ast
import os
from os import getenv
from stix2validator import validate_file, print_results
from dotenv import load_dotenv
from neo4j import GraphDatabase
import csv
import time

from neo4j_graphrag.embeddings import OllamaEmbeddings
from neo4j_graphrag.indexes import create_vector_index
from neo4j_graphrag.indexes import upsert_vectors
from neo4j_graphrag.types import EntityType
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.llm import OllamaLLM


# Read the Neo4j connection settings from the .env file one directory up.
load_dotenv("../.env")
db_uri = getenv("db_uri")
db_name = getenv("db_name")
db_username = getenv("db_username")
db_password= getenv("db_password")

# Module-level clients: later cells and functions read `driver`, `embedder`,
# `llm` and `db_name` as globals.
auth = (db_username, db_password)
driver = GraphDatabase.driver(uri=db_uri, auth=auth)
# Embedding model used for the vector index and for retrieval queries.
embedder = OllamaEmbeddings(model="nomic-embed-text")
# Chat model that answers the evaluation questions.
llm = OllamaLLM(model_name="deepseek-r1:1.5b")

In [3]:
#main function to load SDOs
def load_sdos(path):
    """Create one ``SDO`` node per non-relationship object in a STIX bundle file.

    Every object in the bundle's ``objects`` list whose type is neither
    ``relationship`` nor ``x-mitre-collection`` is merged on its ``id`` and
    receives an extra label derived from its STIX type.

    Args:
        path: Path to the STIX JSON bundle.

    Note:
        Relies on the module-level ``session`` being an open Neo4j session.
        ``SET x = $properties`` replaces all existing properties of the node.
    """
    # STIX bundles are JSON and therefore UTF-8; do not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        stix_json_data = json.load(f)

    stix_objects = [
        obj for obj in stix_json_data["objects"]
        if obj["type"] not in ("relationship", "x-mitre-collection")
    ]

    for stix_object in stix_objects:
        # Labels cannot be passed as Cypher parameters, so the label is
        # backtick-quoted (with embedded backticks doubled) before interpolation.
        label = to_pascal_case(stix_object["type"]).replace("`", "``")
        object_properties = get_stix_properties_dict(stix_object)

        # The id comes from the input file: pass it as a parameter instead of
        # splicing it into the query text.
        query = f"""
            MERGE (x:SDO:`{label}` {{id: $id}})
            SET x = $properties
        """

        session.run(query, id=stix_object["id"], properties=object_properties)


#main function to load SROs
def load_sros(path):
    """Create Neo4j relationships from the ``relationship`` objects of a STIX bundle.

    For every relationship object, the nodes whose ``id`` equals
    ``source_ref`` / ``target_ref`` are matched and connected with a
    relationship typed after the PascalCase form of ``relationship_type``.

    Args:
        path: Path to the STIX JSON bundle.

    Note:
        Relies on the module-level ``session`` being an open Neo4j session.
        If either endpoint node is missing, the MATCH yields nothing and no
        relationship is created.
    """
    with open(path, encoding="utf-8") as f:
        stix_json_data = json.load(f)

    # Exact comparison: the previous `in "relationship"` was a substring test
    # and would also accept types such as "" or "relation".
    stix_relationships = [
        rel for rel in stix_json_data["objects"] if rel["type"] == "relationship"
    ]

    for stix_relationship in stix_relationships:
        # Relationship types cannot be parameters; quote them with backticks.
        relationship_name = to_pascal_case(
            stix_relationship["relationship_type"]
        ).replace("`", "``")
        relationship_properties = get_stix_properties_dict(stix_relationship)

        query = f"""
            MATCH (sourceObject {{id: $source_ref}}), (targetObject {{id: $target_ref}})
            MERGE (sourceObject)-[r:`{relationship_name}`]->(targetObject)
            SET r = $properties
        """
        session.run(
            query,
            source_ref=stix_relationship["source_ref"],
            target_ref=stix_relationship["target_ref"],
            properties=relationship_properties,
        )


#main function to load embedded relationships
def _merge_embedded_relationship(source_id, target_id, relationship_type, properties):
    """MERGE one relationship between two existing nodes looked up by their ``id``."""
    # relationship_type is one of the constants below, never file data;
    # the ids are passed as parameters.
    query = f"""
        MATCH (sourceObject {{id: $source_id}}), (targetObject {{id: $target_id}})
        MERGE (sourceObject)-[r:{relationship_type}]->(targetObject)
        SET r = $properties
    """
    session.run(query, source_id=source_id, target_id=target_id, properties=properties)


def load_embedded_relationships(path):
    """Create relationships that STIX encodes inside objects rather than as SROs.

    Two kinds of links are created:

    * ``ReferencesTactic``: from each ``x-mitre-matrix`` to every id listed in
      its ``tactic_refs``.
    * ``ContainsTechnique``: from an ``x-mitre-tactic`` to each
      ``attack-pattern`` whose ``kill_chain_phases`` contains a ``phase_name``
      equal to the tactic's ``x_mitre_shortname``.

    Args:
        path: Path to the STIX JSON bundle.

    Note:
        Relies on the module-level ``session`` being an open Neo4j session.
    """
    with open(path, encoding="utf-8") as f:
        stix_json_data = json.load(f)

    objects = stix_json_data["objects"]

    ### Matrices to Tactics ###
    for matrix_obj in objects:
        if matrix_obj["type"] != "x-mitre-matrix":
            continue

        # A matrix without tactic_refs simply contributes no relationships.
        for tactic_ref_id in matrix_obj.get("tactic_refs", []):
            relationship_type = "ReferencesTactic"
            _merge_embedded_relationship(
                matrix_obj["id"],
                tactic_ref_id,
                relationship_type,
                {
                    "relationship_type": relationship_type,
                    "source_ref": matrix_obj["id"],
                    "target_ref": tactic_ref_id,
                },
            )

    ### Tactics to Techniques ###
    tactic_shortname_to_id = {
        obj["x_mitre_shortname"]: obj["id"]
        for obj in objects
        if obj["type"] == "x-mitre-tactic" and "x_mitre_shortname" in obj
    }

    for attack_pattern in objects:
        if attack_pattern["type"] != "attack-pattern":
            continue

        attack_pattern_id = attack_pattern["id"]

        for phase in attack_pattern.get("kill_chain_phases") or []:
            # Phases whose name matches no known tactic shortname are skipped.
            tactic_id = tactic_shortname_to_id.get(phase.get("phase_name"))
            if tactic_id is None:
                continue

            relationship_type = "ContainsTechnique"
            _merge_embedded_relationship(
                tactic_id,
                attack_pattern_id,
                relationship_type,
                {
                    "relationship_type": relationship_type,
                    "source_ref": tactic_id,
                    "target_ref": attack_pattern_id,
                    "kill_chain_name": phase.get("kill_chain_name"),
                },
            )


def to_pascal_case(input_string):
    """Turn a hyphen-separated string into PascalCase.

    Each hyphen-delimited piece is passed through ``str.capitalize`` (first
    character upper-cased, the rest lower-cased) and the pieces are joined
    without a separator.
    """
    return "".join(map(str.capitalize, input_string.split('-')))


def get_stix_properties_dict(stix_dict):
    """Flatten a STIX object into a property map.

    Values that are dicts or lists are serialised with ``json.dumps``; every
    other value is copied through unchanged. Key order follows the input.
    """
    return {
        key: json.dumps(val) if isinstance(val, (dict, list)) else val
        for key, val in stix_dict.items()
    }


def load_stix_to_neo4j(path: str):
    """Import one STIX bundle file into Neo4j in three passes.

    The passes run in order: domain-object nodes, explicit relationship
    objects, then relationships embedded inside objects. All three read the
    same file.
    """
    # Bundle validation is currently switched off:
    #results = validate_file(path)
    #print_results(results)
    for import_stage in (load_sdos, load_sros, load_embedded_relationships):
        import_stage(path)


# Import the three ATT&CK bundles (ICS, Mobile, Enterprise).
# The loader functions do not take a session argument: they read the
# module-level name `session` that this `with` statement binds, so they only
# work while this block is active.
with (driver.session(database=db_name) as session):
    load_stix_to_neo4j("../attack-stix-data/ics-attack-17.1.json")
    load_stix_to_neo4j("../attack-stix-data/mobile-attack-17.1.json")
    load_stix_to_neo4j("../attack-stix-data/enterprise-attack-17.1.json")

In [4]:
# Create the vector index "nodes" over the `embedding` property of :SDO nodes.
# The retrievers below look the index up by this name.
# NOTE(review): dimensions=768 presumably matches the output size of the
# "nomic-embed-text" embedder configured above — confirm before changing models.
create_vector_index(
    driver,
    name="nodes",
    label="SDO",
    embedding_property="embedding",
    dimensions=768,
    similarity_fn="cosine",
    neo4j_database=db_name
)

In [7]:
# Compute and store an embedding for every SDO node that has a name and a
# description. The embedded text is "<name>. <description>" followed by one
# sentence per outgoing relationship whose target has a name.

# Read all rows up front so the session is not held open while the slow
# embedding calls run.
with driver.session(database=db_name) as session:
    records = list(session.run("""
    MATCH (n:SDO)
    WHERE n.name IS NOT NULL AND n.description IS NOT NULL
    OPTIONAL MATCH (n)-[r]->(m)
    RETURN n, collect({type: type(r), target: m.name}) AS relationships
    """))

for record in records:
    node = record["n"]
    base_text = f"{node['name']}. {node['description']}"

    # collect({...}) yields a map with null values even when the OPTIONAL MATCH
    # finds nothing, so the list is never empty. Filter first, then decide
    # whether to append anything; otherwise isolated nodes end in a dangling ". ".
    rel_sentences = [
        f"Related to {rel['target']} via {rel['type']}"
        for rel in record["relationships"]
        if rel["target"]
    ]
    if rel_sentences:
        rel_text = ". ".join(rel_sentences)
        full_text = f"{base_text}. {rel_text}"
    else:
        full_text = base_text

    vector = embedder.embed_query(full_text)

    upsert_vectors(
        driver,
        ids=[node.element_id],
        embedding_property="embedding",
        embeddings=[vector],
        entity_type=EntityType.NODE,
        neo4j_database=db_name
    )

KeyboardInterrupt: 

In [2]:
def get_neighborhood(driver, node_id):
    """Return the nodes directly connected to the node whose ``id`` is ``node_id``.

    Relationships are followed in both directions. Each list entry is a tuple
    ``(neighbour_node, relationship_type, relationship_description)``.
    Uses the module-level ``db_name`` to select the database.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighbours = []
    with driver.session(database=db_name) as neo_session:
        for row in neo_session.run(cypher, id=node_id):
            neighbours.append((row["m"], row["rel_type"], row["rel_desc"]))
    return neighbours

def build_question_context(main_node, neighbors):
    """Render the retrieved node and its neighbours as plain-text context for the LLM.

    Args:
        main_node: Mapping read through ``.get`` for ``name``, ``type`` and
            ``description``.
        neighbors: Iterable of ``(node, rel_type, rel_desc)`` tuples; ``node``
            is read through ``.get`` in the same way.

    Returns:
        str: One paragraph for the main node, a heading line, and one section
        per neighbour, joined with newlines. Missing values render as ``None``.
    """
    main_name = main_node.get('name')
    parts = [
        "Best similarity search (the main node) is: "
        + f'"{main_name}" of type "{main_node.get("type")}". '
        + f'Description of "{main_name}": {main_node.get("description")}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get('name')
        # str() keeps a neighbour without a name from raising on .upper().
        heading = str(name).upper()
        parts.append(
            f'\n+++++ {heading} +++++\n\n'
            f'Node "{name}" of type "{neighbor.get("type")}". '
            f'Description of "{name}": {neighbor.get("description")}\n\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f'This relationship contains the following description: {rel_desc}'
        )

    return "\n".join(parts)


def approach2(query_text):
    """Answer a question with the LLM, using graph context around the best vector match.

    The question is embedded and matched against the "nodes" vector index
    (top 1). The matched node and its direct neighbours are rendered as text
    and passed to the LLM as the system instruction.

    Args:
        query_text: The natural-language question.

    Returns:
        str: The part of the model output after the first ``</think>`` marker,
        or the whole output when the model emitted no such marker.

    Raises:
        ValueError: If the vector search returns no items.
    """
    retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
    # Similarity search for the closest node to the question.
    result = retriever.search(query_text=query_text, top_k=1)

    if not result.items:
        raise ValueError(f"Vector search returned no items for query: {query_text!r}")

    context_chunks = []
    for item in result.items:
        # item.content is parsed as a Python literal describing the node's properties.
        dict_item = ast.literal_eval(item.content)

        # Collect the neighbours of the matched node and render them as context.
        neighbors_of_main_item = get_neighborhood(driver, dict_item["id"])
        context_chunks.append(build_question_context(dict_item, neighbors_of_main_item))
    question_context = "".join(context_chunks)

    print(f"""DEBUG question_context:\n{question_context}\n{"#" * 50}""")
    print(f"""Question: {query_text}\n""")

    # Ask the LLM the question, now with the graph context as system instruction.
    response = llm.invoke(
        input=query_text,
        system_instruction=question_context
    )
    print(f"""Response:\n{response.content}""")

    # Indexing split(...)[1] raised IndexError when "</think>" was absent;
    # fall back to the full response in that case.
    _, marker, answer = response.content.partition("</think>")
    return answer if marker else response.content



## Evaluation

In [9]:
# Run approach2 on every question of the test set and record answer and latency.
with open(
        "../AttackSeq-Technique-Test.csv", mode="r", newline="", encoding="utf-8"
) as infile, open("../approach2.csv", mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    fieldnames = ["Question ID", "Answer", "Latency"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        question_id = row.get("Question ID", "")
        question_text = row.get("Question", "")

        print(f"---------- {question_id} ----------")

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        answer = approach2(question_text)
        latency = round(time.perf_counter() - start_time, 4)

        writer.writerow({
            "Question ID": question_id,
            "Answer": answer,
            "Latency": latency
        })
        # Flush after each row so finished results survive an interrupted run.
        outfile.flush()

# NOTE(review): once the driver is closed, later cells that use `driver` need
# a new driver object (re-run the setup cell).
driver.close()

---------- 2 ----------
DEBUG question_context:
Best similarity search (the main node) is: "File Deletion" of type "attack-pattern". Description of "File Deletion": Adversaries may delete files left behind by the actions of their intrusion activity. Malware, tools, or other non-native files dropped or created on a system by an adversary (ex: [Ingress Tool Transfer](https://attack.mitre.org/techniques/T1105)) may leave traces to indicate to what was done within a network and how. Removal of these files can occur during an intrusion, or as part of a post-intrusion process to minimize the adversary's footprint.

There are tools available from the host operating system to perform cleanup, but adversaries may use other tools as well.(Citation: Microsoft SDelete July 2016) Examples of built-in [Command and Scripting Interpreter](https://attack.mitre.org/techniques/T1059) functions include <code>del</code> on Windows, <code>rm</code> or <code>unlink</code> on Linux and macOS, and `rm` on ESXi

In [None]:
import csv
import time
import ast # Needed for ast.literal_eval
# Assuming the following are already imported and initialized from your environment:
# from neo4j import GraphDatabase
# from neo4j_graphrag.embeddings import OllamaEmbeddings
# from neo4j_graphrag.retrievers import VectorRetriever
# from neo4j_graphrag.llm import OllamaLLM

# Assuming these are globally available and initialized from your previous setup
# driver = GraphDatabase.driver(...)
# embedder = OllamaEmbeddings(...)
# llm = OllamaLLM(...)
# db_name = "..."

# --- Start of functions from your original approach2.py ---

def get_neighborhood(driver, node_id):
    """Fetch the direct neighbours of the node whose ``id`` equals ``node_id``.

    Relationships are followed in both directions. Returns a list of
    ``(neighbour_node, relationship_type, relationship_description)`` tuples.
    Uses the module-level ``db_name`` to select the database.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighbours = []
    with driver.session(database=db_name) as neo_session:
        for row in neo_session.run(cypher, id=node_id):
            neighbours.append((row["m"], row["rel_type"], row["rel_desc"]))
    return neighbours

def build_question_context(main_node, neighbors):
    """Build the text context for the LLM from the main node and its neighbours.

    Args:
        main_node: Mapping read through ``.get`` for ``name``, ``type`` and
            ``description``.
        neighbors: Iterable of ``(node, rel_type, rel_desc)`` tuples; ``node``
            is read through ``.get`` in the same way.

    Returns:
        str: One paragraph for the main node, a heading line, and one section
        per neighbour, joined with newlines. Missing values render as ``None``.
    """
    main_name = main_node.get('name')
    parts = [
        "Best similarity search (the main node) is: "
        + f'"{main_name}" of type "{main_node.get("type")}". '
        + f'Description of "{main_name}": {main_node.get("description")}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get('name')
        # str() keeps a neighbour without a name from raising on .upper().
        heading = str(name).upper()
        parts.append(
            f'\n+++++ {heading} +++++\n'
            f'Node "{name}" of type "{neighbor.get("type")}". '
            f'Description of "{name}": {neighbor.get("description")}\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f'This relationship contains the following description: {rel_desc}'
        )
    return "\n".join(parts)

# --- End of functions from your original approach2.py ---


# Main approach function (modified for 3 scenarios)
def evaluate_approach(query_text, add_rag_context=False, add_choices=False, choices=None):
    """Query the LLM once under one of the evaluation scenarios.

    Args:
        query_text (str): The question to ask.
        add_rag_context (bool): When true, retrieve the best-matching graph
            node and its neighbours and prepend them to the system instruction.
        add_choices (bool): When true (and ``choices`` is non-empty), append
            the answer options to the question.
        choices (list): Answer option strings used with ``add_choices``.

    Returns:
        tuple: ``(answer_part, latency, len_thinking, len_answer)`` where
        latency covers only the LLM call and the lengths are character counts.
    """
    context_chunks = []
    if add_rag_context:
        retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
        hits = retriever.search(query_text=query_text, top_k=1).items

        if not hits:
            print(f"Warning: No relevant nodes found for query: '{query_text}'")
        else:
            for hit in hits:
                # hit.content is parsed as a Python literal describing the node.
                node_props = ast.literal_eval(hit.content)
                node_neighbors = get_neighborhood(driver, node_props["id"])
                context_chunks.append(build_question_context(node_props, node_neighbors))
    question_context = "".join(context_chunks)

    prompt = query_text
    if add_choices and choices:
        joined_choices = ", ".join(choices)
        prompt = prompt + f"\n\nChoose from the following options: {joined_choices}"

    if add_rag_context:
        # Graph context first, then the instruction to use it.
        system_instruction = (
            question_context
            + "\n\nBased on the provided context and the question, please provide the most accurate answer. Start your answer after </think>."
        )
    else:
        system_instruction = "Please answer the following question. Start your answer after </think>."

    started = time.time()
    response = llm.invoke(
        input=prompt,
        system_instruction=system_instruction
    )
    latency = round(time.time() - started, 4)

    raw_output = response.content

    # Everything before the first </think> is reasoning, everything after is
    # the answer. Without the marker the whole output is the answer, unstripped.
    before, marker, after = raw_output.partition("</think>")
    if marker:
        thinking_part, answer_part = before.strip(), after.strip()
    else:
        thinking_part, answer_part = "", raw_output

    return answer_part, latency, len(thinking_part), len(answer_part)

# --- Evaluation Script ---

# Define input and output CSV filenames
input_csv_filename = "../AttackSeq-Technique-Test.csv"
output_csv_filename = "evaluation_results.csv"

# Define the headers for the output CSV file.
# Each scenario (QO = question only, RAG, Choices) contributes five columns:
# answer, duration, thinking length, answer length, correctness.
fieldnames = [
    "Question ID",
    "Question", # Added for easier debugging of results
    "Ground Truth", # Added for easier debugging of results
    "Answer LLM (Question Only)", "Duration (QO)", "Length Thinking (QO)", "Length Answer (QO)", "Correctness (QO)",
    "Answer LLM (RAG)", "Duration (RAG)", "Length Thinking (RAG)", "Length Answer (RAG)", "Correctness (RAG)",
    "Answer LLM (Choices)", "Duration (Choices)", "Length Thinking (Choices)", "Length Answer (Choices)", "Correctness (Choices)"
]

print(f"Starting evaluation from '{input_csv_filename}'...")
with open(input_csv_filename, mode="r", newline="", encoding="utf-8") as infile, \
     open(output_csv_filename, mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    # Each input row triggers three LLM calls, one per scenario.
    for row in reader:
        question_id = row.get("Question ID", "")
        question_text = row.get("Question", "")
        # Prepare ground truth for case-insensitive comparison
        ground_truth = row.get("Ground Truth", "").strip().lower()

        unshuffled_choices_str = row.get("Unshuffled Choices", "")
        # Split choices string into a list, stripping whitespace from each choice
        # NOTE(review): assumes choices are comma-separated and that no single
        # choice contains a comma — verify against the CSV.
        unshuffled_choices = [c.strip() for c in unshuffled_choices_str.split(',')] if unshuffled_choices_str else []

        print(f"\n---------- {question_id} ----------")
        print(f"Question: {question_text}")

        # Dictionary to store results for the current row
        results = {
            "Question ID": question_id,
            "Question": question_text,
            "Ground Truth": row.get("Ground Truth", "") # Store original ground truth
        }

        # --- Scenario 1: Question Only ---
        answer_qo, latency_qo, len_thinking_qo, len_answer_qo = evaluate_approach(question_text, add_rag_context=False, add_choices=False)
        # NOTE(review): correctness requires the whole answer to equal the
        # ground truth (case-insensitive); a free-text answer that merely
        # contains the ground truth is scored "Incorrect".
        correctness_qo = "Correct" if answer_qo.strip().lower() == ground_truth else "Incorrect"
        results.update({
            "Answer LLM (Question Only)": answer_qo,
            "Duration (QO)": latency_qo,
            "Length Thinking (QO)": len_thinking_qo,
            "Length Answer (QO)": len_answer_qo,
            "Correctness (QO)": correctness_qo
        })
        print(f"  [QO] Answer: '{answer_qo}' | Correct: {correctness_qo}")

        # --- Scenario 2: Question + RAG Graph Knowledge ---
        answer_rag, latency_rag, len_thinking_rag, len_answer_rag = evaluate_approach(question_text, add_rag_context=True, add_choices=False)
        correctness_rag = "Correct" if answer_rag.strip().lower() == ground_truth else "Incorrect"
        results.update({
            "Answer LLM (RAG)": answer_rag,
            "Duration (RAG)": latency_rag,
            "Length Thinking (RAG)": len_thinking_rag,
            "Length Answer (RAG)": len_answer_rag,
            "Correctness (RAG)": correctness_rag
        })
        print(f"  [RAG] Answer: '{answer_rag}' | Correct: {correctness_rag}")

        # --- Scenario 3: Question + RAG Graph Knowledge + Answer Choices ---
        answer_choices, latency_choices, len_thinking_choices, len_answer_choices = evaluate_approach(question_text, add_rag_context=True, add_choices=True, choices=unshuffled_choices)
        correctness_choices = "Correct" if answer_choices.strip().lower() == ground_truth else "Incorrect"
        results.update({
            "Answer LLM (Choices)": answer_choices,
            "Duration (Choices)": latency_choices,
            "Length Thinking (Choices)": len_thinking_choices,
            "Length Answer (Choices)": len_answer_choices,
            "Correctness (Choices)": correctness_choices
        })
        print(f"  [Choices] Answer: '{answer_choices}' | Correct: {correctness_choices}")

        # Write the collected results for the current row to the CSV
        writer.writerow(results)

# Close the Neo4j driver connection after all operations are complete
# IMPORTANT: Ensure 'driver' is accessible and correctly imported/initialized in your full script
# NOTE(review): after this call, any later cell that uses `driver` needs a
# freshly created driver object.
driver.close()
print(f"\nEvaluation complete. Results saved to '{output_csv_filename}'")

In [None]:
import csv
import time
import ast # Needed for ast.literal_eval

# Assumption: the following imports and initializations already exist in your environment:
# from neo4j import GraphDatabase
# from neo4j_graphrag.embeddings import OllamaEmbeddings
# from neo4j_graphrag.retrievers import VectorRetriever
# from neo4j_graphrag.llm import OllamaLLM

# Assumption: 'driver', 'embedder', 'llm', 'db_name' are globally available and initialized.
# Example (if not done yet, replace the placeholders):
# from os import getenv
# from dotenv import load_dotenv
# load_dotenv(".env")
# db_uri = getenv("db_uri")
# db_name = getenv("db_name")
# db_username = getenv("db_username")
# db_password = getenv("db_password")
# auth = (db_username, db_password)
# driver = GraphDatabase.driver(uri=db_uri, auth=auth)
# embedder = OllamaEmbeddings(model="nomic-embed-text") # Oder Ihr spezifisches Modell
# llm = OllamaLLM(model_name="deepseek-r1:1.5b") # Oder Ihr spezifisches Modell


# --- Funktionen aus Ihrem ursprünglichen Ansatz ---

def get_neighborhood(driver, node_id):
    """Fetch the direct neighbours of the node whose ``id`` equals ``node_id``.

    Relationships are followed in both directions. Returns a list of
    ``(neighbour_node, relationship_type, relationship_description)`` tuples.
    Uses the module-level ``db_name`` to select the database.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighbours = []
    with driver.session(database=db_name) as neo_session:
        for row in neo_session.run(cypher, id=node_id):
            neighbours.append((row["m"], row["rel_type"], row["rel_desc"]))
    return neighbours

def build_question_context(main_node, neighbors):
    """Build the text context for the LLM from the main node and its neighbours.

    Args:
        main_node: Mapping read through ``.get`` for ``name``, ``type`` and
            ``description``.
        neighbors: Iterable of ``(node, rel_type, rel_desc)`` tuples; ``node``
            is read through ``.get`` in the same way.

    Returns:
        str: One paragraph for the main node, a heading line, and one section
        per neighbour, joined with newlines. Missing values render as ``None``.
    """
    main_name = main_node.get('name')
    parts = [
        "Best similarity search (the main node) is: "
        + f'"{main_name}" of type "{main_node.get("type")}". '
        + f'Description of "{main_name}": {main_node.get("description")}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get('name')
        # str() keeps a neighbour without a name from raising on .upper().
        heading = str(name).upper()
        parts.append(
            f'\n+++++ {heading} +++++\n'
            f'Node "{name}" of type "{neighbor.get("type")}". '
            f'Description of "{name}": {neighbor.get("description")}\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f'This relationship contains the following description: {rel_desc}'
        )
    return "\n".join(parts)

# --- Ende der Funktionen aus Ihrem ursprünglichen Ansatz ---


# Haupt-Ansatzfunktion (modifiziert für 3 Szenarien und DEBUG-Ausgabe)
def evaluate_approach(query_text, add_rag_context=False, add_choices=False, choices=None):
    """Query the LLM once under one of the evaluation scenarios.

    Args:
        query_text (str): The question to ask.
        add_rag_context (bool): When true, retrieve the best-matching graph
            node and its neighbours and prepend them to the system instruction.
        add_choices (bool): When true (and ``choices`` is non-empty), append
            the answer options to the question.
        choices (list): Answer option strings used with ``add_choices``.

    Returns:
        tuple: ``(answer_part, latency, len_thinking, len_answer)`` where
        latency covers only the LLM call and the lengths are character counts.
    """
    context_chunks = []
    if add_rag_context:
        retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
        hits = retriever.search(query_text=query_text, top_k=1).items

        if not hits:
            print(f"Warning: Keine relevanten Knoten für die Abfrage gefunden: '{query_text}'")
        else:
            for hit in hits:
                # hit.content is parsed as a Python literal describing the node.
                node_props = ast.literal_eval(hit.content)
                print(f"DEBUG: Vom Retriever gefundener Knoten (Main Node): {node_props.get('name')} (ID: {node_props.get('id')})")
                node_neighbors = get_neighborhood(driver, node_props["id"])
                context_chunks.append(build_question_context(node_props, node_neighbors))
    question_context = "".join(context_chunks)

    prompt = query_text
    if add_choices and choices:
        joined_choices = ", ".join(choices)
        prompt = prompt + f"\n\nChoose from the following options: {joined_choices}"

    if add_rag_context:
        # Graph context first, then the instruction to use it.
        system_instruction = (
            question_context
            + "\n\nBasierend auf dem bereitgestellten Kontext und der Frage, geben Sie bitte die genaueste Antwort. Beginnen Sie Ihre Antwort nach </think>."
        )
    else:
        system_instruction = "Bitte beantworten Sie die folgende Frage. Beginnen Sie Ihre Antwort nach </think>."

    started = time.time()
    response = llm.invoke(
        input=prompt,
        system_instruction=system_instruction
    )
    latency = round(time.time() - started, 4)

    raw_output = response.content

    # Everything before the first </think> is reasoning, everything after is
    # the answer. Without the marker the whole output is the answer, unstripped.
    before, marker, after = raw_output.partition("</think>")
    if marker:
        thinking_part, answer_part = before.strip(), after.strip()
    else:
        thinking_part, answer_part = "", raw_output

    return answer_part, latency, len(thinking_part), len(answer_part)

# --- Evaluationsskript ---

# Define the names of the input and output CSV files
input_csv_filename = "../AttackSeq-Technique-Test.csv"
output_csv_filename = "evaluation_results.csv"

# Define the headers for the output CSV file.
# Each scenario (QO = question only, RAG, Choices) contributes five columns:
# answer, duration, thinking length, answer length, correctness.
fieldnames = [
    "Question ID",
    "Question", # Added for easier debugging of results
    "Ground Truth", # Added for easier debugging of results
    "Answer LLM (Question Only)", "Duration (QO)", "Length Thinking (QO)", "Length Answer (QO)", "Correctness (QO)",
    "Answer LLM (RAG)", "Duration (RAG)", "Length Thinking (RAG)", "Length Answer (RAG)", "Correctness (RAG)",
    "Answer LLM (Choices)", "Duration (Choices)", "Length Thinking (Choices)", "Length Answer (Choices)", "Correctness (Choices)"
]

print(f"Starte die Evaluation von '{input_csv_filename}'...")
with open(input_csv_filename, mode="r", newline="", encoding="utf-8") as infile, \
     open(output_csv_filename, mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    # Each input row triggers three LLM calls, one per scenario.
    for row in reader:
        question_id = row.get("Question ID", "")
        question_text = row.get("Question", "")
        # Prepare the ground truth for the substring search (lower-cased)
        # NOTE(review): if the "Ground Truth" cell is empty or missing, this is
        # "" and the `in` checks below are always true, so every scenario is
        # scored "Correct" for that row.
        ground_truth = row.get("Ground Truth", "").strip().lower()

        unshuffled_choices_str = row.get("Unshuffled Choices", "")
        # Split the choices string into a list, stripping whitespace from each option
        # NOTE(review): assumes choices are comma-separated and that no single
        # choice contains a comma — verify against the CSV.
        unshuffled_choices = [c.strip() for c in unshuffled_choices_str.split(',')] if unshuffled_choices_str else []

        print(f"\n---------- {question_id} ----------")
        print(f"Question: {question_text}")

        # Dictionary holding the results for the current row
        results = {
            "Question ID": question_id,
            "Question": question_text,
            "Ground Truth": row.get("Ground Truth", "") # Store the original ground truth
        }

        # --- Scenario 1: question only ---
        answer_qo, latency_qo, len_thinking_qo, len_answer_qo = evaluate_approach(question_text, add_rag_context=False, add_choices=False)
        # Correctness check: ground truth is contained in the answer text
        correctness_qo = "Correct" if ground_truth in answer_qo.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Question Only)": answer_qo,
            "Duration (QO)": latency_qo,
            "Length Thinking (QO)": len_thinking_qo,
            "Length Answer (QO)": len_answer_qo,
            "Correctness (QO)": correctness_qo
        })
        print(f"  [QO] Answer: '{answer_qo}' | Correct: {correctness_qo}")

        # --- Scenario 2: question + RAG graph knowledge ---
        answer_rag, latency_rag, len_thinking_rag, len_answer_rag = evaluate_approach(question_text, add_rag_context=True, add_choices=False)
        # Correctness check: ground truth is contained in the answer text
        correctness_rag = "Correct" if ground_truth in answer_rag.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (RAG)": answer_rag,
            "Duration (RAG)": latency_rag,
            "Length Thinking (RAG)": len_thinking_rag,
            "Length Answer (RAG)": len_answer_rag,
            "Correctness (RAG)": correctness_rag
        })
        print(f"  [RAG] Answer: '{answer_rag}' | Correct: {correctness_rag}")

        # --- Scenario 3: question + RAG graph knowledge + answer choices ---
        answer_choices, latency_choices, len_thinking_choices, len_answer_choices = evaluate_approach(question_text, add_rag_context=True, add_choices=True, choices=unshuffled_choices)
        # Correctness check: ground truth is contained in the answer text
        correctness_choices = "Correct" if ground_truth in answer_choices.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Choices)": answer_choices,
            "Duration (Choices)": latency_choices,
            "Length Thinking (Choices)": len_thinking_choices,
            "Length Answer (Choices)": len_answer_choices,
            "Correctness (Choices)": correctness_choices
        })
        print(f"  [Choices] Answer: '{answer_choices}' | Correct: {correctness_choices}")

        # Write the collected results for the current row to the CSV
        writer.writerow(results)

# Close the Neo4j driver connection after all operations are complete
# IMPORTANT: make sure 'driver' is accessible and correctly imported/initialized in your full script
# NOTE(review): after this call, any later cell that uses `driver` needs a
# freshly created driver object.
driver.close()
print(f"\nEvaluation abgeschlossen. Ergebnisse gespeichert in '{output_csv_filename}'")

In [None]:
import csv
import time
import ast # Needed for ast.literal_eval

# Assumption: the following imports and initializations already exist in your environment:
# from neo4j import GraphDatabase
# from neo4j_graphrag.embeddings import OllamaEmbeddings
# from neo4j_graphrag.retrievers import VectorRetriever
# from neo4j_graphrag.llm import OllamaLLM

# Assumption: 'driver', 'embedder', 'llm', 'db_name' are globally available and initialized.
# Example (if not done yet, replace the placeholders):
# from os import getenv
# from dotenv import load_dotenv
# load_dotenv(".env")
# db_uri = getenv("db_uri")
# db_name = getenv("db_name")
# db_username = getenv("db_username")
# db_password = getenv("db_password")
# auth = (db_username, db_password)
# driver = GraphDatabase.driver(uri=db_uri, auth=auth)
# embedder = OllamaEmbeddings(model="nomic-embed-text") # Oder Ihr spezifisches Modell
# llm = OllamaLLM(model_name="deepseek-r1:1.5b") # Oder Ihr spezifisches Modell


# --- Funktionen aus Ihrem ursprünglichen Ansatz (unverändert beibehalten) ---

def get_neighborhood(driver, node_id):
    """Fetch the direct neighbours of the node whose ``id`` equals ``node_id``.

    Relationships are followed in both directions. Returns a list of
    ``(neighbour_node, relationship_type, relationship_description)`` tuples.
    Uses the module-level ``db_name`` to select the database.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighbours = []
    with driver.session(database=db_name) as neo_session:
        for row in neo_session.run(cypher, id=node_id):
            neighbours.append((row["m"], row["rel_type"], row["rel_desc"]))
    return neighbours

def build_question_context(main_node, neighbors):
    """
    Build the textual graph context that is handed to the LLM.

    Args:
        main_node: Mapping-like object (anything offering ``.get``) for the best
            similarity-search hit; its 'name', 'type' and 'description' entries are read.
        neighbors: Iterable of ``(neighbor, rel_type, rel_desc)`` tuples, where
            ``neighbor`` is mapping-like as well.

    Returns:
        str: A section for the main node followed by one section per neighbor,
        joined with newlines. Missing entries are rendered as ``None``.
    """
    main_name = main_node.get("name")
    main_type = main_node.get("type")
    main_description = main_node.get("description")
    parts = [
        "Best similarity search (the main node) is: "
        f'"{main_name}" of type "{main_type}". '
        f'Description of "{main_name}": {main_description}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get("name")
        node_type = neighbor.get("type")
        description = neighbor.get("description")
        # str() keeps a neighbor without a 'name' from raising AttributeError
        # (None has no .upper()); such a neighbor gets the heading "NONE".
        heading = str(name).upper()
        parts.append(
            f"\n+++++ {heading} +++++\n"
            f'Node "{name}" of type "{node_type}". Description of "{name}": {description}\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f"This relationship contains the following description: {rel_desc}"
        )
    return "\n".join(parts)

# --- Ende der Funktionen aus Ihrem ursprünglichen Ansatz ---


# Haupt-Ansatzfunktion (modifiziert für 3 Szenarien und DEBUG-Ausgabe)
def evaluate_approach(query_text, add_rag_context=False, add_choices=False, choices=None):
    """
    Send one question to the LLM, optionally enriched with graph context and answer choices.

    Relies on the module-level ``driver``, ``embedder``, ``llm`` and ``db_name``.

    Args:
        query_text (str): The question for the LLM; also used as the vector-search query.
        add_rag_context (bool): If True, look up the best matching node in the "nodes"
            vector index and put it, together with its neighbors, into the system instruction.
        add_choices (bool): If True (and ``choices`` is non-empty), append the choices to the question.
        choices (list): Answer options as strings.

    Returns:
        tuple: ``(answer_part, latency, len_thinking, len_answer, found_node_info)`` where
        ``latency`` is the wall-clock time of the LLM call in seconds (rounded to 4 digits),
        the two lengths are character counts of the text before / after ``</think>``,
        and ``found_node_info`` describes the retrieved node (empty string without RAG).
    """
    graph_context = ""
    found_node_info = ""  # description of the retrieved node, reported back to the caller

    if add_rag_context:
        retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
        search_result = retriever.search(query_text=query_text, top_k=1)

        if not search_result.items:
            found_node_info = f"Warning: Keine relevanten Knoten für die Abfrage gefunden: '{query_text}'"
        else:
            for hit in search_result.items:
                # item.content is the string form of the node's property dict
                node = ast.literal_eval(hit.content)
                found_node_info = (
                    "DEBUG: Gefundener Knoten (Main Node): "
                    f"Name='{node.get('name')}', Type='{node.get('type')}', ID='{node.get('id')}'"
                )
                node_neighbors = get_neighborhood(driver, node["id"])
                graph_context = graph_context + build_question_context(node, node_neighbors)

    prompt = query_text
    if add_choices and choices:
        prompt = prompt + "\n\nChoose from the following options: " + ", ".join(choices)

    if add_rag_context:
        # The graph context comes first, followed by the instruction to use it.
        system_instruction = graph_context + "\n\nAs an IT security expert, based on the provided context and the question, please provide the most accurate answer in English. Start your answer after </think>."
    else:
        system_instruction = "You are an IT security expert. Please answer the following question in English. Start your answer after </think>."

    started = time.time()
    response = llm.invoke(input=prompt, system_instruction=system_instruction)
    latency = round(time.time() - started, 4)

    raw_answer = response.content
    before, marker, after = raw_answer.partition("</think>")
    if marker:
        # Everything up to the first </think> is the model's reasoning.
        thinking_part, answer_part = before.strip(), after.strip()
    else:
        thinking_part, answer_part = "", raw_answer

    return answer_part, latency, len(thinking_part), len(answer_part), found_node_info


# --- Evaluationsskript ---

# Names of the input and output CSV files
input_csv_filename = "../AttackSeq-Technique-Test.csv"
output_csv_filename = "evaluation_results.csv"

# Column headers of the output CSV file
fieldnames = [
    "Question ID",
    "Question",
    "Ground Truth",
    "Answer LLM (Question Only)", "Duration (QO)", "Length Thinking (QO)", "Length Answer (QO)", "Correctness (QO)",
    "Answer LLM (RAG)", "Duration (RAG)", "Length Thinking (RAG)", "Length Answer (RAG)", "Correctness (RAG)", "Found Node RAG",
    "Answer LLM (Choices)", "Duration (Choices)", "Length Thinking (Choices)", "Length Answer (Choices)", "Correctness (Choices)", "Found Node Choices"
]

print(f"Starte die Evaluation von '{input_csv_filename}'...")
with open(input_csv_filename, mode="r", newline="", encoding="utf-8") as infile, \
     open(output_csv_filename, mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        question_id = row.get("Question ID", "")
        question_text = row.get("Question", "")
        # Normalized ground truth used for the substring match below.
        # `or ""` covers short rows, for which DictReader yields None instead of a string.
        ground_truth = (row.get("Ground Truth") or "").strip().lower()

        unshuffled_choices_str = row.get("Unshuffled Choices", "")
        unshuffled_choices = [c.strip() for c in unshuffled_choices_str.split(',')] if unshuffled_choices_str else []

        print(f"\n---------- {question_id} ----------")
        print(f"Question: {question_text}")

        results = {
            "Question ID": question_id,
            "Question": question_text,
            "Ground Truth": row.get("Ground Truth", "")
        }

        # --- Scenario 1: question only ---
        # No node is retrieved here, so found_node_info is ignored.
        answer_qo, latency_qo, len_thinking_qo, len_answer_qo, _ = evaluate_approach(question_text, add_rag_context=False, add_choices=False)
        # An empty ground truth must not count as a match ("" is a substring of every answer).
        correctness_qo = "Correct" if ground_truth and ground_truth in answer_qo.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Question Only)": answer_qo,
            "Duration (QO)": latency_qo,
            "Length Thinking (QO)": len_thinking_qo,
            "Length Answer (QO)": len_answer_qo,
            "Correctness (QO)": correctness_qo
        })
        print(f"  [QO] Answer: '{answer_qo}' | Correct: {correctness_qo}")

        # --- Scenario 2: question + RAG graph knowledge ---
        answer_rag, latency_rag, len_thinking_rag, len_answer_rag, found_node_rag_info = evaluate_approach(question_text, add_rag_context=True, add_choices=False)
        correctness_rag = "Correct" if ground_truth and ground_truth in answer_rag.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (RAG)": answer_rag,
            "Duration (RAG)": latency_rag,
            "Length Thinking (RAG)": len_thinking_rag,
            "Length Answer (RAG)": len_answer_rag,
            "Correctness (RAG)": correctness_rag,
            "Found Node RAG": found_node_rag_info  # info about the retrieved node
        })
        print(f"  [RAG] {found_node_rag_info}")  # show the retrieved node
        print(f"  [RAG] Answer: '{answer_rag}' | Correct: {correctness_rag}")

        # --- Scenario 3: question + RAG graph knowledge + answer choices ---
        answer_choices, latency_choices, len_thinking_choices, len_answer_choices, found_node_choices_info = evaluate_approach(question_text, add_rag_context=True, add_choices=True, choices=unshuffled_choices)
        correctness_choices = "Correct" if ground_truth and ground_truth in answer_choices.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Choices)": answer_choices,
            "Duration (Choices)": latency_choices,
            "Length Thinking (Choices)": len_thinking_choices,
            "Length Answer (Choices)": len_answer_choices,
            "Correctness (Choices)": correctness_choices,
            "Found Node Choices": found_node_choices_info  # info about the retrieved node
        })
        print(f"  [Choices] {found_node_choices_info}")  # show the retrieved node
        print(f"  [Choices] Answer: '{answer_choices}' | Correct: {correctness_choices}")

        writer.writerow(results)
        # Each row costs several LLM calls; flush so finished rows survive a later crash.
        outfile.flush()

# Make sure 'driver' is closed in the main script once the whole run has finished.
# driver.close() # This should happen outside of this cell after all evaluations are done.
print(f"\nEvaluation abgeschlossen. Ergebnisse gespeichert in '{output_csv_filename}'")

 Bestes Skript bisher #####################################################

In [None]:
import csv
import time
import ast # Needed for ast.literal_eval

# Annahme: Die folgenden Imports und Initialisierungen sind in Ihrer Umgebung bereits vorhanden:
# from neo4j import GraphDatabase
# from neo4j_graphrag.embeddings import OllamaEmbeddings
# from neo4j_graphrag.retrievers import VectorRetriever
# from neo4j_graphrag.llm import OllamaLLM

# Annahme: 'driver', 'embedder', 'llm', 'db_name' sind global verfügbar und initialisiert.
# Beispiel (falls noch nicht geschehen, ersetzen Sie die Platzhalter):
# from os import getenv
# from dotenv import load_dotenv
# load_dotenv(".env")
# db_uri = getenv("db_uri")
# db_name = getenv("db_name")
# db_username = getenv("db_username")
# db_password = getenv("db_password")
# auth = (db_username, db_password)
# driver = GraphDatabase.driver(uri=db_uri, auth=auth)
# embedder = OllamaEmbeddings(model="nomic-embed-text") # Oder Ihr spezifisches Modell
# llm = OllamaLLM(model_name="deepseek-r1:1.5b") # Oder Ihr spezifisches Modell


# --- Funktionen aus Ihrem ursprünglichen Ansatz (unverändert beibehalten) ---

def get_neighborhood(driver, node_id):
    """
    Fetch every node directly connected to the node whose ``id`` property is ``node_id``.

    The relationship is matched in either direction. The session is opened on the
    database named by the module-level ``db_name``.

    Args:
        driver: Object whose ``session(database=...)`` returns a context-managed session.
        node_id: Value compared against the ``id`` property of the start node.

    Returns:
        tuple: ``(neighbors, count)`` where ``neighbors`` is a list of
        ``(node, rel_type, rel_desc)`` tuples and ``count`` is its length.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighborhood = []
    with driver.session(database=db_name) as session:
        # Records are consumed while the session is still open.
        for record in session.run(cypher, id=node_id):
            neighborhood.append((record["m"], record["rel_type"], record["rel_desc"]))
    return neighborhood, len(neighborhood)

def build_question_context(main_node, neighbors):
    """
    Build the textual graph context that is handed to the LLM.

    Args:
        main_node: Mapping-like object (anything offering ``.get``) for the best
            similarity-search hit; its 'name', 'type' and 'description' entries are read.
        neighbors: Iterable of ``(neighbor, rel_type, rel_desc)`` tuples, where
            ``neighbor`` is mapping-like as well.

    Returns:
        str: A section for the main node followed by one section per neighbor,
        joined with newlines. Missing entries are rendered as ``None``.
    """
    main_name = main_node.get("name")
    main_type = main_node.get("type")
    main_description = main_node.get("description")
    parts = [
        "Best similarity search (the main node) is: "
        f'"{main_name}" of type "{main_type}". '
        f'Description of "{main_name}": {main_description}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get("name")
        node_type = neighbor.get("type")
        description = neighbor.get("description")
        # str() keeps a neighbor without a 'name' from raising AttributeError
        # (None has no .upper()); such a neighbor gets the heading "NONE".
        heading = str(name).upper()
        parts.append(
            f"\n+++++ {heading} +++++\n"
            f'Node "{name}" of type "{node_type}". Description of "{name}": {description}\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f"This relationship contains the following description: {rel_desc}"
        )
    return "\n".join(parts)

# --- Ende der Funktionen aus Ihrem ursprünglichen Ansatz ---


# Haupt-Ansatzfunktion (modifiziert für 4 Szenarien)
def evaluate_approach(query_text, add_rag_context=False, add_choices=False, choices=None):
    """
    Send one question to the LLM, optionally enriched with graph context and answer choices.

    Relies on the module-level ``driver``, ``embedder``, ``llm`` and ``db_name``, and on
    ``get_neighborhood`` returning a ``(neighbors, count)`` pair.

    Args:
        query_text (str): The question for the LLM; also used as the vector-search query.
        add_rag_context (bool): If True, look up the best matching node in the "nodes"
            vector index and put it, together with its neighbors, into the system instruction.
        add_choices (bool): If True (and ``choices`` is non-empty), append the choices to the question.
        choices (list): Answer options as strings.

    Returns:
        tuple: (answer_part, latency, len_thinking, len_answer, full_llm_input_combined,
        main_retrieved_node_name, num_retrieved_neighbors, choices_sent_to_llm)
    """
    # Both instruction texts are needed twice: for the real prompt and for the CSV summary.
    rag_instruction = "As an IT security expert, based on the provided context and the question, please provide the most accurate answer in English. Start your answer after </think>."
    plain_instruction = "You are an IT security expert. Please answer the following question in English. Start your answer after </think>."

    graph_context = ""                # graph text that only goes into the system instruction
    main_retrieved_node_name = "N/A"  # name of the primary retrieved node
    num_retrieved_neighbors = 0       # number of neighbors of that node
    choices_sent_to_llm = "N/A"

    if add_rag_context:
        retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
        search_result = retriever.search(query_text=query_text, top_k=1)

        if not search_result.items:
            main_retrieved_node_name = f"Warning: No relevant nodes found for query: '{query_text}'"
            num_retrieved_neighbors = 0
        else:
            for hit in search_result.items:
                # item.content is the string form of the node's property dict
                node = ast.literal_eval(hit.content)
                main_retrieved_node_name = node.get('name', 'N/A')
                node_neighbors, num_retrieved_neighbors = get_neighborhood(driver, node["id"])
                graph_context = graph_context + build_question_context(node, node_neighbors)

    # User-side input of the LLM call
    prompt = query_text
    if add_choices and choices:
        joined_choices = ", ".join(choices)
        prompt = prompt + f"\n\nChoose from the following options: {joined_choices}"
        choices_sent_to_llm = joined_choices

    if add_rag_context:
        system_instruction = graph_context + "\n\n" + rag_instruction
        instruction_for_csv = rag_instruction
    else:
        system_instruction = plain_instruction
        instruction_for_csv = plain_instruction

    started = time.time()
    response = llm.invoke(input=prompt, system_instruction=system_instruction)
    latency = round(time.time() - started, 4)

    raw_answer = response.content
    before, marker, after = raw_answer.partition("</think>")
    if marker:
        # Everything up to the first </think> is the model's reasoning.
        thinking_part, answer_part = before.strip(), after.strip()
    else:
        thinking_part, answer_part = "", raw_answer

    # The CSV summary deliberately omits the graph context to keep the file small;
    # the context is only part of the system instruction sent to the LLM.
    full_llm_input_combined = f"System Instruction (simplified for CSV):\n{instruction_for_csv}\n\nUser Input:\n{prompt}"

    return (
        answer_part,
        latency,
        len(thinking_part),
        len(answer_part),
        full_llm_input_combined,
        main_retrieved_node_name,
        num_retrieved_neighbors,
        choices_sent_to_llm,
    )


# --- Evaluationsskript ---

# Names of the input and output CSV files
input_csv_filename = "AttackSeq-Technique_100.csv"
output_csv_filename = "evaluation_results.csv"

# Column headers of the output CSV file
fieldnames = [
    "Question ID",
    "Question",
    "Ground Truth",
    "Answer LLM (QO)", "Duration (QO)", "Length Thinking (QO)", "Length Answer (QO)", "Correctness (QO)", "LLM Input (QO)",
    "Answer LLM (RAG)", "Duration (RAG)", "Length Thinking (RAG)", "Length Answer (RAG)", "Correctness (RAG)", "Main Retrieved Node Name (RAG)", "Num Retrieved Neighbors (RAG)", "LLM Input (RAG)",
    "Answer LLM (Choices)", "Duration (Choices)", "Length Thinking (Choices)", "Length Answer (Choices)", "Correctness (Choices)", "Main Retrieved Node Name (Choices)", "Num Retrieved Neighbors (Choices)", "LLM Input (Choices)", "LLM Choices (Choices)",
    "Answer LLM (Choices No RAG)", "Duration (Choices No RAG)", "Length Thinking (Choices No RAG)", "Length Answer (Choices No RAG)", "Correctness (Choices No RAG)", "LLM Input (Choices No RAG)", "LLM Choices (Choices No RAG)"
]

print(f"Starte die Evaluation von '{input_csv_filename}'...")
with open(input_csv_filename, mode="r", newline="", encoding="utf-8") as infile, \
     open(output_csv_filename, mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        question_id = row.get("Question ID", "")
        question_text = row.get("Question", "")
        # Normalized ground truth used for the substring match below.
        # `or ""` covers short rows, for which DictReader yields None instead of a string.
        ground_truth = (row.get("Ground Truth") or "").strip().lower()

        unshuffled_choices_str = row.get("Unshuffled Choices", "")
        unshuffled_choices = [c.strip() for c in unshuffled_choices_str.split(',')] if unshuffled_choices_str else []

        print(f"\n---------- {question_id} ----------")
        print(f"Question: {question_text}")

        results = {
            "Question ID": question_id,
            "Question": question_text,
            "Ground Truth": row.get("Ground Truth", "")
        }

        # --- Scenario 1: question only (QO) ---
        # main_retrieved_node_name, num_retrieved_neighbors and choices_sent_to_llm are ignored.
        answer_qo, latency_qo, len_thinking_qo, len_answer_qo, llm_input_qo, _, _, _ = evaluate_approach(question_text, add_rag_context=False, add_choices=False)
        # An empty ground truth must not count as a match ("" is a substring of every answer).
        correctness_qo = "Correct" if ground_truth and ground_truth in answer_qo.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (QO)": answer_qo,
            "Duration (QO)": latency_qo,
            "Length Thinking (QO)": len_thinking_qo,
            "Length Answer (QO)": len_answer_qo,
            "Correctness (QO)": correctness_qo,
            "LLM Input (QO)": llm_input_qo
        })
        print(f"  [QO] Answer: '{answer_qo}' | Correct: {correctness_qo}")

        # --- Scenario 2: question + RAG graph knowledge (RAG) ---
        answer_rag, latency_rag, len_thinking_rag, len_answer_rag, llm_input_rag, main_retrieved_node_name_rag, num_retrieved_neighbors_rag, _ = evaluate_approach(question_text, add_rag_context=True, add_choices=False)
        correctness_rag = "Correct" if ground_truth and ground_truth in answer_rag.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (RAG)": answer_rag,
            "Duration (RAG)": latency_rag,
            "Length Thinking (RAG)": len_thinking_rag,
            "Length Answer (RAG)": len_answer_rag,
            "Correctness (RAG)": correctness_rag,
            "Main Retrieved Node Name (RAG)": main_retrieved_node_name_rag,
            "Num Retrieved Neighbors (RAG)": num_retrieved_neighbors_rag,
            "LLM Input (RAG)": llm_input_rag
        })
        print(f"  [RAG] Main Node: '{main_retrieved_node_name_rag}', Neighbors: {num_retrieved_neighbors_rag}")
        print(f"  [RAG] Answer: '{answer_rag}' | Correct: {correctness_rag}")

        # --- Scenario 3: question + RAG graph knowledge + answer choices (Choices) ---
        answer_choices, latency_choices, len_thinking_choices, len_answer_choices, llm_input_choices, main_retrieved_node_name_choices, num_retrieved_neighbors_choices, choices_sent_to_llm_data = evaluate_approach(question_text, add_rag_context=True, add_choices=True, choices=unshuffled_choices)
        correctness_choices = "Correct" if ground_truth and ground_truth in answer_choices.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Choices)": answer_choices,
            "Duration (Choices)": latency_choices,
            "Length Thinking (Choices)": len_thinking_choices,
            "Length Answer (Choices)": len_answer_choices,
            "Correctness (Choices)": correctness_choices,
            "Main Retrieved Node Name (Choices)": main_retrieved_node_name_choices,
            "Num Retrieved Neighbors (Choices)": num_retrieved_neighbors_choices,
            "LLM Input (Choices)": llm_input_choices,
            "LLM Choices (Choices)": choices_sent_to_llm_data
        })
        print(f"  [Choices] Main Node: '{main_retrieved_node_name_choices}', Neighbors: {num_retrieved_neighbors_choices}")
        print(f"  [Choices] Choices Sent: '{choices_sent_to_llm_data}'")
        print(f"  [Choices] Answer: '{answer_choices}' | Correct: {correctness_choices}")

        # --- Scenario 4: question + answer choices WITHOUT RAG (Choices No RAG) ---
        # main_retrieved_node_name and num_retrieved_neighbors are ignored.
        answer_choices_no_rag, latency_choices_no_rag, len_thinking_choices_no_rag, len_answer_choices_no_rag, llm_input_choices_no_rag, _, _, choices_sent_to_llm_no_rag_data = evaluate_approach(question_text, add_rag_context=False, add_choices=True, choices=unshuffled_choices)
        correctness_choices_no_rag = "Correct" if ground_truth and ground_truth in answer_choices_no_rag.strip().lower() else "Incorrect"
        results.update({
            "Answer LLM (Choices No RAG)": answer_choices_no_rag,
            "Duration (Choices No RAG)": latency_choices_no_rag,
            "Length Thinking (Choices No RAG)": len_thinking_choices_no_rag,
            "Length Answer (Choices No RAG)": len_answer_choices_no_rag,
            "Correctness (Choices No RAG)": correctness_choices_no_rag,
            "LLM Input (Choices No RAG)": llm_input_choices_no_rag,
            "LLM Choices (Choices No RAG)": choices_sent_to_llm_no_rag_data
        })
        print(f"  [Choices No RAG] Choices Sent: '{choices_sent_to_llm_no_rag_data}'")
        print(f"  [Choices No RAG] Answer: '{answer_choices_no_rag}' | Correct: {correctness_choices_no_rag}")

        writer.writerow(results)
        # Each row costs several LLM calls; flush so finished rows survive a later crash.
        outfile.flush()

# IMPORTANT: Ensure 'driver.close()' is called *once* at the very end of your main script,
# after all evaluations and database interactions are complete, to avoid DeprecationWarnings.
# Example:
# driver.close()
print(f"\nEvaluation complete. Results saved to '{output_csv_filename}'")

Starte die Evaluation von 'AttackSeq-Technique_100.csv'...

---------- 2 ----------
Question: Before SectorB06 achieved persistence by adding malicious files like RasTls.dll and IntelGraphicsController.exe to the registry run keys, which ATT&CK technique most likely occurred as the malware manipulated file ownership using CreateProcess and deleted initial traces such as winword.exe?


#####################################################













#####################################################

In [13]:
import csv
import time
import ast # Needed for ast.literal_eval

# Annahme: Die folgenden Imports und Initialisierungen sind in Ihrer Umgebung bereits vorhanden:
# from neo4j import GraphDatabase
# from neo4j_graphrag.embeddings import OllamaEmbeddings
# from neo4j_graphrag.retrievers import VectorRetriever
# from neo4j_graphrag.llm import OllamaLLM

# Annahme: 'driver', 'embedder', 'llm', 'db_name' sind global verfügbar und initialisiert.
# Beispiel (falls noch nicht geschehen, ersetzen Sie die Platzhalter):
# from os import getenv
# from dotenv import load_dotenv
# load_dotenv(".env")
# db_uri = getenv("db_uri")
# db_name = getenv("db_name")
# db_username = getenv("db_username")
# db_password = getenv("db_password")
# auth = (db_username, db_password)
# driver = GraphDatabase.driver(uri=db_uri, auth=auth)
# embedder = OllamaEmbeddings(model="nomic-embed-text") # Oder Ihr spezifisches Modell
# llm = OllamaLLM(model_name="deepseek-r1:1.5b") # Oder Ihr spezifisches Modell


# --- Funktionen aus Ihrem ursprünglichen Ansatz (unverändert beibehalten) ---

def get_neighborhood(driver, node_id):
    """
    Fetch every node directly connected to the node whose ``id`` property is ``node_id``.

    The relationship is matched in either direction. The session is opened on the
    database named by the module-level ``db_name``.

    Args:
        driver: Object whose ``session(database=...)`` returns a context-managed session.
        node_id: Value compared against the ``id`` property of the start node.

    Returns:
        list: One ``(node, rel_type, rel_desc)`` tuple per distinct result record.
    """
    cypher = """
            MATCH (n)-[r]-(m)
            WHERE n.id = $id
            RETURN DISTINCT m, type(r) AS rel_type, r.description AS rel_desc
        """
    neighborhood = []
    with driver.session(database=db_name) as session:
        # Records are consumed while the session is still open.
        for record in session.run(cypher, id=node_id):
            neighborhood.append((record["m"], record["rel_type"], record["rel_desc"]))
    return neighborhood

def build_question_context(main_node, neighbors):
    """
    Build the textual graph context that is handed to the LLM.

    Args:
        main_node: Mapping-like object (anything offering ``.get``) for the best
            similarity-search hit; its 'name', 'type' and 'description' entries are read.
        neighbors: Iterable of ``(neighbor, rel_type, rel_desc)`` tuples, where
            ``neighbor`` is mapping-like as well.

    Returns:
        str: A section for the main node followed by one section per neighbor,
        joined with newlines. Missing entries are rendered as ``None``.
    """
    main_name = main_node.get("name")
    main_type = main_node.get("type")
    main_description = main_node.get("description")
    parts = [
        "Best similarity search (the main node) is: "
        f'"{main_name}" of type "{main_type}". '
        f'Description of "{main_name}": {main_description}',
        "\nThe main node's neighbors are the following nodes:",
    ]

    for neighbor, rel_type, rel_desc in neighbors:
        name = neighbor.get("name")
        node_type = neighbor.get("type")
        description = neighbor.get("description")
        # str() keeps a neighbor without a 'name' from raising AttributeError
        # (None has no .upper()); such a neighbor gets the heading "NONE".
        heading = str(name).upper()
        parts.append(
            f"\n+++++ {heading} +++++\n"
            f'Node "{name}" of type "{node_type}". Description of "{name}": {description}\n'
            f'The main node is related to "{name}" via type "{rel_type}". '
            f"This relationship contains the following description: {rel_desc}"
        )
    return "\n".join(parts)

# --- Ende der Funktionen aus Ihrem ursprünglichen Ansatz ---


# Haupt-Ansatzfunktion (modifiziert für 3 Szenarien und DEBUG-Ausgabe)
def evaluate_approach(query_text, add_rag_context=False, add_choices=False, choices=None):
    """
    Send one question to the LLM, optionally enriched with graph context and answer choices.

    Args:
        query_text (str): The main question for the LLM.
        add_rag_context (bool): Whether to run a vector search (index "nodes", top_k=1)
            and put the hit plus its direct neighbors into the system instruction.
        add_choices (bool): Whether answer choices are appended to the question.
        choices (list): Answer choices as strings; only used when add_choices is
            True and the list is non-empty.

    Returns:
        tuple: (answer_part, latency, len_thinking, len_answer, full_llm_input, found_node_info)
            answer_part: response text after the first "</think>" marker, or the
                whole response when the marker is absent.
            latency: seconds spent in ``llm.invoke``, rounded to 4 decimals.
            len_thinking / len_answer: character counts of the text before / after
                the marker (0 thinking characters when the marker is absent).
            full_llm_input: system instruction and user input combined into one
                string for later inspection.
            found_node_info: summary of the retrieved node, "N/A" when retrieval
                is disabled, or a warning text when the search returned nothing.
    """
    question_context = ""
    found_node_info = "N/A"  # stays "N/A" when no retrieval happens (question-only mode)

    if add_rag_context:
        retriever = VectorRetriever(driver, "nodes", embedder, neo4j_database=db_name)
        result = retriever.search(query_text=query_text, top_k=1)

        if result.items:
            for item in result.items:
                # literal_eval parses item.content; the code below treats the
                # result as a dict of node properties.
                dict_item = ast.literal_eval(item.content)

                # Remember which node was found so callers can report it.
                found_node_info = f"Name='{dict_item.get('name')}', Type='{dict_item.get('type')}', ID='{dict_item.get('id')}'"

                neighbors_of_main_item = get_neighborhood(driver, dict_item["id"])
                question_context += build_question_context(dict_item, neighbors_of_main_item)
        else:
            found_node_info = f"Warning: No relevant nodes found for query: '{query_text}'"

    # User-side input passed to llm.invoke.
    full_query_to_llm = query_text
    if add_choices and choices:
        choices_str = ", ".join(choices)
        full_query_to_llm += f"\n\nChoose from the following options: {choices_str}"

    if add_rag_context:
        # Graph context first, then the instruction to answer based on it.
        system_instruction = question_context + "\n\nAs an IT security expert, based on the provided context and the question, please provide the most accurate answer in English. Start your answer after </think>."
    else:
        # System instruction for the question-only mode.
        system_instruction = "You are an IT security expert. Please answer the following question in English. Start your answer after </think>."

    # perf_counter is a monotonic, high-resolution clock; time.time() can jump when
    # the system clock is adjusted and would then report a wrong duration.
    start_time = time.perf_counter()
    response = llm.invoke(
        input=full_query_to_llm,
        system_instruction=system_instruction
    )
    latency = round(time.perf_counter() - start_time, 4)

    full_response_content = response.content

    # Split at the first "</think>" only; without the marker the whole response
    # counts as the answer.
    before_marker, marker, after_marker = full_response_content.partition("</think>")
    if marker:
        thinking_part = before_marker.strip()
        answer_part = after_marker.strip()
    else:
        thinking_part = ""
        answer_part = full_response_content

    len_thinking = len(thinking_part)
    len_answer = len(answer_part)

    # Full prompt (system instruction + user input) returned for analysis.
    full_llm_input = f"System Instruction:\n{system_instruction}\n\nUser Input:\n{full_query_to_llm}"

    return answer_part, latency, len_thinking, len_answer, full_llm_input, found_node_info


# --- Evaluation script ---
# Runs every question of the input CSV through three scenarios (question only,
# question + RAG graph context, question + RAG graph context + answer choices)
# and writes one result row per question to the output CSV.

# Names of the input and output CSV files
input_csv_filename = "../AttackSeq-Technique-Test.csv"
output_csv_filename = "evaluation_results.csv"

# Header of the output CSV file.
# "Retrieved Nodes (RAG)" is filled from scenario 2; "Found Node RAG" and
# "Found Node Choices" hold the retrieved node of scenario 2 and 3 respectively.
fieldnames = [
    "Question ID",
    "Question",
    "Retrieved Nodes (RAG)",
    "Ground Truth",
    "Answer LLM (Question Only)", "Duration (QO)", "Length Thinking (QO)", "Length Answer (QO)", "Correctness (QO)", "LLM Input (QO)",
    "Answer LLM (RAG)", "Duration (RAG)", "Length Thinking (RAG)", "Length Answer (RAG)", "Correctness (RAG)", "Found Node RAG", "LLM Input (RAG)",
    "Answer LLM (Choices)", "Duration (Choices)", "Length Thinking (Choices)", "Length Answer (Choices)", "Correctness (Choices)", "Found Node Choices", "LLM Input (Choices)"
]


def grade_answer(ground_truth, answer):
    """
    Grade an LLM answer by case-insensitive substring match against the ground truth.

    Args:
        ground_truth (str): Expected answer, already stripped and lower-cased.
        answer (str): Raw answer text returned by the LLM.

    Returns:
        str: "Correct" or "Incorrect"; "N/A" when there is no ground truth,
        because the empty string is contained in every answer and would
        otherwise always be graded "Correct".
    """
    if not ground_truth:
        return "N/A"
    return "Correct" if ground_truth in answer.strip().lower() else "Incorrect"


print(f"Starte die Evaluation von '{input_csv_filename}'...")
with open(input_csv_filename, mode="r", newline="", encoding="utf-8") as infile, \
     open(output_csv_filename, mode="w", newline="", encoding="utf-8") as outfile:

    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        # DictReader fills cells missing from a short row with None, so the
        # .get() default alone is not enough; "or ''" covers both cases.
        question_id = row.get("Question ID") or ""
        question_text = row.get("Question") or ""
        ground_truth_raw = row.get("Ground Truth") or ""
        ground_truth = ground_truth_raw.strip().lower()

        unshuffled_choices_str = row.get("Unshuffled Choices", "")
        unshuffled_choices = [c.strip() for c in unshuffled_choices_str.split(',')] if unshuffled_choices_str else []

        print(f"\n---------- {question_id} ----------")
        print(f"Question: {question_text}")

        results = {
            "Question ID": question_id,
            "Question": question_text,
            "Ground Truth": ground_truth_raw
        }

        # --- Scenario 1: question only ---
        answer_qo, latency_qo, len_thinking_qo, len_answer_qo, llm_input_qo, _ = evaluate_approach(question_text, add_rag_context=False, add_choices=False)
        correctness_qo = grade_answer(ground_truth, answer_qo)
        results.update({
            "Answer LLM (Question Only)": answer_qo,
            "Duration (QO)": latency_qo,
            "Length Thinking (QO)": len_thinking_qo,
            "Length Answer (QO)": len_answer_qo,
            "Correctness (QO)": correctness_qo,
            "LLM Input (QO)": llm_input_qo
        })
        print(f"  [QO] Answer: '{answer_qo}' | Correct: {correctness_qo}")

        # --- Scenario 2: question + RAG graph knowledge ---
        answer_rag, latency_rag, len_thinking_rag, len_answer_rag, llm_input_rag, found_node_rag_info = evaluate_approach(question_text, add_rag_context=True, add_choices=False)
        correctness_rag = grade_answer(ground_truth, answer_rag)
        results.update({
            "Answer LLM (RAG)": answer_rag,
            "Duration (RAG)": latency_rag,
            "Length Thinking (RAG)": len_thinking_rag,
            "Length Answer (RAG)": len_answer_rag,
            "Correctness (RAG)": correctness_rag,
            "Found Node RAG": found_node_rag_info,
            "LLM Input (RAG)": llm_input_rag
        })
        results["Retrieved Nodes (RAG)"] = found_node_rag_info
        print(f"  [RAG] **Retrieved Nodes**: {found_node_rag_info}")
        print(f"  [RAG] Answer: '{answer_rag}' | Correct: {correctness_rag}")

        # --- Scenario 3: question + RAG graph knowledge + answer choices ---
        answer_choices, latency_choices, len_thinking_choices, len_answer_choices, llm_input_choices, found_node_choices_info = evaluate_approach(question_text, add_rag_context=True, add_choices=True, choices=unshuffled_choices)
        correctness_choices = grade_answer(ground_truth, answer_choices)
        results.update({
            "Answer LLM (Choices)": answer_choices,
            "Duration (Choices)": latency_choices,
            "Length Thinking (Choices)": len_thinking_choices,
            "Length Answer (Choices)": len_answer_choices,
            "Correctness (Choices)": correctness_choices,
            "Found Node Choices": found_node_choices_info,
            "LLM Input (Choices)": llm_input_choices
        })
        # Scenarios 2 and 3 search with the same query text and top_k=1, so they
        # most likely retrieve the same node. "Retrieved Nodes (RAG)" is therefore
        # filled from scenario 2 only; the node found in scenario 3 is still
        # recorded separately in "Found Node Choices".
        print(f"  [Choices] **Retrieved Nodes**: {found_node_choices_info}")
        print(f"  [Choices] Answer: '{answer_choices}' | Correct: {correctness_choices}")

        writer.writerow(results)
        # Each row costs three LLM calls; flush so finished rows survive a kernel
        # crash or a hard kill of a long run.
        outfile.flush()

# IMPORTANT: Ensure 'driver.close()' is called *once* at the very end of your main script,
# after all evaluations and database interactions are complete, to avoid DeprecationWarnings.
# Example:
# driver.close()
print(f"\nEvaluation complete. Results saved to '{output_csv_filename}'")

Starte die Evaluation von 'AttackSeq-Technique-Test.csv'...

---------- 2 ----------
Question: Before SectorB06 achieved persistence by adding malicious files like RasTls.dll and IntelGraphicsController.exe to the registry run keys, which ATT&CK technique most likely occurred as the malware manipulated file ownership using CreateProcess and deleted initial traces such as winword.exe?


KeyboardInterrupt: 