## Preparation

Install the required packages with `pip install "neo4j-graphrag[ollama]"` (the `ollama` extra is needed for `OllamaEmbeddings`, which is used below).

In [3]:
import ast
import json
import os
from typing import Any

from neo4j import GraphDatabase
from neo4j_graphrag.embeddings import OllamaEmbeddings
from neo4j_graphrag.indexes import create_vector_index, upsert_vectors
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.types import EntityType

# Insert path to your STIX ATT&CK file.
# The encoding is given explicitly so that reading the JSON file does not
# depend on the platform's default text encoding.
with open("attack-stix-data/enterprise-attack-17.1.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Neo4j connection settings come from the environment so that no secret is
# stored in the notebook:
#   NEO4J_URI       optional, defaults to the instance used so far
#   NEO4J_USERNAME  optional, defaults to "neo4j"
#   NEO4J_PASSWORD  required; a missing value raises KeyError right here
# NOTE(review): the password that used to be hard-coded in this cell should be
# treated as leaked and rotated.
URI = os.environ.get("NEO4J_URI", "neo4j+s://6224f1f3.databases.neo4j.io")
AUTH = (os.environ.get("NEO4J_USERNAME", "neo4j"), os.environ["NEO4J_PASSWORD"])

driver = GraphDatabase.driver(URI, auth=AUTH)

## KG Construction

In [4]:
def flatten_dict(d) -> dict[Any, str]:
    """Return a copy of ``d`` in which every value has been passed through ``str``.

    Keys are kept as they are. Nested containers are not traversed; they are
    replaced by their string representation as a whole.
    """
    stringified = {}
    for key, value in d.items():
        stringified[key] = str(value)
    return stringified


def create_sdo_node(tx, label, props) -> None:
    """Create one node labelled ``SDO`` and ``label`` that carries ``props``.

    ``label`` is spliced into the Cypher text; the property map is sent
    separately as the ``$props`` query parameter.
    """
    cypher = "CREATE (n:SDO:{} $props)".format(label)
    tx.run(cypher, props=props)


def create_relation(tx, src, tgt, rel_type, props) -> None:
    """Merge a ``rel_type`` relationship from the node with id ``src`` to the node with id ``tgt``.

    Both endpoints are looked up by their ``id`` property without a label
    filter. ``rel_type`` is spliced into the Cypher text, and ``props`` is
    added to the relationship's properties through the ``$props`` parameter.
    """
    cypher = (
        "\n"
        "    MATCH (a {id: $source_id})\n"
        "    MATCH (b {id: $target_id})\n"
        f"    MERGE (a)-[r:{rel_type}]->(b)\n"
        "    SET r += $props\n"
        "    "
    )
    tx.run(cypher, source_id=src, target_id=tgt, props=props)


# Import the bundle in two passes: all nodes first, then all relationships.
# create_relation() MATCHes both endpoints by id and creates nothing when one
# of them is missing, so a relationship that is listed in the file before its
# endpoints would be lost without any error if objects were written in file
# order.
with driver.session() as session:
    pending_relations = []

    # Pass 1: one node per non-relationship object; relationships are set aside.
    for obj in data["objects"]:
        flat_obj = flatten_dict(obj)
        if flat_obj["type"] == "relationship":
            pending_relations.append(flat_obj)
            continue
        # The type is spliced into the Cypher text as a label, where an
        # unquoted hyphen would not parse, hence the replacement.
        sdo_type = flat_obj["type"].replace("-", "_")
        props = {k: v for k, v in flat_obj.items() if k != "type"}
        session.execute_write(create_sdo_node, sdo_type, props)

    # Pass 2: every endpoint that exists in the bundle is now in the graph.
    for flat_obj in pending_relations:
        rel_type = flat_obj["relationship_type"].replace("-", "_")
        # The endpoint references and the type are expressed by the
        # relationship itself, so they are not repeated as properties.
        props = {
            k: v
            for k, v in flat_obj.items()
            if k not in ("source_ref", "target_ref", "relationship_type")
        }
        session.execute_write(
            create_relation,
            flat_obj["source_ref"],
            flat_obj["target_ref"],
            rel_type,
            props,
        )

In [5]:
def create_contains_technique_relation(tx, tech_id, tac_name) -> None:
    """Merge a ``contains_technique`` relationship from a tactic to a technique.

    The technique is the ``attack_pattern`` node whose ``id`` equals
    ``tech_id``; the tactic is the ``x_mitre_tactic`` node whose
    ``x_mitre_shortname`` equals ``tac_name``.

    Both values are sent as query parameters rather than being formatted into
    the Cypher text, so quotes or other special characters in them cannot
    change the query.
    """
    query = """
    MATCH (a:attack_pattern {id: $tech_id}), (b:x_mitre_tactic {x_mitre_shortname: $tac_name})
    MERGE (b)-[r:contains_technique]->(a)
    """
    tx.run(query, tech_id=tech_id, tac_name=tac_name)

def create_component_of_relation(tx, dc_id, ds_id) -> None:
    """Merge a ``component_of`` relationship from a data component to its data source.

    The data component is the ``x_mitre_data_component`` node whose ``id``
    equals ``dc_id``; the data source is the ``x_mitre_data_source`` node whose
    ``id`` equals ``ds_id``.

    Both ids are sent as query parameters rather than being formatted into the
    Cypher text, so quotes or other special characters in them cannot change
    the query.
    """
    query = """
    MATCH (a:x_mitre_data_component {id: $dc_id}), (b:x_mitre_data_source {id: $ds_id})
    MERGE (a)-[r:component_of]->(b)
    """
    tx.run(query, dc_id=dc_id, ds_id=ds_id)

attack_patterns = [obj for obj in data["objects"] if obj["type"] == "attack-pattern"]
data_components = [obj for obj in data["objects"] if obj["type"] == "x-mitre-data-component"]

with driver.session() as session:
    # Tactic -> technique links, one per kill-chain phase of each technique.
    for ap in attack_patterns:
        ap_id = ap["id"]

        # `kill_chain_phases` is an optional property, so an attack pattern
        # without it simply gets no tactic link instead of raising KeyError.
        for phase in ap.get("kill_chain_phases", []):
            phase_name = phase["phase_name"]
            session.execute_write(create_contains_technique_relation, ap_id, phase_name)

    # Data component -> data source links.
    for dc in data_components:
        dc_id = dc["id"]
        ds_id = dc.get("x_mitre_data_source_ref")
        if ds_id is None:
            # No parent data source is referenced, so there is nothing to link.
            continue
        session.execute_write(create_component_of_relation, dc_id, ds_id)

## Create and populate Vector Index

In [6]:
INDEX_NAME = "SDOs"

# Settings of the vector index over the `embedding` property of nodes labelled
# `SDO`.
# NOTE(review): 3584 is presumably the vector length produced by the embedding
# model used to populate the index; confirm when switching models.
vector_index_options = {
    "label": "SDO",
    "embedding_property": "embedding",
    "dimensions": 3584,
    "similarity_fn": "cosine",
}

create_vector_index(driver, INDEX_NAME, **vector_index_options)

In [15]:
embedder = OllamaEmbeddings(model="rjmalagon/gte-qwen2-7b-instruct:f16")

# Embed the description of every node that has one and store the vector in the
# node's `embedding` property, one upsert per node.
with driver.session() as session:
    for record in session.run("MATCH (n) RETURN n"):
        node = record["n"]
        if not node["description"]:
            # Nothing to embed for nodes with a missing or empty description.
            continue
        upsert_vectors(
            driver,
            ids=[node.element_id],
            embedding_property="embedding",
            embeddings=[embedder.embed_query(node["description"])],
            entity_type=EntityType.NODE,
        )


## Similarity Search

In [22]:
# Query the index created above; INDEX_NAME is reused so that the retriever
# cannot drift from the index definition if the name ever changes.
retriever = VectorRetriever(driver, INDEX_NAME, embedder)
query_text = "Which mitigation strategy is recommended to counteract the abuse of setuid and setgid bits?"
result = retriever.search(query_text=query_text, top_k=3)
for item in result.items:
    # NOTE(review): assumes `item.content` is the string form of a Python dict
    # of node properties (the retriever's default formatting) — confirm if a
    # custom result formatter is introduced.
    dict_item = ast.literal_eval(item.content)
    print(dict_item["name"])
    print(dict_item["description"])
    print("---------------------")

Setuid and Setgid Mitigation
Applications with known vulnerabilities or known shell escapes should not have the setuid or setgid bits set to reduce potential damage if an application is compromised. Additionally, the number of programs with setuid or setgid bits set should be minimized across a system.
---------------------
Setuid and Setgid
An adversary may abuse configurations where an application has the setuid or setgid bits set in order to get code running in a different (and possibly more privileged) user’s context. On Linux or macOS, when the setuid or setgid bits are set for an application binary, the application will run with the privileges of the owning user or group respectively.(Citation: setuid man page) Normally an application is run in the current user’s context, regardless of which user or group owns the application. However, there are instances where programs need to be executed in an elevated context to function properly, but the user running them may not have the spe