In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import configparser

from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Load helper functions and setup
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)

from analysis.chicago.setup import setup_llm_and_graph
from analysis.chicago.utils import run_validation_pipeline, parse_shacl_result, process_agent_step
from analysis.chicago.conflict_of_interest.prompt import SHACL_PROMPTS, AGENT_PROMPTS
from analysis.chicago.conflict_of_interest.conflict_utils import run_explainability_query
from yfiles_jupyter_graphs_for_neo4j import Neo4jGraphWidget
from database.neo4j_db import Neo4jGraphDB

llm, neo4j_graph = setup_llm_and_graph()

In [4]:
# Load configuration, database, and visualization
config = configparser.ConfigParser()
config.read('../../config.ini')
neo4j_graph = Neo4jGraphDB()
driver = Neo4jGraphDB()._driver
jg = Neo4jGraphWidget(driver)

# SHACL Validation Test

In [5]:
# Test the generation of the SHACL rules
rdf_schema = SHACL_PROMPTS["rdf_schema"]
message = SHACL_PROMPTS["message"]

prompt = PromptTemplate(
    input_variables=["message", "rdf_schema"],
    template=SHACL_PROMPTS["template"]
)

chain = prompt | llm | StrOutputParser()
shacl_shape = chain.invoke({"message": message, "rdf_schema": rdf_schema})
print(shacl_shape)

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix schema: <https://schema.org/> .

<#DisconnectedPeopleShape> a sh:NodeShape ;
    sh:targetClass foaf:Person ;
    sh:property [
        sh:path schema:relatedTo ;
        sh:maxCount 0 ;
        sh:message "A person must not be connected to another person via schema:relatedTo. It means a potential conflict of interest."
    ] .


In [6]:
conforms, results_graph, results_text = run_validation_pipeline("../../data/chicago/conflict.ttl", shacl_shape)

Data Graph has 388010 statements.
SHACL Graph has 6 statements.


In [7]:
print(results_text)

Validation Report
Conforms: False
Results (72):
Constraint Violation in MaxCountConstraintComponent (http://www.w3.org/ns/shacl#MaxCountConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:maxCount Literal("0", datatype=xsd:integer) ; sh:message Literal("A person must not be connected to another person via schema:relatedTo. It means a potential conflict of interest.") ; sh:path schema:relatedTo ]
	Focus Node: <http://xmlns.com/foaf/0.1/Person/4:5bc9e9e3-9f8e-4060-84df-00fa505e2753:450997>
	Result Path: schema:relatedTo
	Message: A person must not be connected to another person via schema:relatedTo. It means a potential conflict of interest.
Constraint Violation in MaxCountConstraintComponent (http://www.w3.org/ns/shacl#MaxCountConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:maxCount Literal("0", datatype=xsd:integer) ; sh:message Literal("A person must not be connected to another person via schema:relatedTo. It means a potential conflict of interest.") ; 

# LangGraph React Agent

In [8]:
import json

@tool
def generate_shacl_shape(rdf_schema: str, message: str):
    """
    Generates a SHACL NodeShape based on the RDF schema and a message describing the validation.

    Expects two parameters:
      - 'rdf_schema': a string with RDF prefixes and assumptions
      - 'message': a string describing the SHACL rule to implement
    Returns the SHACL NodeShape in Turtle syntax as a string.
    """

    if not rdf_schema or not message:
        return "Error: 'rdf_schema' and 'message' must be provided."

    prompt = SHACL_PROMPTS["template"]

    return prompt

@tool
def run_shacl_validation(unused: str):
    """Process and report SHACL validation results from a structured representation of Graph data"""
    graph_data = parse_shacl_result(results_graph)
    graph_data_str = json.dumps(graph_data, indent=2)
    return graph_data_str

@tool
def get_context_from_neo4j(nodes_with_issues: list):
    """
    For each node, fetch actual related context from Neo4j using the configured Cypher queries.
    Returns real query results.
    """

    return run_explainability_query(neo4j_graph, nodes_with_issues)


tools = [generate_shacl_shape, run_shacl_validation, get_context_from_neo4j]

In [9]:
from langgraph.prebuilt import create_react_agent

def explain_shacl_issues(llm, tools):
    """
    Run a LangGraph agent to summarize and explain SHACL validation results,
    including deeper insights via Cypher queries.
    
    Parameters:
        llm: The language model instance.
        tools: List of tools including any CypherQuery tool needed.
    
    Returns:
        The explanation string from the agent's final message.
    """

    # System message (persona and scope)
    system_message = (
        "You are a helpful assistant who reports SHACL validation issues and explains them clearly. "
        "You have access to context through Cypher queries and can analyze ContractRecord and LicenseRecord details."
    )

    # Build the LangGraph REAct-style agent
    langgraph_agent_executor = create_react_agent(llm, tools, prompt=system_message)

    # Structured, improved prompt
    base_query = AGENT_PROMPTS["explain"]

    # Invoke the agent
    print("---DEBUGGING")
    for step in langgraph_agent_executor.stream({"messages": [("human", base_query)]}):
        process_agent_step(step)
    
    print("---FINAL VERSION")
    response = langgraph_agent_executor.invoke({"messages": [("human", base_query)]})
    return response["messages"][-1].content

In [10]:
response = explain_shacl_issues(llm=llm, tools=tools)
print(response)

---DEBUGGING

=== Agent Step ===

🔧 Tool Call: generate_shacl_shape
🧾 Arguments:
  - rdf_schema: @prefix schema: <http://schema.org/> .
@prefix ex: <http://example.org/> .

ex:Person a rdfs:Class ;...
  - message: Validate that no two people connected by schema:relatedTo have the same surname to avoid potential c...

🔧 Tool Call: run_shacl_validation
🧾 Arguments:
  - unused: 

=== Tool Results ===

🛠️ Tool Name: generate_shacl_shape
🆔 Tool Call ID: call_p4F9EVaaO2QUhb2R9WxK6on4
📄 Content:

        You are an expert in SHACL and RDF validation.

        Given the following RDF schema:
        {rdf_schema}

        And the following instruction:
        "{message}"

        Write only the SHACL NodeShape in Turtle syntax.

        Important formatting rules:
        - Use exactly the NodeShape URI <#DisconnectedPeopleShape>.
        - Include all necessary prefixes.
        - Do NOT add any ```turtle, ``` or other code block markers.
        - Do NOT add any extra text, explanation, or c

In [11]:
# Showing bridges between highly-connected records
jg.show_cypher("""
               MATCH 
                    path=(p2:Person)<-[:RECORD_RESOLVED_TO]-(:PersonRecord)
                    -[:WORKS_FOR_DEPARTMENT]->(dept1:Department)
                    -[:IS_SIMILAR_TO]->(dept2:Department)
                    -[:ASSIGNS_CONTRACT]->(:Contract)<-[:INCLUDED_IN_CONTRACT]-(c:ContractRecord)
                    -[:HAS_VENDOR]->(vendor1:Organization)
                    -[:BELONGS_TO_ORG_GROUP]->(orgGroup:OrganizationGroup)<-[:BELONGS_TO_ORG_GROUP]-
                    (vendor2:Organization)<-[:WORKS_FOR_ORG]-(p1:Person)
                WHERE 
                    (
                        elementId(p2) = "4:5bc9e9e3-9f8e-4060-84df-00fa505e2753:492537 "OR 
                        elementId(p1) = "4:5bc9e9e3-9f8e-4060-84df-00fa505e2753:492537"
                    )
                    AND vendor1 <> vendor2
                    AND p1 <> p2
                    AND toLower(p1.name) <> toLower(p2.name)
                    AND split(toLower(p1.name), " ")[-1] = split(toLower(p2.name), " ")[-1]
                RETURN path
                LIMIT 5
               """)

GraphWidget(layout=Layout(height='650px', width='100%'))