In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Load helper functions and setup
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)

from analysis.chicago.setup import setup_llm_and_graph
from analysis.chicago.utils import run_validation_pipeline, parse_shacl_result, process_agent_step
from analysis.chicago.regulatory_compliance.prompt import SHACL_PROMPTS, AGENT_PROMPTS
from analysis.chicago.regulatory_compliance.regulatory_utils import run_explainability_query

llm, neo4j_graph = setup_llm_and_graph()

# SHACL Validation Test

In [4]:
# Test the generation of the SHACL rules
rdf_schema = SHACL_PROMPTS["rdf_schema"]
message = SHACL_PROMPTS["message"]

prompt = PromptTemplate(
    input_variables=["message", "rdf_schema"],
    template=SHACL_PROMPTS["template"]
)

chain = prompt | llm | StrOutputParser()
sparql_query = chain.invoke({"message": message, "rdf_schema": rdf_schema})
shacl_shape = f'''
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix gr: <http://purl.org/goodrelations/v1#> .
@prefix pco: <http://purl.org/procurement/public-contracts#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<#BusinessLicenseContractShape> a sh:NodeShape ;
    sh:targetClass gr:BusinessEntity ;
    sh:message "The newest license end date must not be before the oldest contract start date." ;
    sh:sparql [
        a sh:SPARQLConstraint ;
        sh:message "The newest license end date must not be before the oldest contract start date." ;
        sh:select \"\"\" {sparql_query}\"\"\" ;
    ] .
'''
print(shacl_shape)


@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix gr: <http://purl.org/goodrelations/v1#> .
@prefix pco: <http://purl.org/procurement/public-contracts#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<#BusinessLicenseContractShape> a sh:NodeShape ;
    sh:targetClass gr:BusinessEntity ;
    sh:message "The newest license end date must not be before the oldest contract start date." ;
    sh:sparql [
        a sh:SPARQLConstraint ;
        sh:message "The newest license end date must not be before the oldest contract start date." ;
        sh:select """ SELECT ?this ?maxLicenseEndDate ?minContractStartDate WHERE {
  {
    SELECT ?this (MAX(?licenseEndDate) AS ?maxLicenseEndDate) WHERE {
      ?this <http://purl.org/goodrelations/v1#hasLicense> ?license .
      ?license <http://purl.org/procurement/public-contracts#actualEndDate> ?licenseEndDate .
    } GROUP BY ?this
  }
  {
    SELECT ?this (MIN(?contractStartDate) AS ?minContractStartDate) WHERE {
      ?contract <http://purl

In [5]:
conforms, results_graph, results_text = run_validation_pipeline("../../data/chicago/compliance.ttl", shacl_shape)

Data Graph has 1117563 statements.
SHACL Graph has 7 statements.


In [6]:
print(results_text)

Validation Report
Conforms: False
Results (13):
Constraint Violation in SPARQLConstraintComponent (http://www.w3.org/ns/shacl#SPARQLConstraintComponent):
	Severity: sh:Violation
	Source Shape: <file:///Users/giuseppefutia/Desktop/code/klab/analysis/chicago/#BusinessLicenseContractShape>
	Focus Node: <http://purl.org/goodrelations/v1#BusinessEntity/4:5bc9e9e3-9f8e-4060-84df-00fa505e2753:2321214>
	Value Node: <http://purl.org/goodrelations/v1#BusinessEntity/4:5bc9e9e3-9f8e-4060-84df-00fa505e2753:2321214>
	Source Constraint: [ rdf:type sh:SPARQLConstraint ; sh:message Literal("The newest license end date must not be before the oldest contract start date.") ; sh:select Literal(" SELECT ?this ?maxLicenseEndDate ?minContractStartDate WHERE {
  {
    SELECT ?this (MAX(?licenseEndDate) AS ?maxLicenseEndDate) WHERE {
      ?this <http://purl.org/goodrelations/v1#hasLicense> ?license .
      ?license <http://purl.org/procurement/public-contracts#actualEndDate> ?licenseEndDate .
    } GROUP BY ?t

# LangGraph React Agent

In [7]:
import json

@tool
def generate_shacl_shape(rdf_schema: str, message: str):
    """
    Generates a SHACL NodeShape based on the RDF schema and a message describing the validation.

    Expects two parameters:
      - 'rdf_schema': a string with RDF prefixes and assumptions
      - 'message': a string describing the SHACL rule to implement
    Returns the SHACL NodeShape in Turtle syntax as a string.
    """

    if not rdf_schema or not message:
        return "Error: 'rdf_schema' and 'message' must be provided."

    prompt = SHACL_PROMPTS["template"]

    return prompt

@tool
def run_shacl_validation(unused: str):
    """Process and report SHACL validation results from a structured representation of Graph data"""
    graph_data = parse_shacl_result(results_graph)
    graph_data_str = json.dumps(graph_data, indent=2)
    return graph_data_str

@tool
def get_context_from_neo4j(nodes_with_issues: list):
    """
    For each node, fetch actual related context from Neo4j using the configured Cypher queries.
    Returns real query results.
    """

    return run_explainability_query(neo4j_graph, nodes_with_issues)


tools = [generate_shacl_shape, run_shacl_validation, get_context_from_neo4j]

In [8]:
from langgraph.prebuilt import create_react_agent

def explain_shacl_issues(llm, tools):
    """
    Run a LangGraph agent to summarize and explain SHACL validation results,
    including deeper insights via Cypher queries.
    
    Parameters:
        llm: The language model instance.
        tools: List of tools including any CypherQuery tool needed.
    
    Returns:
        The explanation string from the agent's final message.
    """

    # System message (persona and scope)
    system_message = (
        "You are helpful assistant that it is able to generate SPARQL query for RDF validation."
        "You are a helpful assistant who reports SHACL validation issues and explains them clearly. "
        "You have access to context through Cypher queries and can analyze ContractRecord and LicenseRecord details."
    )

    # Build the LangGraph REAct-style agent
    langgraph_agent_executor = create_react_agent(llm, tools, prompt=system_message)

    # Structured, improved prompt
    base_query = AGENT_PROMPTS["explain"]

    # Invoke the agent
    print("---DEBUGGING")
    for step in langgraph_agent_executor.stream({"messages": [("human", base_query)]}):
        process_agent_step(step)
    
    print("---FINAL VERSION")
    response = langgraph_agent_executor.invoke({"messages": [("human", base_query)]})
    return response["messages"][-1].content

In [9]:
response = explain_shacl_issues(llm=llm, tools=tools)
print(response)

---DEBUGGING

=== Agent Step ===

🔧 Tool Call: generate_shacl_shape
🧾 Arguments:
  - rdf_schema: @prefix ex: <http://example.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:ContractR...
  - message: Validate that for each ContractRecord, the startDate is before the endDate, and for each LicenseReco...

=== Tool Results ===

🛠️ Tool Name: generate_shacl_shape
🆔 Tool Call ID: call_cocnkzZnZqabbmN48S3faT2i
📄 Content:
You are an expert in SHACL and SPARQL.

        Given the following RDF schema (Turtle format):
        {rdf_schema}

        And the given message:
        "{message}"

        Write only the SPARQL SELECT query that enforces this rule. 
        Do not include any explanations, comments, SHACL syntax, sparql or RDF prefixes.
        Do not include any sparql quote. 
        Output only the raw SPARQL query, starting directly with SELECT.
        

=== Agent Step ===

🔧 Tool Call: run_shacl_validation
🧾 Arguments:
  - unused: 

=== Tool Results ===

🛠️ Tool Nam