In [1]:
import os
import re
import ssl

import certifi
import nest_asyncio
import networkx as nx
from langchain.tools import Tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from langgraph.prebuilt import create_react_agent

from CosmosGremlinGraph.GremlinGraph import GremlinGraph

In [3]:
# Load the graph from its GraphML export into an in-memory NetworkX graph.
# `G` is the graph object that text_to_nxgraph() exposes to generated code.
G = nx.read_graphml('CosmosGremlinGraph/graph.graphml')

In [None]:
# Patch asyncio before the Gremlin client is created.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the Gremlin client needs to drive its own - confirm.
nest_asyncio.apply()

# TLS context that trusts the CA bundle shipped with certifi.
ssl_context = ssl.create_default_context(cafile=certifi.where())

# Cosmos DB Gremlin connection settings.
COSMOS_DB_ENDPOINT = 'wss://sherlock-ai-account.gremlin.cosmos.azure.com:443/'
DATABASE = 'sherlock-db'
GRAPH = 'sherlock-ai-graph'

# The primary key is a secret, so it is read from the environment instead of
# being pasted into (and committed with) the notebook.
COSMOS_DB_PRIMARY_KEY = os.environ.get('COSMOS_DB_PRIMARY_KEY')
if not COSMOS_DB_PRIMARY_KEY:
    raise RuntimeError(
        "COSMOS_DB_PRIMARY_KEY is not set; export your Cosmos DB primary key "
        "in the environment before running this cell."
    )

# The username is the resource path of the graph: /dbs/<database>/colls/<graph>.
graph = GremlinGraph(
    url=COSMOS_DB_ENDPOINT,
    username=f"/dbs/{DATABASE}/colls/{GRAPH}",
    password=COSMOS_DB_PRIMARY_KEY,
    ssl_context=ssl_context
)

In [3]:
# Azure OpenAI Configuration
AZURE_OPENAI_ENDPOINT = "https://aishu-m8m23xc2-eastus2.cognitiveservices.azure.com/"
AZURE_DEPLOYMENT_NAME = "gpt-4o"

# SECURITY: the API key must never be stored in the notebook. It is read from
# the AZURE_OPENAI_API_KEY environment variable. Any key that was previously
# committed in this cell has to be considered leaked and should be rotated.
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
if not AZURE_OPENAI_KEY:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export your Azure OpenAI key "
        "in the environment before running this cell."
    )

# Initialize the Azure Chat Model in LangChain.
# temperature=0 is used for both query/code generation and the agent itself.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_version="2025-01-01-preview",
    azure_deployment=AZURE_DEPLOYMENT_NAME,
    api_key=AZURE_OPENAI_KEY,
    temperature=0,
)

In [28]:
# System-level instructions describing the agent's role and how it should
# combine the two tools. The hybrid-query step names Gremlin, the query
# language this project actually uses against Cosmos DB (it previously said AQL).
langchain_prompt = """
You are a Criminal Tracking & Analysis Agent using Cosmos Gremlin and NetworkX.
Primary Goal: use Gremlin queries or NetworkX code to answer user questions about the crime network.
In case of hybrid queries follow this sequence:
- Get the filtered data using Gremlin
- Use this data in the networkx code.
Respond to user queries with the minimal steps required.
Give detailed response and use proper formatting(using bullet points). Make sure to include the results in your output.
"""

# System message for the text-to-Gremlin chain: it describes the graph schema
# (vertex labels, edge labels, vertex properties) and asks for a bare query.
gremlin_msg = """
This is a Chicago Crime Network stored in Azure Cosmos DB Gremlin Graph, with the following node and edge labels:
crime node is connected to all the other nodes by the edges mentioned.
All other nodes are having a inbound connection from crime
Vertex labels are the following: crime,location,crime_type,criminal,district,date,hour Edge labes are the following: located_at,is_type,occurred_at_hour,occurred_on_date,involved_criminal,located_in_district Vertices have following properties: ["crime":["crime_id"],"location":["location"],"crime_type":["crime_type"],"criminal":["name"],"district":["district"],"date":["date"],"hour":["hour"]]'
MAKE SURE TO ALWAYS OUTPUT ID OF THE NODE. FILTER THE OUTPUT TO MINIMUM REQUIRED DATA WITH ONLY KEYWORDS NOT A MAP.
ONLY GIVE ME THE GREMLIN QUERY. DO NOT PROVIDE ANY INSTRUCTIONS.
"""

# Two-message template: the fixed system message above, followed by the
# caller's text substituted for the `input` variable.
gremlin_prompt = ChatPromptTemplate.from_messages(
    [("system", gremlin_msg), ("human", "{input}")]
)

# Prompt piped into the chat model; invoked with {"input": <question>}.
gremlin_chain = gremlin_prompt | llm


# System message for the text-to-NetworkX chain: the model must answer with
# executable Python only, operate on `G` through `nx`, and leave its answer in
# a variable named FINAL_RESULT.
networkx_msg = """
** THERE SHOULD BE NO TEXT IN THE OUTPUT, JUST CODE **
1. ONLY provide python code that I can directly execute via `exec()`. Do not provide any instructions.
2. Code must reference `G` as the networkx graph.
3. Use only `networkx (nx)`. nx is already available so no need to import.
4. Store the final result in `FINAL_RESULT`.
5. Use the correct algorithm

Python Code:
"""

# Two-message template: the fixed system message above, followed by the
# caller's text substituted for the `input` variable.
networkx_prompt = ChatPromptTemplate.from_messages(
    [("system", networkx_msg), ("human", "{input}")]
)

# Prompt piped into the chat model; invoked with {"input": <question>}.
networkx_chain = networkx_prompt | llm

In [None]:
def _extract_gremlin_query(llm_text: str) -> str:
    """Return the Gremlin query contained in a model reply, without Markdown fences."""
    # Keep every non-empty line that is not a fence marker (``` or ```gremlin).
    # This works for a bare one-line reply as well as for a fenced, possibly
    # multi-line, query.
    stripped_lines = (line.strip() for line in llm_text.strip().splitlines())
    query_lines = [
        line for line in stripped_lines
        if line and not line.startswith("```")
    ]
    return "\n".join(query_lines)


def text_to_gremlin(query: str) -> str:
    """Translate a natural-language question to Gremlin and run it on Cosmos DB.

    Strictly receives natural language queries; the question is sent through
    `gremlin_chain`, the query text is extracted from the model reply, and the
    query is executed with `graph.query`.

    Args:
        query: The question, in natural language.

    Returns:
        Whatever `graph.query` returns for the generated query, or an error
        string when the model reply contained no query text.
    """
    print("Query is: ", query)
    print("Executing Gremlin Query...")

    # Previously the second line of the reply was taken unconditionally, which
    # raised IndexError on a one-line reply and truncated multi-line queries.
    llm_reply = gremlin_chain.invoke({"input": query}).content
    gremlin_query = _extract_gremlin_query(llm_reply)
    if not gremlin_query:
        return "Error: the model did not return a Gremlin query."

    print("Gremlin query: " , gremlin_query)

    return graph.query(gremlin_query)


def text_to_nxgraph(query: str) -> str:
    """Uses networkx to analyze the crime graph.

    The request is sent through `networkx_chain`, the returned Python code is
    stripped of Markdown fences and executed with `G` and `nx` in scope.

    Args:
        query: Description of the analysis to perform.

    Returns:
        The value the generated code stored in `FINAL_RESULT`,
        "No result found." when it stored nothing, or an error string when the
        generated code raised.
    """

    print("Executing NetworkX Query...")

    # A prompt | llm chain is a Runnable and is not callable; it has to be run
    # through .invoke(), exactly like the Gremlin chain.
    graph_analysis_code = networkx_chain.invoke({"input": query}).content

    # Strip triple backticks if present
    cleaned_code = re.sub(r"^```python\n|```$", "", graph_analysis_code, flags=re.MULTILINE).strip()

    print(f"Python Code: \n\n {cleaned_code}")

    # One shared namespace for globals and locals. With two separate dicts the
    # code runs as if inside a class body, so helper functions and
    # comprehensions in the generated code cannot see names assigned at its
    # top level.
    namespace = {"G": G, "nx": nx}

    try:
        # SECURITY NOTE(review): this executes model-generated code with full
        # interpreter privileges; only use with trusted prompts/environments.
        exec(cleaned_code, namespace)
        return namespace.get("FINAL_RESULT", "No result found.")
    except Exception as e:
        return f"Error executing NetworkX code: {e}"

In [None]:
# Tools the agent may call: one wraps text_to_gremlin, the other text_to_nxgraph.
# The descriptions are what the model sees when choosing a tool.
tools = [
    Tool(name="GremlinQuery", func=text_to_gremlin, description="STRICTLY only receives natural language queries then Generates and Executes Gremlin queries in Consmos DB."),
    Tool(name="NetworkxQuery", func=text_to_nxgraph, description="Generates python code and Performs networkx graph analysis.")
]


# ReAct-style agent over the chat model and the two tools. The system
# instructions in `langchain_prompt` are passed explicitly; without them the
# agent runs with no description of its role or of the hybrid-query sequence.
agent = create_react_agent(model=llm, tools=tools, prompt=langchain_prompt)


def query_graph(query: str) -> dict:
    """Runs a query through the agent.

    Args:
        query: The user's question; sent to the agent as a single user message.

    Returns:
        The value returned by `agent.invoke`, unchanged. For the prebuilt
        LangGraph ReAct agent this is the final state rather than a plain
        string - presumably a mapping with a "messages" list; confirm against
        the installed langgraph version.
    """
    user_message = {"role": "user", "content": query}
    return agent.invoke({"messages": [user_message]})

In [None]:
# Example: ask the agent a natural-language question about one criminal.
# The call is the cell's last expression, so its return value is displayed.
query_graph("What are the locations where crime is committed by Kristine Long?")

Query is:  g.V().has('person', 'name', 'Kristine Long').out('committed').values('location')
Executing Gremlin Query...
