In [42]:
import os
import re
import ssl

import certifi
import nest_asyncio
import networkx as nx
from langchain.tools import Tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import AzureChatOpenAI
from langgraph.prebuilt import create_react_agent

from CosmosGremlinGraph.GremlinGraph import GremlinGraph

In [12]:
# Load the crime network from the GraphML export into `G`; the NetworkX code
# executed by text_to_nxgraph / decision_maker runs against this object.
G = nx.read_graphml('CosmosGremlinGraph/graph.graphml')

In [None]:
# Patch asyncio so nested event loops are allowed inside the notebook kernel
# (presumably required by the Gremlin client -- TODO confirm).
nest_asyncio.apply()

# TLS context that validates the server certificate against certifi's CA bundle.
ssl_context = ssl.create_default_context(cafile=certifi.where())

# Connection settings are read from the environment so the primary key is never
# stored in the notebook. The endpoint falls back to the project's account URL.
COSMOS_DB_ENDPOINT = os.environ.get(
    'COSMOS_DB_ENDPOINT',
    'wss://sherlock-ai-account.gremlin.cosmos.azure.com:443/',
)
COSMOS_DB_PRIMARY_KEY = os.environ.get('COSMOS_DB_PRIMARY_KEY')
if not COSMOS_DB_PRIMARY_KEY:
    # Fail here with a clear message instead of an opaque auth error later.
    raise RuntimeError(
        "Set the COSMOS_DB_PRIMARY_KEY environment variable to your "
        "Cosmos DB Gremlin primary key before running this cell."
    )
DATABASE = 'sherlock-db'
GRAPH = 'sherlock-ai-graph'

# The username is built as /dbs/<database>/colls/<graph> from the constants above.
graph = GremlinGraph(
    url=COSMOS_DB_ENDPOINT,
    username=f"/dbs/{DATABASE}/colls/{GRAPH}",
    password=COSMOS_DB_PRIMARY_KEY,
    ssl_context=ssl_context
)

In [None]:
# Azure OpenAI Configuration
# The API key is read from the environment; it must never be committed with the
# notebook. NOTE(review): a key was previously hard-coded in this cell and
# should be rotated.
AZURE_OPENAI_ENDPOINT = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://aishu-m8q3ed4m-swedencentral.cognitiveservices.azure.com/",
)
AZURE_OPENAI_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
if not AZURE_OPENAI_KEY:
    # Fail here with a clear message instead of an authentication error on first call.
    raise RuntimeError(
        "Set the AZURE_OPENAI_API_KEY environment variable before running this cell."
    )
AZURE_DEPLOYMENT_NAME = "gpt-4o"

# Initialize the Azure Chat Model in LangChain.
# temperature=0 is passed so generated queries/code vary as little as possible.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_version="2025-01-01-preview",
    azure_deployment=AZURE_DEPLOYMENT_NAME,
    api_key=AZURE_OPENAI_KEY,
    temperature=0
)

In [None]:
# System prompt for the text-to-Gremlin chain: lists the vertex/edge labels and
# vertex properties, and asks the model to reply with a Gremlin query only.
gremlin_msg = """
This is a Chicago Crime Network stored in Azure Cosmos DB Gremlin Graph, with the following node and edge labels:
crime node is connected to all the other nodes by the edges mentioned.
All other nodes are having a inbound connection from crime
Vertex labels are the following: crime,location,crime_type,criminal,district,date,hour Edge labes are the following: located_at,is_type,occurred_at_hour,occurred_on_date,involved_criminal,located_in_district Vertices have following properties: ["crime":["crime_id"],"location":["location"],"crime_type":["crime_type"],"criminal":["name"],"district":["district"],"date":["date"],"hour":["hour"]]'
AVOID USING elementMap() in your queries.
ONLY GIVE ME THE GREMLIN QUERY. DO NOT PROVIDE ANY INSTRUCTIONS.
"""

# The human turn carries the caller's question through the {input} variable.
gremlin_prompt = ChatPromptTemplate.from_messages(
    [("system", gremlin_msg), ("human", "{input}")]
)

# Prompt piped into the chat model; invoked with {"input": <question>}.
gremlin_chain = gremlin_prompt | llm


# System prompt for the text-to-NetworkX chain: asks for exec()-ready Python
# that works on `G` with `nx` and leaves its answer in `FINAL_RESULT`.
networkx_msg = """
** THERE SHOULD BE NO TEXT IN THE OUTPUT, JUST CODE **
1. ONLY provide python code that I can directly execute via `exec()`. Do not provide any instructions.
2. Code must reference `G` as the networkx graph.
3. Use only `networkx (nx)`. nx is already available so no need to import.
4. Store the final result in `FINAL_RESULT`.
5. Use the correct algorithm

Python Code:
"""

# The human turn carries the caller's question through the {input} variable.
networkx_prompt = ChatPromptTemplate.from_messages(
    [("system", networkx_msg), ("human", "{input}")]
)

# Prompt piped into the chat model; invoked with {"input": <question>}.
networkx_chain = networkx_prompt | llm

# System prompt for the orchestration chain: asks the model for Python code
# that calls text_to_gremlin / text_to_nxgraph and finishes with llm.invoke().
decision_msg = """
You are a Criminal Tracking & Analysis Agent using Cosmos Gremlin and NetworkX.
Generate python code based on the user query below understanding which method you need to call and what query is to be passed.
Use hybrid queries where required. For example: To find shortest path between two criminals first call the text_to_nxgraph method 
to get the node ids involved in shortest path which returns node ids no need to extract anything and then get information about these IDs using gremlin method text_to_gremlin with a single query mentioning that these are node ids. Always the last step is to craft and send the prompt to llm.invoke() method to generate a well crafted summary. In the prompt include all the results that we go till now. For paths join them with arrows properly and accurately including all nodes.
Methods available:
- text_to_gremlin (STRICTLY only receives natural language queries then Generates and Executes Gremlin queries in Consmos DB.)
- text_to_nxgraph (Executes python code for networkx graph analysis. Result is returned from this method so no need to execute)
ONLY GIVE ME THE PYTHON CODE. DO NOT PROVIDE ANY INSTRUCTIONS.
"""

# The human turn carries the caller's question through the {input} variable.
decision_prompt = ChatPromptTemplate.from_messages(
    [("system", decision_msg), ("human", "{input}")]
)

# Prompt piped into the chat model; invoked with {"input": <question>}.
decision_chain = decision_prompt | llm

In [None]:
def text_to_gremlin(query: str) -> str:
    """Strictly receives natural language queries then Converts to Gremlin queries and executes it in Azure Cosmos Gremlin.

    The question is sent through ``gremlin_chain``; the Gremlin text is
    extracted from the model's reply and passed to ``graph.query``.

    Args:
        query: Natural-language question (not a Gremlin query).

    Returns:
        Whatever ``graph.query`` returns for the generated traversal.

    Raises:
        ValueError: If the model's reply contains no query text.
    """
    print("Query is: ", query)
    print("Executing Gremlin Query...")

    reply = gremlin_chain.invoke({"input": query}).content

    # The model may or may not wrap the query in a Markdown code fence, and may
    # spread one traversal over several lines. Drop fence lines and blanks,
    # then glue the remaining pieces back into a single traversal string.
    # (Taking "line index 1" breaks on unfenced replies and truncates
    # multi-line queries.)
    query_lines = []
    for raw_line in reply.splitlines():
        piece = raw_line.strip()
        if piece and not piece.startswith("```"):
            query_lines.append(piece)

    if not query_lines:
        raise ValueError("The model reply did not contain a Gremlin query.")

    gremlin_query = "".join(query_lines)

    print("Gremlin query: " , gremlin_query)

    return graph.query(gremlin_query)


def text_to_nxgraph(query: str) -> str:
    """Uses networkx to analyze the crime graph.

    Asks ``networkx_chain`` for Python code answering ``query``, strips any
    Markdown code fence, executes the code with ``G`` and ``nx`` in scope, and
    returns the value the code bound to ``FINAL_RESULT``.

    Args:
        query: Natural-language description of the graph analysis to run.

    Returns:
        The object stored in ``FINAL_RESULT`` by the generated code (not
        necessarily a string), ``"No result found."`` if the code did not set
        it, or an ``"Error executing networkx code: ..."`` string if it raised.
    """

    print("Executing NetworkX Query...")

    graph_analysis_code = networkx_chain.invoke({"input" : query}).content

    # Strip triple backticks if present (opening fence with any language tag,
    # and the closing fence).
    cleaned_code = re.sub(
        r"^```[A-Za-z0-9_+-]*[ \t]*\n|```[ \t]*$",
        "",
        graph_analysis_code,
        flags=re.MULTILINE,
    ).strip()

    print(f"Python Code: \n\n {cleaned_code}")

    # One dict serves as both globals and locals. With separate dicts, exec
    # treats the code like a class body, so functions defined by the generated
    # code cannot see its own top-level variables and fail with NameError.
    namespace = {"G": G, "nx": nx}

    try:
        # SECURITY: this executes model-generated code with full interpreter
        # privileges; only run it in a trusted / sandboxed environment.
        exec(cleaned_code, namespace)
        return namespace.get("FINAL_RESULT", "No result found.")
    except Exception as e:
        return f"Error executing networkx code: {e}"

def decision_maker(query: str):
    """Plan and run a multi-step answer to ``query`` via model-generated code.

    Asks ``decision_chain`` for Python code, strips any Markdown code fence,
    and executes it with ``G``, ``nx``, ``text_to_gremlin``,
    ``text_to_nxgraph`` and ``llm`` in scope.

    Args:
        query: The user's natural-language question.

    Returns:
        The object the generated code bound to ``summary``,
        ``"No result found."`` if it did not set one, or an
        ``"Error executing decision code: ..."`` string if it raised.
    """
    print("Executing Decision Making....")

    decision_code = decision_chain.invoke({"input" : query}).content

    # Remove the opening fence (with any language tag) and the closing fence.
    decision_code_cleaned = re.sub(
        r"^```[A-Za-z0-9_+-]*[ \t]*\n|```[ \t]*$",
        "",
        decision_code,
        flags=re.MULTILINE,
    ).strip()

    print(f"Decision Python Code: \n\n {decision_code_cleaned}")

    # One dict serves as both globals and locals. With separate dicts, exec
    # treats the code like a class body, so functions defined by the generated
    # code cannot see its own top-level variables and fail with NameError.
    namespace = {
        "G": G,
        "nx": nx,
        "text_to_gremlin": text_to_gremlin,
        "text_to_nxgraph": text_to_nxgraph,
        "llm": llm,
    }

    try:
        # SECURITY: this executes model-generated code with full interpreter
        # privileges; only run it in a trusted / sandboxed environment.
        exec(decision_code_cleaned, namespace)

        return namespace.get("summary", "No result found.")
    except Exception as e:
        return f"Error executing decision code: {e}"

In [None]:
# Path question: decision_maker prints the generated plan as it runs and
# returns the value the plan stored in `summary` (or a fallback/error string).
resp = decision_maker("What is the shortest path between Kathleen Davis and Kristine Long?")
print(resp)

In [55]:
# decision_maker returns a plain string on its error / "No result found." paths,
# which has no `.content`; fall back to printing the value itself in that case.
print(getattr(resp, "content", resp))

The shortest path between Kathleen Davis and Kristine Long reveals a connection through a series of nodes that represent crimes, districts, and individuals. Here's a detailed breakdown of the path and its significance:

---

### **Path Summary**
1. **Kathleen Davis** (Node: `Kathleen Davis`, Label: `criminal`)
   - Kathleen Davis is identified as a criminal associated with the crime type **BURGLARY**.
   - She is connected to a specific crime with the ID **11400914**.

2. **Crime ID: 11400914** (Node: `11400914`, Label: `crime`)
   - This node represents a crime with the ID **11400914**, categorized as **BURGLARY**.
   - This crime is linked to **District 25.0**.

3. **District 25.0** (Node: `25.0`, Label: `district`)
   - This node represents **District 25.0**, which is associated with the crime type **CRIMINAL DAMAGE**.
   - The district serves as a geographical link between the two crimes.

4. **Crime ID: 10651561** (Node: `10651561`, Label: `crime`)
   - This node represents a crim

In [None]:
# Lookup question: the result is kept in `resp2` and displayed in the next cell.
resp2 = decision_maker("What are all the locations where Kristine Long has committed a crime?")

Executing Decision Making....
Decision Python Code: 

 # Step 1: Use text_to_gremlin to find all locations where Kristine Long has committed a crime.
locations_result = text_to_gremlin("Find all the locations where Kristine Long has committed a crime.")

# Step 2: Craft and send the prompt to llm.invoke() to generate a well-crafted summary.
summary_prompt = f"Kristine Long has committed crimes in the following locations: {locations_result}. Provide a detailed summary of these locations."
summary = llm.invoke(summary_prompt)

summary
Query is:  Find all the locations where Kristine Long has committed a crime.
Executing Gremlin Query...
Gremlin query:  g.V().hasLabel('criminal').has('name', 'Kristine Long').in('involved_criminal').out('located_at').values('location')
{'locations_result': ['075XX N PAULINA ST', '010XX E 111TH ST', '043XX W NORTH AVE', '008XX S CICERO AVE'], 'summary_prompt': "Kristine Long has committed crimes in the following locations: ['075XX N PAULINA ST', '010XX E 11

In [57]:
# decision_maker returns a plain string on its error / "No result found." paths,
# which has no `.content`; fall back to printing the value itself in that case.
print(getattr(resp2, "content", resp2))

Certainly! Below is a detailed summary of the locations where Kristine Long has reportedly committed crimes. These locations are based on the addresses provided and their general characteristics within the city of Chicago:

---

### 1. **075XX N Paulina St**
   - **Neighborhood**: Rogers Park
   - **Description**: Located on the far North Side of Chicago, Rogers Park is a diverse and densely populated neighborhood. It is known for its cultural vibrancy, historic architecture, and proximity to Lake Michigan. The area around North Paulina Street is primarily residential, with a mix of single-family homes, apartment buildings, and small businesses.
   - **Crime Context**: Crimes in this area may involve residential burglaries, theft, or other property-related offenses, though violent crimes are less common compared to other parts of the city.

---

### 2. **010XX E 111th St**
   - **Neighborhood**: Pullman/Roseland
   - **Description**: This address is located on the South Side of Chicago