In [None]:
!pip install requests  # lightweight, no OpenAI SDK needed
!pip install langchain
!pip install neo4j
!pip install langchain_community

In [3]:
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from datetime import date

In [4]:
import os
# Azure settings are mandatory: os.environ[...] raises KeyError if a variable is unset.
key_id    = os.environ["AZURE_KEY_ID"]
endpoint  = os.environ["AZURE_AI_ENDPOINT"]
api_key   = os.environ["AZURE_AI_KEY"]

In [5]:
# Short aliases; url_value is the value the request cells send in the "api-key" header.
url_key, url_value = key_id, api_key

In [6]:
import os
import json
import requests

# ---- Azure OpenAI (behind APIM) settings ------------------------------------
API_BASE = "https://apim-idmrncl4iiyvo.azure-api.net/aoai/openai"
DEPLOYMENT = "gpt-4o"                  # deployment name configured in the portal
API_VER = "2025-01-01-preview"         # sent as the api-version query parameter
API_KEY = os.getenv("AOAI_APIM_KEY")   # NOTE: read here but not sent; the header below uses url_value
# ------------------------------------------------------------------------------

url = "{}/deployments/{}/chat/completions?api-version={}".format(
    API_BASE, DEPLOYMENT, API_VER
)

# `headers` and `HEADERS` are two names for the same dict object.
HEADERS = headers = {
    "Content-Type": "application/json",
    "api-key": url_value,
}

# One-off smoke test of the endpoint with a generic assistant prompt.
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "Can you create a Cypher query for Neo4j on temporal search of point-in-time?",
        },
    ],
    "max_tokens": 500,
}

resp = requests.post(url, json=payload, headers=headers, timeout=30)
resp.raise_for_status()  # stop here on any HTTP error status

print(json.dumps(resp.json(), indent=2))


{
  "choices": [
    {
      "content_filter_results": {
        "hate": {
          "filtered": false,
          "severity": "safe"
        },
        "protected_material_code": {
          "filtered": false,
          "detected": false
        },
        "protected_material_text": {
          "filtered": false,
          "detected": false
        },
        "self_harm": {
          "filtered": false,
          "severity": "safe"
        },
        "sexual": {
          "filtered": false,
          "severity": "safe"
        },
        "violence": {
          "filtered": false,
          "severity": "safe"
        }
      },
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "annotations": [],
        "content": "Certainly! When you want to perform a temporal search in Neo4j using Cypher, you can leverage the temporal data types provided by Neo4j like `Date`, `Time`, `LocalDateTime`, `DateTime`, etc. These temporal types help you perfor

## Connect To NEO4J

In [7]:
# Bolt URI of the Neo4j server; set NEO4J_URI in the environment to point the
# notebook at a different instance without editing this cell.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://citz-imb-ai-neo4j-svc:7687")

In [8]:
#NEO4J_URI = 'bolt://10.98.229.110:7687'
# Credentials and database name are read from the environment when set, so a
# real password does not have to live in the notebook. The literals are only
# the original development defaults and keep the cell working unchanged.
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "12345678")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# connect with the graph
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

  kg = Neo4jGraph(


## System prompt for temporal search

In [9]:
# Captured once; the SYSTEM_PROMPT f-strings in the following cells interpolate it.
today = date.today()
print(f"Today's date: {today}")

Today's date: 2025-07-07


In [10]:
# ── 1. THE SYSTEM PROMPT (first version) ─────────────────────────────────────
# NOTE: SYSTEM_PROMPT is assigned again in the next two cells, so after a
# top-to-bottom run this version is not the one build_payload() sends.
# NOTE(review): the second example query below lists n.snapshot_id twice in RETURN.
SYSTEM_PROMPT = f"""
You are CYPHER-GPT.

TASK
• Translate the user’s question about British-Columbia laws into ONE Cypher
  query.
• Your output must be ONLY a ```cypher code block – no prose, no comments.

DATA MODEL (important)
• Each law chunk is a node with label like:
      :consolidation_<SNAPSHOT_ID>_<SOURCE_TYPE>
      – <SNAPSHOT_ID> = 14–43 (Acts) or 68–110 (Regs)
      – <SOURCE_TYPE>  = 'act' or 'reg'
• Properties:
      act_title       : string      // for Acts
      reg_title       : string      // for Regs
      source_type     : "act" or "reg"
      snapshot_id     : string
      snapshot_date   : date
      section_number  : string
      section_title   : string
      content         : string
      url             : string

TEMPORAL LOGIC
• “First introduced” → node with **earliest** snapshot_date.
• “As of 2019” → node with latest snapshot_date ≤ date("2019-01-01")

MACROS YOU MAY COPY
; Full detail of earliest version of an Act
MATCH (n)
WHERE n.act_title = "Coal Act" AND n.source_type = "act"
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url
ORDER BY n.snapshot_date ASC
LIMIT 1;

; Full detail of a Regulation as of 2015-01-01
WITH date("2015-01-01") AS target
MATCH (n)
WHERE n.reg_title = "Occupational Health Regulation"
  AND n.source_type = "reg"
  AND n.snapshot_date <= target
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url,
       n.source_type,
       n.snapshot_id
ORDER BY n.snapshot_date DESC
LIMIT 1;

RESPONSE FORMAT
• Output ONLY a Cypher query inside a ```cypher block.
• No explanation, comments, or metadata.
* cypher query must include the return of all the properties of the nodes

OTHER FACTS:
Today's date: {today}

"""

In [11]:
# Second version of the system prompt: adds a QUERY RULES section covering date
# phrases and duplicate-free RETURN lists. This assignment replaces any earlier
# SYSTEM_PROMPT value and is itself replaced by the assignment in the next cell.
SYSTEM_PROMPT = f"""
You are CYPHER-GPT.

TASK
• Translate the user’s question about British Columbia laws into ONE Cypher query.
• Output must be ONLY a ```cypher code block – no explanation, no prose, no comments.

DATA MODEL
• Each law section is a node with label:
      :consolidation_<SNAPSHOT_ID>_<SOURCE_TYPE>
      – <SNAPSHOT_ID>: string ID (14–43 for Acts, 68–110 for Regulations)
      – <SOURCE_TYPE>: "act" or "reg"
• Node Properties:
      act_title       : string  // for Acts
      reg_title       : string  // for Regulations
      source_type     : "act" or "reg"
      snapshot_id     : string
      snapshot_date   : date
      section_number  : string
      section_title   : string
      content         : string
      url             : string

QUERY RULES
• Always use consistent aliases and formatting.
• Always return **all listed properties**, **without duplicates**.
• If source_type is "act", use `act_title`; if "reg", use `reg_title`.
• Use `WITH date("YYYY-MM-DD") AS target` for date comparisons.
• Use `ORDER BY n.snapshot_date ASC|DESC` with `LIMIT 1` for snapshot picking.
• NEVER return duplicate fields like `snapshot_id` more than once.
• NEVER guess ambiguous phrases—use the date closest to the question.
    - "as of 2015" → `snapshot_date <= date("2015-01-01")`
    - "around mid-2014" → `snapshot_date <= date("2014-07-01")`
    - "Q4 2009" → `snapshot_date <= date("2009-12-31")`
• “First introduced” → sort by ASC and limit to 1.
• “Latest before [DATE]” → DESC and limit 1 with date filter.
• “Changed since [DATE]” → filter with `snapshot_date > DATE` and sort ASC.

EXAMPLE MACROS

; Earliest version of an Act
MATCH (n)
WHERE n.act_title = "Coal Act" AND n.source_type = "act"
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url
ORDER BY n.snapshot_date ASC
LIMIT 1;

; Regulation as of a given date
WITH date("2015-01-01") AS target
MATCH (n)
WHERE n.reg_title = "Occupational Health Regulation"
  AND n.source_type = "reg"
  AND n.snapshot_date <= target
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url,
       n.source_type
ORDER BY n.snapshot_date DESC
LIMIT 1;

OTHER FACTS
Today's date: {today}
"""

In [12]:
# Third version of the system prompt. It is the last SYSTEM_PROMPT assignment
# visible in this part of the notebook, so it is the value build_payload()
# reads when the cells are run top to bottom.
# The doubled braces in duration({{days:1}}) are f-string escapes that render
# as {days:1} in the text sent to the model.
SYSTEM_PROMPT = f"""
You are CYPHER-GPT.

TASK
• Translate the user’s question about British Columbia laws into ONE valid Cypher query.
• Your output must be ONLY a ```cypher code block – no explanation, no prose, no comments.

DATA MODEL
• Each law section is a node with label:
      :consolidation_<SNAPSHOT_ID>_<SOURCE_TYPE>
      – <SNAPSHOT_ID>: string ID (14–43 for Acts, 68–110 for Regulations)
      – <SOURCE_TYPE>: "act" or "reg"

• Node Properties:
      act_title       : string  // use only if source_type = "act"
      reg_title       : string  // use only if source_type = "reg"
      source_type     : "act" or "reg"
      snapshot_id     : string
      snapshot_date   : date
      section_number  : string
      section_title   : string
      content         : string
      url             : string

RULES FOR VALID CYPHER
• Always return all properties listed above — exactly once each. Do not repeat fields.
• NEVER return the same field more than once (e.g., `snapshot_id` must appear only once in RETURN).
• NEVER order by a property that is not projected in the current RETURN or WITH clause.
• If you use `DISTINCT` or aggregate (e.g., `MAX()`), you must:
   – Only ORDER BY columns present in the RETURN or WITH.
   – Avoid accessing `n.<field>` that is not grouped or aggregated.
• If you use `WITH`, pass all necessary fields forward to the next clause.

TEMPORAL LOGIC
• “First introduced” → node with **earliest** `snapshot_date` (use `ORDER BY ASC LIMIT 1`)
• “As of [DATE]” → latest node with `snapshot_date <= date(...)` (use `ORDER BY DESC LIMIT 1`)
• “Changed since [DATE]” → all nodes with `snapshot_date > date(...)` (use `ORDER BY ASC`)
• “On or around [DATE]” → ±1 day (use duration arithmetic)
• “Current in [Qx YYYY]” → interpret Q1–Q4 as last day of that quarter

REGULATION vs ACT
• If source_type = "act", match using `act_title`
• If source_type = "reg", match using `reg_title`

EXAMPLE MACROS

; Earliest version of an Act
MATCH (n)
WHERE n.act_title = "Coal Act" AND n.source_type = "act"
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url,
       n.act_title,
       n.source_type
ORDER BY n.snapshot_date ASC
LIMIT 1;

; Regulation as of a given date
WITH date("2015-01-01") AS target
MATCH (n)
WHERE n.reg_title = "Occupational Health Regulation"
  AND n.source_type = "reg"
  AND n.snapshot_date <= target
RETURN n.snapshot_id,
       n.snapshot_date,
       n.section_number,
       n.section_title,
       n.content,
       n.url,
       n.reg_title,
       n.source_type
ORDER BY n.snapshot_date DESC
LIMIT 1;

; Acts changed on or around a date
WITH date("2012-12-31") AS target
MATCH (n)
WHERE n.source_type = "act"
  AND n.snapshot_date >= target - duration({{days:1}})
  AND n.snapshot_date <= target + duration({{days:1}})
WITH n.act_title AS act_title, max(n.snapshot_date) AS latest_date
RETURN act_title, latest_date
ORDER BY latest_date DESC;

RESPONSE FORMAT
• Output ONLY a Cypher query inside a ```cypher code block.
• Do not include any explanations, markdown, or text outside the block.

OTHER FACTS
Today's date: {today}
"""

In [13]:
# ── 2. CHAT PAYLOAD BUILDER ──────────────────────────────────────────────────
def build_payload(user_query: str, max_tokens: int = 500) -> dict:
    """
    Assemble the JSON body for a chat-completions request.

    The body has two messages: the module-level SYSTEM_PROMPT as the system
    turn, followed by the user's question with surrounding whitespace removed.

    Parameters
    ----------
    user_query : str
        The question typed by the user, e.g. "When was the Coal Act first introduced?"
    max_tokens : int
        Value placed under the "max_tokens" key of the body.

    Returns
    -------
    dict
        A dict with the keys "messages" and "max_tokens".
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_query.strip()}
    return {"messages": [system_turn, user_turn], "max_tokens": max_tokens}

In [14]:
# System prompt for the summarisation request built in summarize_law_answer().
# It is a plain (non-f) string, so nothing is interpolated into it.
SUMMARY_PROMPT = """
You are a legal research assistant.

You will be given a user question about legislation, a Cypher query that was used to retrieve relevant data from a Neo4j database, and the result of that query.

Your job is to:
1. Interpret what the query was trying to find.
2. Use the Neo4j output to give a complete, concise summary in plain English.
3. Mention the Act/Regulation name, section title (if any), the consolidation date, and any significant content from the text.
4. If available, include the official URL from the node.

Keep the summary to 2–4 short sentences. Do not include the raw Cypher query or raw Neo4j fields in your response.

Example format:
"The Coal Act was first introduced in the 19th consolidation dated 2008-12-12. The first section is titled 'Definitions' and describes what constitutes coal land, lease, location, and other terms. The full text can be accessed [here](https://...)."

Ready to summarize.
"""


In [15]:
def summarize_law_answer(user_question: str, cypher: str, neo4j_output: list) -> str:
    """
    Ask the chat model for a plain-English summary of a query result.

    The user turn contains the question, the Cypher text and the formatted
    Neo4j output; SUMMARY_PROMPT is the system turn. The request goes to the
    chat-completions URL built from API_BASE / DEPLOYMENT / API_VER with the
    shared HEADERS.

    Parameters
    ----------
    user_question : str
        The original question.
    cypher : str
        The Cypher query that produced the result.
    neo4j_output : list
        The query result; it is interpolated into the message as-is.

    Returns
    -------
    str
        The content of the first choice, stripped of surrounding whitespace.

    Raises
    ------
    requests.HTTPError
        Propagated from raise_for_status() on an HTTP error status.
    """
    user_message = f"""Question:
{user_question}

Cypher:
{cypher}

Neo4j Output:
{neo4j_output}
"""
    request_body = {
        "messages": [
            {"role": "system", "content": SUMMARY_PROMPT},
            {"role": "user", "content": user_message},
        ],
        "max_tokens": 300,
        "temperature": 0.3,
    }
    completions_url = f"{API_BASE}/deployments/{DEPLOYMENT}/chat/completions?api-version={API_VER}"

    response = requests.post(completions_url, headers=HEADERS, json=request_body, timeout=30)
    response.raise_for_status()

    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"].strip()


### Quick Demo

In [16]:
# ── 3. EXAMPLE USAGE ─────────────────────────────────────────────────────────
if __name__ == "__main__":
    import json, requests, os

    user_question = "When was the Coal Act first introduced?"
    body          = build_payload(user_question, max_tokens=300)

    # Reuse the endpoint URL and auth headers built in the config cell instead
    # of repeating the endpoint constants (and an unused API_KEY) here, so the
    # demo always talks to the same deployment as the rest of the notebook.
    resp = requests.post(url, headers=HEADERS, json=body, timeout=30)
    resp.raise_for_status()

In [17]:
import re

raw = resp.json()['choices'][0]['message']['content']

# Keep only the body of the first fenced block when the reply has one. This
# drops any prose the model added around the fence and accepts the language
# tag in any letter case. Without a complete fence, fall back to removing the
# fence markers from the whole reply.
fenced = re.search(r"```(?:cypher)?(.*?)```", raw, flags=re.IGNORECASE | re.DOTALL)
if fenced:
    cypher_body = fenced.group(1)
else:
    cypher_body = re.sub(r"```(?:cypher)?", "", raw, flags=re.IGNORECASE)

# Flatten to a single line for printing and for kg.query().
cypher = cypher_body.replace("\n", " ").strip()
print(cypher)


MATCH (n) WHERE n.act_title = "Coal Act" AND n.source_type = "act" RETURN n.snapshot_id,        n.snapshot_date,        n.section_number,        n.section_title,        n.content,        n.url,        n.act_title,        n.source_type ORDER BY n.snapshot_date ASC LIMIT 1;


In [18]:
# Run the generated query; as the last expression, the rows returned are shown as the cell output.
kg.query(cypher)

[{'n.snapshot_id': '19',
  'n.snapshot_date': neo4j.time.Date(2008, 12, 12),
  'n.section_number': None,
  'n.section_title': 'Definitions',
  'n.content': "1in this act :  coal land means land in which the coal or the right to explore for ,  develop and produce coal is vested in or reserved to the government ;  coal land reserve means coal land reserved under section 21 ;  lease means a valid and subsisting lease issued under section 18 ;  licence means a valid and subsisting licence issued under section 12 ;  location means the area of land specified in a licence or lease ;  owner means a the government for land owned by the government ,  b a person registered in the land title office as the registered owner of the surface area or as its purchaser under an agreement for sale ,  and c a person to whom a disposition of crown land has been issued under the land act ;  produce means mining and removing coal for use ,  marketing or sale ;  protected heritage property has the meaning in th

## Full Retrieval

In [19]:
def cypher_query(user_question, max_tokens: int = 300):
    """
    Ask the chat model to translate a question into Cypher.

    The request body comes from build_payload(). The URL is built from the
    notebook-level API_BASE / DEPLOYMENT / API_VER and sent with the shared
    HEADERS, the same configuration summarize_law_answer() uses, rather than
    from a private copy of those settings.

    Parameters
    ----------
    user_question : str
        The question to translate.
    max_tokens : int
        Completion budget passed to build_payload(); defaults to 300.

    Returns
    -------
    The HTTP response object returned by requests.post(); pass it to
    clean_cypher() to obtain the query text.

    Raises
    ------
    requests.HTTPError
        Propagated from raise_for_status() on an HTTP error status.
    """
    body = build_payload(user_question, max_tokens=max_tokens)
    url = f"{API_BASE}/deployments/{DEPLOYMENT}/chat/completions?api-version={API_VER}"

    resp = requests.post(url, headers=HEADERS, json=body, timeout=30)
    resp.raise_for_status()
    return resp

In [20]:
def clean_cypher(resp):
    """
    Extract a single-line Cypher query from a chat-completions response.

    When the reply contains a complete fenced block, only the body of the
    first block is kept. Any prose around the fence is dropped and the
    language tag is matched in any letter case. Without a complete fence, the
    fence markers are removed from the whole reply. Newlines are then replaced
    by spaces and the result is stripped.

    Parameters
    ----------
    resp
        Object whose .json() returns a dict holding the text at
        ["choices"][0]["message"]["content"].

    Returns
    -------
    str
        The query text on one line.

    Raises
    ------
    ValueError
        If the message content is not a string (for example None).
    """
    import re

    raw = resp.json()['choices'][0]['message']['content']
    if not isinstance(raw, str):
        raise ValueError("LLM response has no text content to extract a Cypher query from")

    fenced = re.search(r"```(?:cypher)?(.*?)```", raw, flags=re.IGNORECASE | re.DOTALL)
    if fenced:
        body = fenced.group(1)
    else:
        # No complete fence (plain reply or truncated output): just drop the markers.
        body = re.sub(r"```(?:cypher)?", "", raw, flags=re.IGNORECASE)

    return body.replace("\n", " ").strip()


In [21]:
def temporal_search(user_question: str) -> str:
    """
    Answer a temporal legislation question end to end.

    Pipeline:
    • cypher_query() sends the question to the LLM and returns its HTTP response
    • clean_cypher() pulls the one-line Cypher text out of that response
    • the query is printed, then executed with kg.query()
    • summarize_law_answer() turns question + query + rows into the returned text
    """
    llm_response = cypher_query(user_question)
    query_text = clean_cypher(llm_response)
    print(query_text)

    rows = kg.query(query_text)

    return summarize_law_answer(user_question, query_text, rows)

In [22]:
# --------------------------------------------------------------------------------
# 1)  Temporal test prompts (22 questions) fed to temporal_search() / run_batch()
# --------------------------------------------------------------------------------
TEMPORAL_TESTS = [
    "when was the Family Relations Act repealed?",
    "how old is the Family Relations Act?",
    "When was the Coal Act first introduced?",
    "What did the Coal Act look like as of January 1st, 2015?",
    "Show the latest version of the Coal Act before 2020.",
    "Has the Coal Act changed since 2010?",
    "Which version of the Coal Act was in force on December 31, 2016?",
    "What was the earliest consolidation that included the Environmental Management Act?",
    "Give me the version of the Occupational Health and Safety Regulation valid in 2013.",
    "Was the term 'lease' defined in the Coal Act in 2008?",
    "What was the content of section 3 of the Mines Act in 2011?",
    "When did the term 'unit' first appear in the Coal Act?",
    "What was the law in place for the Coal Act during 2012?",
    "What version of the Health Act was active around mid-2014?",
    "Find the snapshot of the Forest Practices Code that was current in Q4 of 2009.",
    "What did section 5 of the Safety Standards Act say in June 2010?",
    "Was there a consolidation of the Water Sustainability Act in 2022?",
    "What did the Waste Management Act say in the earliest available version?",
    "What regulation governed surface water as of March 31, 2011?",
    "Which acts were updated on or around December 31, 2012?",
    "What did the Workers Compensation Act contain in its 2006 version?",
    "Show the definition section of the Mines Act at its first appearance."
]

In [23]:
# Smoke-test the full pipeline on the first test question; temporal_search() prints the generated Cypher.
summary = temporal_search(TEMPORAL_TESTS[0])

MATCH (n) WHERE n.act_title = "Family Relations Act" AND n.source_type = "act" RETURN n.snapshot_id,        n.snapshot_date,        n.section_number,        n.section_title,        n.content,        n.url,        n.act_title,        n.source_type ORDER BY n.snapshot_date DESC LIMIT 1;


In [24]:
def run_batch(questions, pause_sec: float = 0.5):
    """
    Run temporal_search() over a sequence of questions, printing progress.

    Each question yields one record in the returned list: a dict with
    "question" and "summary" on success, or "question" and "error" (the
    exception text) when anything inside the attempt raises. The function
    sleeps for pause_sec seconds after every question.
    """
    import time

    records = []
    for question in questions:
        print(f"▶️  {question}")
        try:
            answer = temporal_search(question)
            records.append(dict(question=question, summary=answer))
            # Preview only the first 120 characters of the summary.
            print("   ✅", answer[:120], "…\n")
        except Exception as exc:
            records.append(dict(question=question, error=str(exc)))
            print("   ❌ ERROR:", exc, "\n")
        # Pause between questions to stay under the AOAI rate limit.
        time.sleep(pause_sec)
    return records


# ------------------------------------------------------------------
# Example: run on every prompt in TEMPORAL_TESTS
# ------------------------------------------------------------------
if __name__ == "__main__":
    batch_results = run_batch(TEMPORAL_TESTS)


▶️  when was the Family Relations Act repealed?
MATCH (n) WHERE n.act_title = "Family Relations Act" AND n.source_type = "act" RETURN n.snapshot_id,        n.snapshot_date,        n.section_number,        n.section_title,        n.content,        n.url,        n.act_title,        n.source_type ORDER BY n.snapshot_date DESC LIMIT 1;
   ✅ The Family Relations Act was repealed on August 3, 2012. The section titled "Definitions" includes terms such as "child, …

▶️  how old is the Family Relations Act?
MATCH (n) WHERE n.act_title = "Family Relations Act" AND n.source_type = "act" RETURN n.snapshot_id,        n.snapshot_date,        n.section_number,        n.section_title,        n.content,        n.url,        n.act_title,        n.source_type ORDER BY n.snapshot_date ASC LIMIT 1;
   ✅ The Family Relations Act was consolidated on July 11, 2006. The first section is titled "Definitions" and provides defin …

▶️  When was the Coal Act first introduced?
MATCH (n) WHERE n.act_title = "Coal Ac