In [None]:
%pip install openai neo4j langchain_openai

In [None]:

import os
from neo4j import GraphDatabase
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain.schema import HumanMessage
import pandas as pd

# --- Azure OpenAI config ---
# Secrets and endpoints are read from environment variables when they are set, so
# real credentials never have to be saved inside the notebook. The original
# literals are kept as fallbacks, which leaves behaviour unchanged when no
# variable is defined.
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "PASTE_YOUR_AZURE_OPENAI_KEY_HERE")
API_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT", "https://ameytxtai.openai.azure.com/")
API_VERSION = "2023-12-01-preview"
DEPLOYMENT_NAME_LLM = "test_jpm_3_5"  # deployment passed to the chat model
DEPLOYMENT_NAME_EMBED = "text-embedding01"  # deployment passed to the embedding model

# --- Neo4j config ---
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://c0bc26d2.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "PASTE_YOUR_NEO4J_PASSWORD_HERE")
VECTOR_INDEX_NAME = "chunkEmbeddings"  # vector index queried during retrieval

# --- Initialize clients ---

# Neo4j driver, authenticated with the username/password pair from the config.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
)

# Embedding client; used to vectorise the user question.
embedder = AzureOpenAIEmbeddings(
    azure_endpoint=API_ENDPOINT,
    api_key=API_KEY,
    api_version=API_VERSION,
    azure_deployment=DEPLOYMENT_NAME_EMBED,
)

# Chat client; used to generate the final answer from the assembled prompt.
llm = AzureChatOpenAI(
    azure_endpoint=API_ENDPOINT,
    openai_api_key=API_KEY,
    openai_api_version=API_VERSION,
    deployment_name=DEPLOYMENT_NAME_LLM,
    temperature=0.3,
)


In [None]:

# Question to answer; it is embedded here and later inserted into the LLM prompt.
query = "What are the risks that Apple faces?"

# Vector representation of the question, used for the vector-index lookup.
query_vector = embedder.embed_query(text=query)


In [None]:

# Number of nearest-neighbour chunks to fetch from the vector index.
TOP_K = 10

# Plain (non-f) string: every dynamic value is passed as a Cypher parameter, so no
# Python interpolation is needed and literal braces in the query cannot break it.
# NOTE(review): id() is deprecated in Neo4j 5 in favour of elementId(); it is kept
# because the graph-enrichment step matches on the same integer id.
VECTOR_SEARCH_QUERY = """
CALL db.index.vector.queryNodes($index_name, $top_k, $query_vector)
YIELD node, score
RETURN node.text AS text, id(node) AS node_id, score
"""

with driver.session() as session:
    result = session.run(
        VECTOR_SEARCH_QUERY,
        index_name=VECTOR_INDEX_NAME,
        top_k=TOP_K,
        query_vector=query_vector,
    )
    # Materialise the rows while the session is still open.
    records = result.data()

df = pd.DataFrame(records)
print("Retrieved Chunks:")
print(df)

# Ids of the matched chunk nodes, in result order, for the enrichment step.
node_ids = [record["node_id"] for record in records]


In [None]:

# Enrich all retrieved chunks with their company and risk factors in ONE round
# trip instead of one query per chunk.
# `rank` is the chunk's position in `node_ids`. Grouping by it keeps the rows
# separate per chunk (as the per-node queries did), and ordering by it keeps the
# rows in vector-search order. An empty `node_ids` yields an empty range and
# therefore no rows.
ENRICHMENT_QUERY = """
UNWIND range(0, size($node_ids) - 1) AS rank
MATCH (n) WHERE id(n) = $node_ids[rank]
MATCH (n)-[:FROM_DOCUMENT]-(doc:Document)-[:FILED]-(company:Company)-[:FACES_RISK]-(risk:RiskFactor)
WITH rank,
     company.name AS company_name,
     n.text AS chunk_text,
     collect(DISTINCT risk.name) AS risks
RETURN company_name AS company, chunk_text AS context, risks
ORDER BY rank
"""

with driver.session() as session:
    # Materialise the rows while the session is still open.
    context_records = session.run(ENRICHMENT_QUERY, node_ids=node_ids).data()

if len(context_records) == 0:
    # The prompt-building step uses the raw chunk text in this case.
    print("No enriched context found; falling back to plain chunk text.")
else:
    print("Enriched Context Records Found:")
    df_context = pd.DataFrame(context_records)
    print(df_context)


In [None]:

def build_context(context_records, records):
    """Assemble the context section of the prompt.

    Args:
        context_records: Enriched rows with the keys ``company``, ``context`` and
            ``risks`` (a list of risk names). May be empty.
        records: Raw vector-search rows with a ``text`` key; used only when
            ``context_records`` is empty.

    Returns:
        One formatted section per enriched row separated by blank lines, or, as a
        fallback, the non-empty chunk texts joined by newlines.
    """
    if context_records:
        sections = []
        for record in context_records:
            # The keys are always present in a query row, so a `.get` default
            # would never apply; `or` also covers values that came back as null.
            company = record.get("company") or "Unknown Company"
            risks = ", ".join(str(name) for name in (record.get("risks") or []))
            context_text = record.get("context") or ""
            sections.append(f"Company: {company}\nRisks: {risks}\nContext: {context_text}")
        return "\n\n".join(sections)

    # Fallback to plain text chunks; skip chunks without text, since joining a
    # None value would raise TypeError.
    return "\n".join(r["text"] for r in records if r.get("text"))


final_context = build_context(context_records, records)

prompt = f"""
You are a helpful assistant. Based on the following context, answer the question.

Context:
{final_context}

Question:
{query}
"""

# `invoke` is the supported entry point; calling the model object directly is deprecated.
response = llm.invoke([HumanMessage(content=prompt)])
print("\nResponse from Azure OpenAI:")
print(response.content)
