In [1]:
import json


from dotenv import load_dotenv
load_dotenv()
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.graphs import Neo4jGraph
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq



def clean_id(n):
    return n.strip().lower()




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
try:
    graph = Neo4jGraph()
except Exception as e:
    print(e)
    exit()
graph.query("""CREATE VECTOR INDEX section_embedding IF NOT EXISTS FOR (s:Section) ON (s.embedding) OPTIONS {indexConfig: {`vector.dimensions`: 1024,`vector.similarity_function`: 'cosine'}}""")
graph.query("""CREATE VECTOR INDEX concept_embedding IF NOT EXISTS FOR (lc:LegalConcept) ON (lc.embedding) OPTIONS {indexConfig: {`vector.dimensions`: 1024,`vector.similarity_function`: 'cosine'}}""")

  graph = Neo4jGraph()


Could not connect to Neo4j database. Please ensure that the url is correct


NameError: name 'graph' is not defined

In [None]:
graph.query("CREATE CONSTRAINT unique_chapter_id IF NOT EXISTS FOR (c:Chapter) REQUIRE c.id IS UNIQUE")
graph.query("CREATE CONSTRAINT unique_section_id IF NOT EXISTS FOR (s:Section) REQUIRE s.id IS UNIQUE")
allowed_nodes = ["Provision","Actor","Category","Violation","Condition","Remedy","Penalty","Authority"]

In [None]:
for node_label in allowed_nodes:
    graph.query(
        f"CREATE CONSTRAINT unique_{node_label.lower()}_id IF NOT EXISTS FOR (n:{node_label}) REQUIRE n.id IS UNIQUE")

llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash', temperature=0)
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")

allowed_rels = ["DEFINES","QUALIFIES_AS","INCLUDES","EXCLUDES","CONSTITUTES","PROHIBITS","ESTABLISHES","FILED_BEFORE","APPEALS_TO","ALLOWS_RELIEF","CARRIES_PENALTY","LIABLE_FOR","GRANTS_RIGHT"]
llm_transformer = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes, allowed_relationships=allowed_rels)



In [None]:
with open("cpa_anchored_refined_v2.json", "r", encoding="utf-8") as f:
    data = json.load(f)
    for chapter in data:
        chapter_name = clean_id(chapter["chapter_name"])
        graph.query("MERGE (c:Chapter {id : $id})", params={'id': chapter_name})
        for section in chapter["sections"]:

            section_id = clean_id(str(section["section_id"]))
            section_title = clean_id(section["title"])
            text = section["title"] + section["original_content"]
            section_embedding = embeddings.embed_query(text)
            graph.query(
                '''MATCH (c:Chapter {id : $cid}) MERGE (s:Section {id: $sid}) SET s.title=$title, s.text=$text, s.embedding=$embedding MERGE (c)-[:CONTAINS]->(s)''',
                params={'cid': chapter_name, 'sid': section_id, 'title': section_title, 'text': text,
                        'embedding': section_embedding})
            if section["section_id"] != "2":
                doc = Document(page_content=text)
                graph_extracted = llm_transformer.convert_to_graph_documents([doc])
                if graph_extracted:
                    graph_docs = graph_extracted[0]
                    for node in graph_docs.nodes:
                        node.id = clean_id(node.id)
                    graph.add_graph_documents([graph_docs])
                    for node in graph_docs.nodes:
                        graph.query(
                            '''MATCH (s:Section {id :$id})
                            MATCH (n) WHERE n.id=$node_id
                            MERGE (s)-[:CONTAINS]->(n)''', params={'node_id': node.id, 'id': section_id}
                        )
            else:

                for unit in section["atomic_units"]:
                    print(unit)

                    term = clean_id(unit["term"])
                    definition = unit["text"]
                    term_embedding = embeddings.embed_query(definition)
                    graph.query(
                        '''Match (s:Section {id :$sid}) MERGE (lc:LegalConcept {id:$term}) SET lc.definition=$definition, lc.source="Section 2",lc.embedding=$embedding MERGE (s)-[:DEFINES]->(lc)''',
                        params={'term': term, 'definition': definition, 'sid': section_id, 'embedding': term_embedding})
                    try:
                        doc = Document(page_content=definition)
                        graph_extracted = llm_transformer.convert_to_graph_documents([doc])
                        if graph_extracted:
                            graph_docs = graph_extracted[0]
                            for node in graph_docs.nodes:
                                node.id = clean_id(node.id)
                            graph.add_graph_documents([graph_docs])
                            for node in graph_docs.nodes:
                                if node.id.lower() != term:
                                    graph.query(
                                        '''MATCH (lc:LegalConcept {id:$term}) MATCH (n) WHERE n.id = $node_id MERGE (lc)-[:MENTIONS]->(n)''',
                                        params={'node_id': node.id, 'term': term}
                                    )
                    except Exception as e:
                        print(term, e)

In [None]:
ret_query='''
CALL db.index.vector.queryNodes('section_embedding',5,$embedding)
YIELD node as s, score
WHERE score>0.7
WITH collect(
{ type: 'Section',
    title: s.title,
    text: s.text,
    score: score,
    id: s.id
}
) as section_results

CALL db.index.vector.queryNodes('concept_embedding',5,$embedding)
YIELD node as lc, score
WHERE score>0.7
MATCH (section2:Section)-[:DEFINES]->(lc)
WITH section_results, collect(
{ type: 'definition',

    term: lc.id,
    definition: lc.definition,

    score: score,
    source: section2.title}) as concept_results

UNWIND section_results as sec_res
MATCH (s:Section {id: sec_res.id})
OPTIONAL MATCH (s)-[:MENTIONS]->(entity)
WITH section_results, concept_results, sec_res, labels(entity) as tags, entity
WITH section_results, concept_results, sec_res, tags, entity,
CASE
    WHEN 'Authority' IN tags THEN 'Authority'
    WHEN 'Offense' IN tags THEN 'Offense'
    WHEN 'Penalty' IN tags THEN 'Penalty'
    WHEN 'Remedy' IN tags THEN 'Remedy'
    WHEN 'Stakeholder' IN tags THEN 'Stakeholder'
    ELSE head(tags) END AS label

WITH section_results, concept_results, sec_res, collect(DISTINCT entity.id + ' (' +label+ ')') as entities

WITH
collect({ type: 'Section',
    title: sec_res.title,
    text: sec_res.text,
    score: sec_res.score,
    mentions: entities
}) as updated_section, concept_results

RETURN {
sections: updated_section,
definitions: concept_results} as context

'''


In [None]:
groq_llm=ChatGroq(model_name="llama3-70b-8192",temperature=0)

In [None]:
user_query=input()
user_query_vector=embeddings.embed_query(user_query)
try:
    result=graph.query(ret_query,params={'embedding':user_query_vector})
    if not result:
        print("No result from the graph")

    else:
        context =result[0]['context']
except Exception as e:
    print(e)
llm_query=''
if context:
    if context['definitions']:
        llm_query += 'Relevant Legal Definitions:\n'
        for item in context['definitions']:
            llm_query+=f'Term: {item["term"]}\n'
            llm_query += f'Source: {item["source"]}\n'
            llm_query += f'Definition: {item["definition"]}\n'

            llm_query += f'Score: {item["score"]}\n\n'
    if context['sections']:
        llm_query += 'Relevant Legal Sections:\n'
        for item in context['sections']:
            llm_query += f'Title: {item["title"]}\n'
            llm_query += f'Text: {item["text"]}\n'

            if item['mentions']:
                mentions=[m for m in item['mentions']]
                llm_query += f'Mentions: {", ".join(mentions)}\n'
print(llm_query)



In [None]:
system_prompt = """
You are an expert Legal Assistant for Indian Consumer Law.
Answer the user's question STRICTLY based on the provided context below.

Rules:
1. Use the "Relevant Definitions" to clarify terms.
2. Use the "Relevant Sections" to support your legal arguments.
3. Pay special attention to "Connected Entities" to understand who is responsible (e.g., Authorities vs Stakeholders).
4. If the answer is not in the context, state "I cannot find the answer in the provided legal documents."

Context:
{llm_query}
"""

In [None]:
from langchain_core.prompts import ChatPromptTemplate
prompt=ChatPromptTemplate.from_messages([('system',system_prompt),('user',user_query)])

In [None]:
from langchain_core.output_parsers import StrOutputParser
chain = prompt|groq_llm|StrOutputParser()


In [None]:
response=chain.invoke({'llm_query':llm_query,'user':user_query})

In [None]:
user_query

In [None]:
response