In [None]:
import os

from pprint import pprint
from dotenv import load_dotenv
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from src.grag import create_vector_cypher_retriever_tool
from src.grag import create_text2cypher_retriever_tool
from src.grag import Text2CypherFallback
from src.grag import create_workflow
from src.grag import create_agent

# Populate os.environ from the local .env file before any setting is read.
load_dotenv(dotenv_path=".env")

In [None]:
# Connection settings are read from environment variables; indexing
# os.environ directly means a missing variable raises KeyError right here.
URI = os.environ["DATABASE_HOST"]
DATABASE = os.environ["DATABASE_SMALL"]
USERNAME = os.environ["DATABASE_USERNAME"]
PASSWORD = os.environ["DATABASE_PASSWORD"]

# Database name plus the vector / full-text index names for articles and
# definitions; passed to the vector-Cypher retriever tool as `neo4j_config`.
neo4j_config = {
    "DATABASE_NAME": DATABASE,
    "ARTICLE_VECTOR_INDEX_NAME": os.environ["ARTICLE_VECTOR_INDEX_NAME"],
    "ARTICLE_FULLTEXT_INDEX_NAME": os.environ["ARTICLE_FULLTEXT_INDEX_NAME"],
    "DEFINITION_VECTOR_INDEX_NAME": os.environ["DEFINITION_VECTOR_INDEX_NAME"],
    "DEFINITION_FULLTEXT_INDEX_NAME": os.environ["DEFINITION_FULLTEXT_INDEX_NAME"],
}

# A standalone driver (GraphDatabase.driver(uri=URI, auth=(USERNAME, PASSWORD)))
# is not created here after all, because Text2Cypher requires a Neo4jGraph.
neo4j_graph = Neo4jGraph(
    url=URI,
    database=DATABASE,
    username=USERNAME,
    password=PASSWORD,
    enhanced_schema=True,
)

# Take the driver from the graph wrapper instead.
# NOTE(review): `_driver` is a private attribute of Neo4jGraph and may change
# between library versions.
neo4j_driver = neo4j_graph._driver

# Embedding model name comes from the environment.
embedder_model = HuggingFaceEmbeddings(
    model_name=os.environ["EMBEDDING_MODEL"],
)

# Single Gemini chat model instance, reused by the tools and the workflow.
llm = ChatGoogleGenerativeAI(
    api_key=os.environ["GOOGLE_API_KEY"],
    model="gemini-2.0-flash",
    temperature=0.0,
)

In [None]:
# Tool 1: vector-based retriever built on the shared driver, the embedding
# model and the index names collected in `neo4j_config`.
vector_cypher_retriever = create_vector_cypher_retriever_tool(
    neo4j_driver=neo4j_driver,
    neo4j_config=neo4j_config,
    embedder_model=embedder_model,
    top_k_initial_article=5,
    total_article_limit=10,
    total_definition_limit=10,
)

# Tool 2: text-to-Cypher retriever; the same LLM is supplied for both the
# Cypher-generation and QA roles, with the QA step skipped.
text2cypher_retriever = create_text2cypher_retriever_tool(
    neo4j_graph=neo4j_graph,
    embedder_model=embedder_model,
    cypher_llm=llm,
    qa_llm=llm,
    skip_qa_llm=True,
    verbose=False,
)

In [None]:
# System prompt for the agent. It must make sure the model calls only one
# tool per invocation (see the last bullet of instruction 2).

SYSTEM_PROMPT = """You are an intelligent assistant that can query a legal graph database (Neo4j) using `text2cypher_retriever` or `vector_cypher_retriever`. Your goal is to accurately `Answer` user queries by utilizing some tools to fetch relevant information.

### Instructions:
1. **Understand the User Query**
   - Carefully analyze the user's question and determine the best approach to retrieve the required information.

2. **Use Available Tools**
   - If the user asks **general questions** that can't be written in Neo4j Cypher, use `vector_cypher_retriever`.
   - If the user asks for **regulation structure, relationships, or anything that can be represented as Neo4j Cypher**, use `text2cypher_retriever`.
   - Make sure to only call 1 tool in a call

3. **Maintain Accuracy and Completeness**
   - Your default language is English, but you should `Answer` the user query in the same language as the query.
   - Always provide precise and concise `Answer` based on the retrieved data.
   - If the retrieved data contains legal articles with subsections, structure them in a markdown list format.
   - Ensure that your final `Answer` is well-formatted in Markdown.

4. **Handle Errors Gracefully**
   - If you don't have the `Answer`, inform the user that no relevant information was found, instead of making assumptions.
   - If the query is ambiguous, ask for clarification before proceeding.
"""

In [None]:
from IPython.display import display, Image
from langgraph.checkpoint.memory import MemorySaver

# Checkpointer from langgraph.checkpoint.memory, handed to the workflow below.
checkpointer = MemorySaver()

# Two run configs that differ only in their thread_id.
config_1 = dict(configurable=dict(thread_id="1"))
config_2 = dict(configurable=dict(thread_id="2"))

# Build the workflow from the LLM and both retriever tools;
# Text2CypherFallback is supplied as the fallback tool-calling class.
workflow = create_workflow(
    llm,
    [text2cypher_retriever, vector_cypher_retriever],
    checkpointer=checkpointer,
    fallback_tool_calling_cls=Text2CypherFallback,
    prompt=SYSTEM_PROMPT,
)

# Print the graph as ASCII art. Alternative renderings, currently disabled:
#   display(Image(workflow.get_graph().draw_mermaid_png()))
#   print(workflow.get_graph().draw_mermaid())
graph_view = workflow.get_graph()
print(graph_view.draw_ascii())

In [None]:
# Test fallback tool calling (run with config_1).
query = "Apa isi dari pasal 100 UU no 11 tahun 2008 apa ya?"
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)

In [None]:
# Short follow-up question, sent with the same config_1 as the previous query.
query = "Kalau pasal 28?"
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)

In [None]:
# Known issue noted by the author: the answer is still wrong because only the
# RELATED_TO relationship is taken into account.
query = (
    "Di antara semua pasal di database mu, pasal apa yang paling banyak hubungan (semua jenis hubungan)? "
    "Kemudian sebutkan nama semua pasal yang berelasi dengannya"
)
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)

In [None]:
# Definition-style question, sent with config_1.
query = "Apa pengertian dari penyelenggara sistem elektronik menurut peraturan?"
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)

In [None]:
# Author's notes on this query:
# - No data was retrieved for it.
# - First guess: only 5 definitions were fetched and the one specifically
#   about "Sistem Elektronik" was not among them.
# - Later finding: it seems the definition really is not there at all.
query = "Dari definisinya penyelenggara sistem elektronik tadi ada disebut sistem elektronik, memangnya itu apa?"
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)

In [None]:
# Ask the assistant to answer from its own knowledge, without the database.
query = "Oke karena ngga dapat datanya, coba jelaskan menurutmu saja apa definisi sistem elektronik? tanpa melihat database"
response = workflow.invoke({"messages": query}, config=config_1)

# Show every message in the response, then only the last message's text.
messages = response["messages"]
display(messages)
print(messages[-1].content)