In [26]:
import warnings
from typing import List, Optional, Dict
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_groq import ChatGroq
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer

from langgraph.graph import END, StateGraph
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

warnings.filterwarnings("ignore", category=FutureWarning)

`1. Initializing Elastic search client, LLM model connection and Embedding model`

In [27]:
es_client = Elasticsearch(
    hosts="https://512baa73334c4127ade77bc2dfa2ef02.eastus2.azure.elastic-cloud.com:443",
    basic_auth=("elastic", "XRzYOABuG17Yv32UKAFySw41")
)

print(f"Elastic Search Connection successful: {es_client.ping()}")

llm = ChatGroq(
    temperature=0,
    model_name="llama-3.1-8b-instant",
    # api_key="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",
    api_key="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"
)

print("LLM Model loaded")

model_name = "nomic-ai/nomic-embed-text-v1"
model = SentenceTransformer(model_name, trust_remote_code=True)

print("Embedding Model loaded")

def embedding_creation(input_texts, model):
    embedding = model.encode(input_texts)
    return embedding.tolist()

Elastic Search Connection successful: True
LLM Model loaded


<All keys matched successfully>


Embedding Model loaded


`2. RAG`

`2.1 Multi Query Generation`

In [28]:
query = "List of Board of Trustees of Mohamed bin Zayed University?"

In [29]:
class MultiQuerySchema(BaseModel):
    Questions: List[str] = Field(description="List of questions")


MULTI_QUERY_PROMPT = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are an AI, specialized in generating effective multi-queries for Retrieval Augmented Generation (RAG) systems.

    Guidelines:
    1. Analyze the user's original query and generate a set of five distinct yet relevant search queries that cover:
        - Synonyms and similar terms
        - Related subtopics or specific aspects
        - Broader and narrower variations of the original query
    2. Aim to create queries that will increase the chances of retrieving diverse, relevant results by considering different interpretations and possible user intents.
    3. Provide JSON response, with list of questions like {{ "Questions": ["Question 1", "Question 2", ..] }}
    4. Provide only the list of queries without any additional explanations or commentary.

    User Query: "{user_query}"
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["user_query"],
)

structured_llm = llm.with_structured_output(MultiQuerySchema, method="json_mode")

multi_query_generation = MULTI_QUERY_PROMPT | structured_llm

In [30]:
multi_query_response = multi_query_generation.invoke({"user_query": query})
multi_query_response.Questions

['Mohamed bin Zayed University Board of Directors',
 'Members of the Board of Trustees of Mohamed bin Zayed University',
 'List of Trustees of Mohamed bin Zayed University Abu Dhabi',
 'Mohamed bin Zayed University governing body members',
 'Who are the members of the Board of Trustees of Mohamed bin Zayed University?']

`2.2 Agent: Enriching Queries with Synonyms`

In [31]:
# 1. Query Enriching Agent

QUERY_ENRICHING_PROMPT = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are an AI assistant specialized in generating Elasticsearch query strings. 
    Your task is to create the most effective query string for the given list of user questions. 
    This query string will be used to search for relevant documents in an Elasticsearch index.

    Guidelines:
    1. Analyze the user's question carefully.
    2. Generate ONLY a query string suitable for Elasticsearch's match query.
    3. Focus on key terms and concepts from the question.
    4. Include synonyms or related terms that might be in relevant documents.
    5. Use simple Elasticsearch query string syntax if helpful (e.g., OR, AND).
    6. Do not use advanced Elasticsearch features or syntax.
    7. Do not include any explanations, comments, or additional text.
    8. Provide only the query string, nothing else.
    9. Do not repeat the query string i.e., there must not be any duplicates in response

    For the question "What is Clickthrough Data?", we would expect a response like:
    clickthrough data OR click-through data OR click through rate OR CTR OR user clicks OR ad clicks OR search engine results OR web analytics

    AND operator is not allowed. Use only OR.

    List of user Questions: {user_query}
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["user_query"],
)

query_enricher = QUERY_ENRICHING_PROMPT | llm | StrOutputParser()

In [32]:
# def parse_or_query(query_text: str) -> List[str]:
#     terms = query_text.split(" OR ")
#     query = {
#         "query": {
#             "bool": {
#                 "should": [ {"match": {"content": term.strip()}} for term in terms]
#             }
#         }
#     }
#     return query

def parse_or_query(query_text: str) -> List[str]:
    return [term.strip() for term in query_text.split(' OR ')]

unique_enriched_keys = set()

enriched_query = query_enricher.invoke({"user_query": multi_query_response.Questions})
print(f"Enriched Query: {enriched_query}")

enriche_parsed_query = parse_or_query(enriched_query)
print(f"Enriched Parsed Query: {enriche_parsed_query}")

# for query in multi_query_response.Questions:
#     enriched_query = query_enricher.invoke({"user_query": query})
#     print(f"Enriched Query: {enriched_query}")

#     enriche_parsed_query = parse_or_query(enriched_query)
#     print(f"Enriched Parsed Query: {enriche_parsed_query}")

#     unique_enriched_keys.update(enriche_parsed_query)

Enriched Query: mohamed bin zayed university OR mohamed bin zayed OR mohamed bin OR zayed university OR university board of directors OR board of directors OR board of trustees OR members of the board of trustees OR governing body OR governing body members OR mohamed bin zayed university abu dhabi OR abu dhabi
Enriched Parsed Query: ['mohamed bin zayed university', 'mohamed bin zayed', 'mohamed bin', 'zayed university', 'university board of directors', 'board of directors', 'board of trustees', 'members of the board of trustees', 'governing body', 'governing body members', 'mohamed bin zayed university abu dhabi', 'abu dhabi']


`2.3 Tool: Hybrid Search`

In [33]:
def hybrid_vector_search(
        conn,
        index_name: str, 
        query_keys: str, 
        query_vector: List[float], 
        text_fields: List[str], 
        vector_field: str, 
        num_candidates: int = 100, 
        num_results: int = 10,
    ) -> Dict:


    search_body = {
            "knn": {
                "field": vector_field,
                "query_vector": query_vector,
                "k": num_candidates,
                "num_candidates": num_candidates
            },
            "query": {
                "bool": {
                    "must": [
                        {
                            "multi_match": {
                                "query": " ".join(query_keys),  
                                "fields": text_fields,
                                "type": "best_fields",
                                "operator": "or" 
                            }
                        }
                    ],
                    "should": [
                        {
                            "script_score": {
                                "query": {"match_all": {}},
                                "script": {
                                    "source": """
                                    double vector_score = cosineSimilarity(params.query_vector, params.vector_field) + 1.0;
                                    double text_score = _score;
                                    return 0.7 * vector_score + 0.3 * text_score;
                                    """,
                                    "params": {
                                        "query_vector": query_vector,  
                                        "vector_field": vector_field
                                    }
                                }
                            }
                        }
                    ]
                }
            }
        }

    response = conn.search(index=index_name, body=search_body, size=num_results)

    return response, search_body


In [34]:
query_vector = embedding_creation(query, model)
response, search_body = hybrid_vector_search(
        es_client,
        index_name="abudhabi-policies-legislations-v4", 
        query_keys= enriche_parsed_query,
        query_vector=query_vector, 
        text_fields=["content", "heading", "questions", "reference"], 
        vector_field="embedded_content", 
        num_candidates = 25, 
        num_results = 3
    )

result = []
all_content = ""
for j in response['hits']['hits']:
    content = j["_source"]
    _result = {}
    _result["score"] = j["_score"]
    _result["heading"] = content['heading']
    _result["content"] = content['content']
    all_content = all_content + '\n' + content['content']
    _result["reference"] = content['reference']

    result.append(_result)

result

  response = conn.search(index=index_name, body=search_body, size=num_results)


[{'score': 100.12006,
  'heading': 'executive council resolution no. (11) of 2023 concerning the reformation of the board of trustees of mohamed bin zayed university for humanities',
  'content': 'executive council resolution no. (11) of 2023 concerning the reformation of the board of trustees of mohamed bin zayed university for humanities having reviewed chairman of the executive council resolution no. (27) of 2020 concerning the formation of the board of trustees of mohamed bin zayed university for humanities. the executive council has decided the following: 1. the board of trustees of mohamed bin zayed university for humanities shall be reformed under the chairmanship of h.e. dr. mohammed rashid ahmed al hamli, and the membership of: • mubarak hamad mubarak al muhairi - vice chairman • mohammed hamza hassan al qassim. • maryam eid khamis al muhairi • abdullah aqueeda ali al muhairi • dr. omar habtoor theeb al-derei. • ghanem sultan ahmed al suwaidi. 2. the membership term of the boa

`2.4 Agent: Extracting Reference Document details`

In [35]:
def reference_search(conn, index_name, reference_index):
    search_body = {
        "size": 1,
        "query": {
            "bool": {
                "must": [
                    {
                    "match": {"heading": reference_index}
                    }
                ]
            }
        }
    }
    response = conn.search(index=index_name, body=search_body)

    return response
    

ref_doc = list()
for doc in result:
    for reference_index in doc["reference"]:
        response = reference_search(es_client, "abudhabi-policies-legislations-v4", reference_index)
        result = []
        for j in response['hits']['hits']:
            content = j["_source"]
            _result = {}
            _result["score"] = j["_score"]
            _result["heading"] = content['heading']
            _result["content"] = content['content']
            _result["reference"] = content['reference']

            ref_doc.append(_result)

In [36]:
ref_doc

[{'score': 24.451162,
  'heading': 'executive council resolution no. (11) of 2023 concerning the reformation of the board of trustees of mohamed bin zayed university for humanities',
  'content': 'executive council resolution no. (11) of 2023 concerning the reformation of the board of trustees of mohamed bin zayed university for humanities having reviewed chairman of the executive council resolution no. (27) of 2020 concerning the formation of the board of trustees of mohamed bin zayed university for humanities. the executive council has decided the following: 1. the board of trustees of mohamed bin zayed university for humanities shall be reformed under the chairmanship of h.e. dr. mohammed rashid ahmed al hamli, and the membership of: • mubarak hamad mubarak al muhairi - vice chairman • mohammed hamza hassan al qassim. • maryam eid khamis al muhairi • abdullah aqueeda ali al muhairi • dr. omar habtoor theeb al-derei. • ghanem sultan ahmed al suwaidi. 2. the membership term of the boa

`2.5 Agent: Critic Agent`

In [37]:
CRITIC_PROMPT = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are a critical evaluator tasked with assessing the relevance of retrieved documents to a user's query. For each document, determine if it contains any keywords related to the user's question.

    Guidelines:
    1. Analyze the user's question and identify key terms and concepts.
    2. Consider synonyms and related terms that may appear in relevant documents.
    3. Provide JSON response with following format: 
    {{
      "Document Title 1": true/false,
      "Document Title 2": true/false,
      ...
    }}
    4. Do not include explanations or additional text.

    Documents: {documents}
    User Question: {question}

    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>

    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "documents"],  
)

relevency_checking = CRITIC_PROMPT | llm

In [39]:
reference_doc = list()

for doc in ref_doc:
    reference_doc.append(doc['content'])

reference_doc

reference_doc = set(reference_doc)
reference_doc

{" we, khalifa bin zayed al nahyan, ruler of abu dhabi having reviewed: • law no. (1) of 1974 concerning the reorganisation of the governmental body in the emirate of abu dhabi and its amendments; • law no. (2) of 1971 concerning the national advisory council, and its amendments; • law no. (2) of 2000 concerning the civil retirement pensions and benefits in the emirate of abu dhabi, and its amendments; • law no. (1) of 2017 concerning the financial system of abu dhabi government; • law no. (27) of 2019 concerning the department of finance; • law no. (19) of 2020 concerning the re-organisation of abu dhabi accountability authority; • law no. (24) of 2020 concerning the supreme council for financial and economic affairs; and • based on what was presented to and approved by the executive council; issued the following law: article (1) • the definition of the ‘financial policy’ in article (1) of the aforesaid law no. (1) of 2017 shall be replaced by the following definition: ‘financial poli

In [40]:
response = relevency_checking.invoke(
    {
        "question": query, 
        "documents": reference_doc
    }
)

In [41]:
print(response.content)

{
  "Executive Council Resolution No. (11) of 2023 concerning the Reformation of the Board of Trustees of Mohamed bin Zayed University for Humanities": true,
  "Executive Council Resolution No. (27) of 2020 concerning the Formation of the Board of Trustees of Mohamed bin Zayed University for Humanities": true,
  "Law No. (1) of 2017 concerning the Financial System of Abu Dhabi Government": false,
  "Law No. (24) of 2020 concerning the Supreme Council for Financial and Economic Affairs": false,
  "Law No. (1) of 1974 concerning the Reorganisation of the Governmental Body in the Emirate of Abu Dhabi and its Amendments": false,
  "Law No. (2) of 1971 concerning the National Advisory Council, and its Amendments": false,
  "Law No. (2) of 2000 concerning the Civil Retirement Pensions and Benefits in the Emirate of Abu Dhabi, and its Amendments": false,
  "Law No. (27) of 2019 concerning the Department of Finance": false,
  "Law No. (19) of 2020 concerning the Re-organisation of Abu Dhabi Ac

In [42]:
import json
conditional_response = json.loads(response.content)
conditional_response

JSONDecodeError: Unterminated string starting at: line 305 column 3 (char 31274)

In [161]:
for topic, value in conditional_response.items():
    if value:
        all_content += f'\n{topic}: {value}'

`2.6 Agent: Response creation`

In [162]:
# Define the prompt template
prompt = PromptTemplate(
    template="""<|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are a legal assistant, your task is to answer the user question based on given documents. 

    Legal Documents: {documents}

    Response Instructions:
    1. Formulate a concise, informative response based on the extracted information for the users question.
    2. Reference specific sections or clauses of the documents to support your answer.
    3. Do not answer the question based general knowledge, if the given documents do not have sufficient information to answer the question then send response as "I don't have knowledge to answer your question"
    4. Do not start your response like Based on the provided documents,  here are the details about ...
    5. Make sure that you are including any quantitative mesures

    User Question: {question}

    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "documents"],  # Fixed typo here
)

# Create the question router
question_router = prompt | llm | StrOutputParser() 

response = question_router.invoke(
    {
        "question": query, 
        "documents": all_content  # Use the corrected variable name
    }
)

In [163]:
print(response)

The members of the Board of Trustees of Mohamed bin Zayed University for Humanities are:

1. H.E. Dr. Mohammed Rashid Ahmed Al Hamli - Chairman
2. Mubarak Hamad Mubarak Al Muhairi - Vice Chairman
3. Mohammed Hamza Hassan Al Qassim
4. Maryam Eid Khamis Al Muhairi
5. Abdullah Aqeela Ali Al Muhairi
6. Dr. Omar Habtoor Theeb Al-Derei
7. Ghanem Sultan Ahmed Al Suwaidi

The membership term of the Board shall be three renewable years. (Executive Council Resolution No. (11) of 2023, Clause 2)


In [167]:
response = question_router.invoke(
    {
        "question": query, 
        "documents": all_content  # Use the corrected variable name
    }
)

for s in question_router.stream(    {
        "question": query, 
        "documents": all_content  # Use the corrected variable name
    }
):
    print(s, end="", flush=True)

The members of the Board of Trustees of Mohamed bin Zayed University for Humanities are:

1. H.E. Dr. Mohammed Rashid Ahmed Al Hamli - Chairman
2. Mubarak Hamad Mubarak Al Muhairi - Vice Chairman
3. Mohammed Hamza Hassan Al Qassim
4. Maryam Eid Khamis Al Muhairi
5. Abdullah Aqeela Ali Al Muhairi
6. Dr. Omar Habtoor Theeb Al-Derei
7. Ghanem Sultan Ahmed Al Suwaidi

The membership term of the Board shall be three renewable years. (Executive Council Resolution No. (11) of 2023, Clause 2)

In [166]:
s

''