In [206]:

from typing import List
from pydantic import BaseModel
from typing_extensions import TypedDict
from langchain.output_parsers import PydanticOutputParser

class ReWOO(TypedDict):
    task: str
    plan_string: str
    steps: List
    results: dict
    result: str

# Define the output structure with Pydantic
class TaskOutput(BaseModel):
    task_number: int
    idea: str
    tool: str
    inputs: str

class TaskOutputList(BaseModel):
    tasks: List[TaskOutput]

output_parser = PydanticOutputParser(pydantic_object=TaskOutputList)

In [207]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    temperature=0,
    model_name="llama-3.2-90b-text-preview",#"llama3-70b-8192",
    api_key="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"
)

In [261]:
prompt = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are a expert in making plans to solve the task by creating step by step plan. you should always think about what to do for solving the problem

    Guidelines:
    1. For each plan, specify which external tool to use and the corresponding input for evidence retrieval. 
    2. if the user question is stright forward and can be answered without dividing the plan then return only one plan.
    3. If the user question is complex and requires multiple steps of plan.
    4. The response must not add any additional explanations or commentary.
    5. You are allowed to use only following tools to solve the given task:
        - Retriever[input]: Worker that searches relevent document from elasticsearch private knowledgebase. 
        And this is useful when you need to find Abu Dhabi laws and regulations realted answers about a specific topic. 
        The input should be a search query.
        - LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general world knowledge and common sense.
        Prioritize it when you are confident in solving the problem yourself. Input can be any instruction.
    6. Generate a response in the following structured format: {format_instructions}
    
    Example
    Task: What are all the ground water strategy plan in UAE from 2018 to 2020
    {{
        "tasks": [
            {{
                "task_number": 1,
                "idea": "Extract the document related to ground water strategy plan in UAE at 2018 from elasticsearch using Retriever tool",
                "tool": "Retriever",
                "inputs": "ground water strategy plan in UAE at 2018"
            }},
            {{
                "task_number": 2,
                "idea": "Extract the document related to ground water strategy plan in UAE at 2019 from elasticsearch using Retriever tool",
                "tool": "Retriever",
                "inputs": "ground water strategy plan in UAE at 2019"
            }},
            {{
                "task_number": 3,
                "idea": "Extract the document related to ground water strategy plan in UAE at 2020 from elasticsearch using Retriever tool",
                "tool": "Retriever",
                "inputs": "ground water strategy plan in UAE at 2020"
            }},
            {{
                "task_number": 4,
                "idea": "Compare the result of task_number 1, task_number 2 and task_number 3 using LLM tool",
                "tool": "LLM",
                "inputs": "Summarize the traffic roles data with task_number 1, task_number 2 and task_number 3 summary"
            }}
        ]
    }}

    Begin! 
    Describe your plans with rich details. Each Plan should be followed by only one #E.

    Task: {task}
    """

In [262]:
format_instructions=output_parser.get_format_instructions()

In [263]:
task = "What is the trafic roles implemented in 2015"
result = llm.invoke(prompt.format(task=task, format_instructions=output_parser.get_format_instructions()))

In [267]:
print(result.content)

{
    "tasks": [
        {
            "task_number": 1,
            "idea": "Extract the document related to traffic roles implemented in 2015 from elasticsearch using Retriever tool",
            "tool": "Retriever",
            "inputs": "traffic roles implemented in 2015"
        },
        {
            "task_number": 2,
            "idea": "Summarize the traffic roles data using LLM tool",
            "tool": "LLM",
            "inputs": "Summarize the traffic roles data with task_number 1"
        }
    ]
}


In [273]:
import json

tasks = json.loads(result.content)
tasks

{'tasks': [{'task_number': 1,
   'idea': 'Extract the document related to traffic roles implemented in 2015 from elasticsearch using Retriever tool',
   'tool': 'Retriever',
   'inputs': 'traffic roles implemented in 2015'},
  {'task_number': 2,
   'idea': 'Summarize the traffic roles data using LLM tool',
   'tool': 'LLM',
   'inputs': 'Summarize the traffic roles data with task_number 1'}]}

In [274]:
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from typing import List, Dict, Union
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [275]:
@tool
def Retriever(input_texts: Union[str, List[str]], ) -> List[float]:
    """
    perform the elasticsearch

    Args:
        input_texts (Union[str, List[str]]): The input text(s) for which to be search

    Returns:
        List[float]: The generated embeddings as a list.
    """
    model_name = "nomic-ai/nomic-embed-text-v1"
    model = SentenceTransformer(model_name, trust_remote_code=True)
    
    # Ensure input_texts is a list
    if isinstance(input_texts, str):
        input_texts = [input_texts]

    # Validate input_texts
    if not isinstance(input_texts, list) or not all(isinstance(text, str) for text in input_texts):
        raise ValueError("input_texts must be a string or a list of strings.")

    # Create embeddings
    embedding = model.encode(input_texts).tolist()[0]

    search_body = {
            "knn": {
                "field": "embedded_content",
                "query_vector": embedding,
                "k": 1,
                "num_candidates": 50
            },
            "query": {
                "bool": {
                    "must": [
                        {
                            "multi_match": {
                                "query": " ".join(input_texts),  
                                "fields": ["content", "heading", "questions", "reference"],
                                "type": "best_fields",
                                "operator": "or" 
                            }
                        }
                    ],
                    "should": [
                        {
                            "script_score": {
                                "query": {"match_all": {}},
                                "script": {
                                    "source": """
                                    double vector_score = cosineSimilarity(params.query_vector, params.vector_field) + 1.0;
                                    double text_score = _score;
                                    return 0.7 * vector_score + 0.3 * text_score;
                                    """,
                                    "params": {
                                        "query_vector": embedding,  
                                        "vector_field": "embedded_content"
                                    }
                                }
                            }
                        }
                    ]
                }
            }
        }

    conn = Elasticsearch(
    hosts="https://512baa73334c4127ade77bc2dfa2ef02.eastus2.azure.elastic-cloud.com:443",
    basic_auth=("elastic", "XRzYOABuG17Yv32UKAFySw41")
    )
    response = conn.search(index="abudhabi-policies-legislations-v4", body=search_body, size=5)

    return response

@tool
def LLM(query, input_documents: Union[str, List[str]], ) -> str:
    """
    Pre trained LLM useful for summarizing, answering questions with given RAG documents 
    Args:
        query (str): Task
        input_documents (Union[str, List[str]]): The input Documents

    Returns:
        response (str): Response
    """
    llm = ChatGroq(
        temperature=0,
        model_name="llama3-70b-8192", #"llama-3.1-8b-instant",
        # api_key="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",
        api_key="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"
    )

    prompt = PromptTemplate(
    template="""<|begin_of_text|>
        <|start_header_id|>system<|end_header_id|>  
        You are a legal assistant, your task is to answer the user question based on given documents. 

        Legal Documents: {documents}

        Response Instructions:
        1. Formulate a concise, informative response based on the extracted information for the users question.
        2. Reference specific sections or clauses of the documents to support your answer.
        3. Do not answer the question based general knowledge, if the given documents do not have sufficient information to answer the question then send response as "I don't have knowledge to answer your question"
        4. Do not start your response like Based on the provided documents,  here are the details about ...
        5. Make sure that you are including any quantitative mesures

        User Question: {question}

        <|eot_id|>
        <|start_header_id|>assistant<|end_header_id|>
        """,
        input_variables=["question", "documents"],  # Fixed typo here
    )

    # Create the question router
    question_router = prompt | llm | StrOutputParser() 

    response = question_router.invoke(
        {
            "question": query, 
            "documents": input_documents  # Use the corrected variable name
        }
    )

    return response.content

In [289]:
tools = [Retriever]

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a helpful assistant to execute the list of task in sequence with given tools.
            The input will be in the following format. You have to first perform sequentially task_number 1, task_number 2 ..,
            {{
                "tasks": [
                    {{
                        "task_number": 1,
                        "idea": "Extract the document related to ground water strategy plan in UAE at 2018 from elasticsearch using Retriever tool",
                        "tool": "Retriever",
                        "inputs": "ground water strategy plan in UAE at 2018"
                    }},
                    {{
                        "task_number": 2,
                        "idea": "Extract the document related to ground water strategy plan in UAE at 2019 from elasticsearch using Retriever tool",
                        "tool": "Retriever",
                        "inputs": "ground water strategy plan in UAE at 2019"
                    }},
                    {{
                        "task_number": 3,
                        "idea": "Extract the document related to ground water strategy plan in UAE at 2020 from elasticsearch using Retriever tool",
                        "tool": "Retriever",
                        "inputs": "ground water strategy plan in UAE at 2020"
                    }},
                    {{
                        "task_number": 4,
                        "idea": "Compare the result of task_number 1, task_number 2 and task_number 3 using LLM tool",
                        "tool": "LLM",
                        "inputs": "Summarize the traffic roles data using task_number 1 response, task_number 2 response and task_number 3 response"
                    }}
                ]
            }}
            """,
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{tasks}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

# Construct the Tools agent
agent = create_tool_calling_agent(llm, tools, prompt)

In [290]:
print(prompt.messages[0])

prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='\n            You are a helpful assistant to execute the list of task in sequence with given tools.\n            The input will be in the following format. You have to first perform sequentially task_number 1, task_number 2 ..,\n            {{\n                "tasks": [\n                    {{\n                        "task_number": 1,\n                        "idea": "Extract the document related to ground water strategy plan in UAE at 2018 from elasticsearch using Retriever tool",\n                        "tool": "Retriever",\n                        "inputs": "ground water strategy plan in UAE at 2018"\n                    }},\n                    {{\n                        "task_number": 2,\n                        "idea": "Extract the document related to ground water strategy plan in UAE at 2019 from elasticsearch using Retriever tool",\n                        "tool": "Retriever",\n       

In [291]:
# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke({"tasks": "result.content"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Retriever` with `{'input_texts': 'ground water strategy plan in UAE at 2018'}`


[0m

<All keys matched successfully>
  response = conn.search(index="abudhabi-policies-legislations-v4", body=search_body, size=5)


[36;1m[1;3m{'took': 7, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 151, 'relation': 'eq'}, 'max_score': 13.822568, 'hits': [{'_index': 'abudhabi-policies-legislations-v4', '_id': 'qi9T1ZIBMviUsWSjoZnM', '_score': 13.822568, '_ignored': ['content.keyword'], '_source': {'heading': 'executive council resolution no. (182) of 2022 concerning the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi', 'content': "executive council resolution no. (182) of 2022 concerning the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi the executive council has decided the following: the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi shall be approved in accordance with the (attached form). issuance date: 13 december 2022 corresponding to: 19 jumada al owla 1444 hijrisaif 

<All keys matched successfully>
  response = conn.search(index="abudhabi-policies-legislations-v4", body=search_body, size=5)


[36;1m[1;3m{'took': 23, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 151, 'relation': 'eq'}, 'max_score': 14.405399, 'hits': [{'_index': 'abudhabi-policies-legislations-v4', '_id': 'qi9T1ZIBMviUsWSjoZnM', '_score': 14.405399, '_ignored': ['content.keyword'], '_source': {'heading': 'executive council resolution no. (182) of 2022 concerning the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi', 'content': "executive council resolution no. (182) of 2022 concerning the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi the executive council has decided the following: the general policy for the management, regulation and protection of groundwater in the emirate of abu dhabi shall be approved in accordance with the (attached form). issuance date: 13 december 2022 corresponding to: 19 jumada al owla 1444 hijrisaif

<All keys matched successfully>
  response = conn.search(index="abudhabi-policies-legislations-v4", body=search_body, size=5)


[36;1m[1;3m{'took': 17, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 151, 'relation': 'eq'}, 'max_score': 14.731892, 'hits': [{'_index': 'abudhabi-policies-legislations-v4', '_id': 'Gi9T1ZIBMviUsWSjQpli', '_score': 14.731892, '_ignored': ['content.keyword'], '_source': {'heading': 'chairman of the executive council resolution no. (31) of 2021 concerning the appointment of the director-general of abu dhabi housing authority', 'content': " we, mohamed bin zayed al nahyan, crown prince, chairman of the executive council, having reviewed: • law no. (1) of 1974 concerning the reorganisation of the governmental body in the emirate of abu dhabi and its amendments; • law no. (4) of 2012 concerning the establishment of abu dhabi housing authority; • law no. (6) of 2016 concerning the human resources in the emirate of abu dhabi and its amendments; • chairman of the executive council resolution no. (11) of 2020 concerning t

APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.2-90b-text-preview` in organization `org_01jbaf7n3pfpz81079dy9nnfqc` on tokens per minute (TPM): Limit 7000, Requested 112053, please reduce your message size and try again. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

In [164]:
prompt = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>  
    You are a expert in making plans to solve the task step by step.

    Guidelines:
    1. For each plan, specify which external tool to use and the corresponding input for evidence retrieval. 
    2. Store the evidence in a variable #E for later reference. (Plan, #E1, Plan, #E2, Plan, ...)
    3. if the user question is stright forward and can be answered without dividing the plan then return only one plan

    Tools can be one of the following:
    (1) Retriever[input]: Worker that searches relevent document from elasticsearch private knowledgebase. 
    And this is useful when you need to find Abu Dhabi laws and regulations realted answers about a specific topic. 
    The input should be a search query.
    (2) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general world knowledge and common sense.
    Prioritize it when you are confident in solving the problem yourself. Input can be any instruction.

    Provide the response only in the given example format you must not add any additional explanations or commentary.

    For example,
    Task: Compare the ground water strategy plan in UAE between 2018 and 2020.
    Plan: Perform elasticsearch for ground water strategy plan in UAE at 2018. #E1 = Retriever[ground water strategy plan in UAE at 2018].
    Plan: Perform elasticsearch for ground water strategy plan in UAE at 2020. #E2 = Retriever[ground water strategy plan in UAE at 2020].
    Plan: Compare #E1 and #E2 to get the difference between the two plans. #E4 = LLM[Compare the #E1 and #E2, given #E1 and #E2]

    Begin! 
    Describe your plans with rich details. Each Plan should be followed by only one #E.

    Task: {task}
    """

In [165]:
# task = "Compare is the laws implemented for traffic rules in Abudhabi at 2021 and 2026"
# task = "Summarize the trafic roles between 2000 to 2010"
task = "Compare the traffic roles between 2000 to 2010"

In [166]:
result = llm.invoke(prompt.format(task=task))

In [167]:
print(result.content)

Plan: Perform elasticsearch for traffic laws and regulations in UAE at 2000. #E1 = Retriever[traffic laws and regulations in UAE at 2000].
Plan: Perform elasticsearch for traffic laws and regulations in UAE at 2010. #E2 = Retriever[traffic laws and regulations in UAE at 2010].
Plan: Extract traffic roles from #E1. #E3 = LLM[Extract traffic roles from #E1, given #E1].
Plan: Extract traffic roles from #E2. #E4 = LLM[Extract traffic roles from #E2, given #E2].
Plan: Compare #E3 and #E4 to get the difference between the two traffic roles. #E5 = LLM[Compare #E3 and #E4, given #E3 and #E4].


In [168]:
import re

regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[([^\]]+)\]"
matches = re.findall(regex_pattern, result.content)
matches

[('Perform elasticsearch for traffic laws and regulations in UAE at 2000. ',
  '#E1',
  'Retriever',
  'traffic laws and regulations in UAE at 2000'),
 ('Perform elasticsearch for traffic laws and regulations in UAE at 2010. ',
  '#E2',
  'Retriever',
  'traffic laws and regulations in UAE at 2010'),
 ('Extract traffic roles from #E1. ',
  '#E3',
  'LLM',
  'Extract traffic roles from #E1, given #E1'),
 ('Extract traffic roles from #E2. ',
  '#E4',
  'LLM',
  'Extract traffic roles from #E2, given #E2'),
 ('Compare #E3 and #E4 to get the difference between the two traffic roles. ',
  '#E5',
  'LLM',
  'Compare #E3 and #E4, given #E3 and #E4')]

'Retriever'