## Load Dependencies

In [None]:
from langchain.agents import (
    Tool,
    AgentExecutor,
    LLMSingleActionAgent,
    AgentOutputParser,
)
from langchain.prompts import StringPromptTemplate
from langchain import LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
import re
from langchain.llms import LlamaCpp

#DuckDuckGo
from langchain.tools import DuckDuckGoSearchRun
#Playwright
from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit
from langchain.tools.playwright.utils import (
    create_async_playwright_browser,
    create_sync_playwright_browser,  # A synchronous browser is available, though it isn't compatible with jupyter.
)

#Python Tools
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# This import is required only for jupyter notebooks, since they have their own eventloop
import nest_asyncio

nest_asyncio.apply()

#Files ToolKit
from langchain.tools.file_management import (
    ReadFileTool,
    CopyFileTool,
    DeleteFileTool,
    MoveFileTool,
    WriteFileTool,
    ListDirectoryTool,
)
from langchain.agents.agent_toolkits import FileManagementToolkit
from tempfile import TemporaryDirectory

# We'll make a temporary directory to avoid clutter
working_directory = TemporaryDirectory()

#Load HuggingFaceHub Token
import os
from dotenv import load_dotenv

load_dotenv()
huggingfacehub_api_key= os.getenv('HUGGINGFACEHUB_API_TOKEN')

## Load Tools

In [None]:
async_browser = create_async_playwright_browser()
scrape_toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)
scraping_tools = scrape_toolkit.get_tools()
tools_by_name = {tool.name: tool for tool in scraping_tools}
print(tools_by_name.keys())
navigate_tool = tools_by_name["navigate_browser"]
extract_text = tools_by_name["extract_text"]
files_tools = FileManagementToolkit(
    root_dir=str(working_directory.name),
    selected_tools=["read_file", "write_file", "list_directory"],
).get_tools()
read_tool, write_tool, list_tool = files_tools

In [None]:
DuckDuckGoSearch = DuckDuckGoSearchRun()
PythonTool =PythonREPLTool()

tools = [
    Tool(
        name="DuckDuckGo",
        func=DuckDuckGoSearch.run,
        description="useful for when you need to answer questions about current events",
    ),
    Tool(
        name="Read directory",
        func=read_tool.run,
        description="Read files within a directory",
    ),
    Tool(
        name="Write directory",
        func=write_tool.run,
        description="Write files to a directory",
    ),
    Tool(
        name="Extract text from webpage",
        func=extract_text.run,
        description="Extracts text from a webpage",
    ),
    extract_text,
    navigate_tool, PythonTool, 
]

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

In [None]:
docs = [
    Document(page_content=t.description, metadata={"index": i})
    for i, t in enumerate(tools)
]

In [128]:
vector_store = FAISS.from_documents(docs, HuggingFaceEmbeddings())

In [129]:
retriever = vector_store.as_retriever()


def get_tools(query):
    docs = retriever.get_relevant_documents(query)
    valid_docs = [d for d in docs if d.metadata["index"] < len(tools)]
    return [tools[d.metadata["index"]] for d in valid_docs]


In [130]:
# Set up the base template
template = """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to have fun.

Question: {input}
{agent_scratchpad}"""

In [131]:
from typing import Callable


# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    # The template to use
    template: str
    ############## NEW ######################
    # The list of tools available
    tools_getter: Callable

    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        ############## NEW ######################
        tools = self.tools_getter(kwargs["input"])
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(
            [f"{tool.name}: {tool.description}" for tool in tools]
        )
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in tools])
        return self.template.format(**kwargs)

In [132]:
prompt = CustomPromptTemplate(
    template=template,
    tools_getter=get_tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"],
)

In [133]:
class CustomOutputParser(AgentOutputParser):
    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(
            tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output
        )

In [134]:
output_parser = CustomOutputParser()

In [135]:
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
model_path = "../../../../../Models/llama-2-7b-chat.ggmlv3.q8_0.bin"
n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    input={"temperature": 0.75, "max_length": 2000, "top_p": 1},
    callback_manager=callback_manager,
    verbose=True,
)

llama.cpp: loading model from ../../../../../Models/llama-2-7b-chat.ggmlv3.q8_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 7 (mostly Q8_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 7130.73 MB (+  256.00 MB per state)
llama_new_context_with_model: kv self size  =  25

In [136]:
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [137]:
tools = get_tools("whats the weather like in Ferndale?")
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    stop=["\nObservation:"],
    allowed_tools=tool_names,
)

In [138]:
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True, handle_parsing_errors=True,
)

In [139]:
agent_executor.run("What's the weather in SF?")



[1m> Entering new AgentExecutor chain...[0m

Thought: Hmm, I should check DuckDuckGo for the latest information on the current weather in San Francisco.
Action: DuckDuckGo
Action Input: "San Francisco weather"[32;1m[1;3m
Thought: Hmm, I should check DuckDuckGo for the latest information on the current weather in San Francisco.
Action: DuckDuckGo
Action Input: "San Francisco weather"[0m


llama_print_timings:        load time = 78502.40 ms
llama_print_timings:      sample time =    30.59 ms /    48 runs   (    0.64 ms per token,  1569.04 tokens per second)
llama_print_timings: prompt eval time = 78502.35 ms /   192 tokens (  408.87 ms per token,     2.45 tokens per second)
llama_print_timings:        eval time = 19677.39 ms /    47 runs   (  418.67 ms per token,     2.39 tokens per second)
llama_print_timings:       total time = 98324.50 ms






Llama.generate: prefix-match hit


 I found the answer to the question through DuckDuckGo. Let's write the file to the


llama_print_timings:        load time = 78502.40 ms
llama_print_timings:      sample time =    13.36 ms /    22 runs   (    0.61 ms per token,  1646.83 tokens per second)
llama_print_timings: prompt eval time = 106574.43 ms /   251 tokens (  424.60 ms per token,     2.36 tokens per second)
llama_print_timings:        eval time =  8926.49 ms /    21 runs   (  425.07 ms per token,     2.35 tokens per second)
llama_print_timings:       total time = 115568.43 ms


ValueError: Could not parse LLM output: ` I found the answer to the question through DuckDuckGo. Let's write the file to the`