In [1]:
!pip install langchain
!pip install accelerate
!pip install bitsandbytes





Collecting accelerate
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-1.7.0-py3-none-any.whl (362 kB)
Installing collected packages: accelerate
Successfully installed accelerate-1.7.0




Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-win_amd64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.0-py3-none-win_amd64.whl (66.5 MB)
   ---------------------------------------- 0.0/66.5 MB ? eta -:--:--
   -- ------------------------------------- 4.7/66.5 MB 28.4 MB/s eta 0:00:03
   ------- -------------------------------- 12.6/66.5 MB 34.2 MB/s eta 0:00:02
   --------- ------------------------------ 16.3/66.5 MB 28.4 MB/s eta 0:00:02
   ------------ --------------------------- 20.7/66.5 MB 26.1 MB/s eta 0:00:02
   -------------- ------------------------- 24.9/66.5 MB 24.6 MB/s eta 0:00:02
   ----------------- ---------------------- 29.6/66.5 MB 24.1 MB/s eta 0:00:02
   -------------------- ------------------- 33.8/66.5 MB 23.6 MB/s eta 0:00:02
   ----------------------- ---------------- 38.3/66.5 MB 23.4 MB/s eta 0:00:02
   ------------------------- -------------- 43.0/66.5 MB 23.4 MB/s eta 0:00:02
   ---------------------------- ----------- 47.7/66.5 MB 



NOTE: You might need to restart the session after the pip install block

In [2]:
import warnings
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.models.mistral.modeling_mistral import MistralForCausalLM
from transformers.models.llama.tokenization_llama_fast import LlamaTokenizerFast

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
warnings.filterwarnings('ignore')

In [4]:
# Hugging Face Hub id of the checkpoint to load.
# NOTE: this repository is gated — loading fails with a 401 error unless the
# current Hugging Face account has been granted access and is logged in.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the weights 4-bit quantized through bitsandbytes.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2.
401 Client Error. (Request ID: Root=1-68433853-232b60e97873fa9f274cd30b;3c1d9717-c4a5-47a8-be51-9d1436bb68f1)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.2 is restricted. You must have access to it and be authenticated to access it. Please log in.

In [None]:
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
from typing import Optional, List, Mapping, Any

In [None]:
class CustomLLMMistral(LLM):
    """LangChain ``LLM`` wrapper around a locally loaded Mistral instruct model.

    Every prompt is sent to the model as a single user turn rendered with the
    tokenizer's chat template; the sampled completion is post-processed so
    that it always ends with a closing Markdown code fence.
    """

    # Causal language model used for generation.
    model: MistralForCausalLM
    # Tokenizer paired with ``model``; also supplies the chat template.
    tokenizer: LlamaTokenizerFast

    @property
    def _llm_type(self) -> str:
        """Return the label identifying this LLM implementation."""
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any) -> str:
        """Generate a completion for ``prompt`` and return the cleaned-up text.

        Args:
            prompt: Full prompt text; it is wrapped as one user message.
            stop: Optional stop strings. The output is cut at the first
                occurrence of each of them, in the order given.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Extra keyword arguments forwarded by the caller; accepted
                for interface compatibility and ignored.

        Returns:
            The generated text without the prompt and without special tokens,
            truncated at the stop strings and padded with backticks until it
            ends with a closing code fence.
        """
        messages = [
            {"role": "user", "content": prompt},
        ]

        encodeds = self.tokenizer.apply_chat_template(messages, return_tensors="pt")
        model_inputs = encodeds.to(self.model.device)

        generated_ids = self.model.generate(
            model_inputs,
            max_new_tokens=512,
            do_sample=True,
            # Use the instance's own tokenizer rather than a notebook-level
            # global, so the class does not depend on hidden kernel state.
            pad_token_id=self.tokenizer.eos_token_id,
            top_k=4,
            temperature=0.7,
        )

        # The returned sequence starts with the prompt tokens. Decode only the
        # tokens that follow them instead of splitting the decoded text on
        # "[/INST]", which picks the wrong segment whenever the prompt (or a
        # tool observation embedded in it) contains that marker.
        prompt_length = model_inputs.shape[-1]
        output = self.tokenizer.decode(
            generated_ids[0, prompt_length:], skip_special_tokens=True
        ).strip()

        if stop is not None:
            for word in stop:
                output = output.split(word)[0].strip()

        # Mistral 7B sometimes fails to properly close the Markdown snippets.
        # If they are not correctly closed, LangChain will struggle to parse
        # the output, so pad with backticks until a full fence is present.
        while not output.endswith("```"):
            output += "`"

        return output

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM instance."""
        return {"model": self.model}

In [None]:
# Wrap the loaded model/tokenizer pair so LangChain can drive it as an LLM.
llm = CustomLLMMistral(
    model=model,
    tokenizer=tokenizer,
)

In [None]:
!pip install wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11680 sha256=9725e095c69b2d5158e7f3c68b2376f85e3b1320565a9aa13904432354219623
  Stored in directory: /root/.cache/pip/wheels/5e/b6/c5/93f3dec388ae76edc830cb42901bb0232504dfc0df02fc50de
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


### Tools

In [None]:
import numexpr as ne
from langchain.tools import WikipediaQueryRun, BaseTool
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.agents import Tool

We limit the number of results to 1 and the maximum number of characters to 2500. This limit is imposed because, although Mistral 7B supports prompts of up to 32,000 tokens, a free Colab account does not provide enough memory for very large inputs. Feel free to experiment with different values for these parameters.

In [None]:
# Keep the tool output small: one result only, cut off at 2500 characters.
wikipedia = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(
        top_k_results=1,
        doc_content_chars_max=2500,
    )
)

In [None]:
# Smoke test: run one query through the Wikipedia tool and print the text it returns.
print(wikipedia.run("Deep Learning"))

Page: Deep learning
Summary: Deep learning is the subset of machine learning methods based on artificial neural networks (ANNs) with representation learning. The adjective "deep" refers to the use of multiple layers in the network. Methods used can be either supervised, semi-supervised or unsupervised.Deep-learning architectures such as deep neural networks, deep belief networks, recurrent neural networks, convolutional neural networks and transformers have been applied to fields including computer vision, speech recognition, natural language processing, machine translation, bioinformatics, drug design, medical image analysis, climate science, material inspection and board game programs, where they have produced results comparable to and in some cases surpassing human expert performance.Artificial neural networks were inspired by information processing and distributed communication nodes in biological systems. ANNs have various differences from biological brains. Specifically, artifici

In [None]:
# Expose the Wikipedia query runner to the agent as a named tool. The
# description is the usage guidance the model sees for this tool.
wikipedia_tool = Tool(
    name="wikipedia",
    func=wikipedia.run,
    description="Never search for more than one concept at a single step. If you need to compare two concepts, search for each one individually. Syntax: string with a simple concept",
)

In [None]:
class Calculator(BaseTool):
    """Agent tool that evaluates a math expression written in numexpr syntax."""

    # The annotations are required: `name` and `description` are pydantic
    # fields on BaseTool, and pydantic v2 rejects un-annotated overrides.
    name: str = "calculator"
    description: str = "Use this tool for math operations. It requires numexpr syntax. Use it always you need to solve any math operation. Be sure syntax is correct."

    def _run(self, expression: str):
        """Evaluate ``expression`` and return the result as a Python scalar.

        Args:
            expression: Math expression in numexpr syntax.

        Returns:
            The evaluated value, or an explanatory message string when the
            expression cannot be evaluated.
        """
        # NOTE(review): `expression` is produced by the LLM; confirm that the
        # installed numexpr version sanitizes the input before evaluating it.
        try:
            return ne.evaluate(expression).item()
        except Exception:
            # Deliberate best effort: the message is returned to the agent as
            # the tool observation so it can retry with a different syntax.
            return "This is not a numexpr valid syntax. Try a different syntax."

    async def _arun(self, expression: str):
        """Async counterpart of ``_run``; not supported by this tool.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("This tool does not support async")

In [None]:
# Instantiate the tool and sanity-check it with a trivial expression.
calculator_tool = Calculator()
calculator_tool.run("2+3")

5

In [None]:
# Toolbox handed to both the agent and its executor.
tools = [
    wikipedia_tool,
    calculator_tool,
]

### Customizing the Prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [None]:
# System message template for the agent prompt. It describes the JSON step
# format (thought / action / action_input) and the "Final Answer" convention.
# `{tool_names}` and `{tools}` are template variables that still have to be
# filled in — presumably by the agent constructor; verify against its docs.
system="""
You are designed to solve tasks. Each task requires multiple steps that are represented by a markdown code snippet of a json blob.
The json structure should contain the following keys:
thought -> your thoughts
action -> name of a tool
action_input -> parameters to send to the tool

These are the tools you can use: {tool_names}.

These are the tools descriptions:

{tools}

If you have enough information to answer the query use the tool "Final Answer". Its parameters is the solution.
If there is not enough information, keep trying.

"""

In [None]:
# Human message template. It shows the model one example step, asks it to
# emit the word STOP after each snippet, and injects the user query through
# the `{input}` variable. Doubled braces are literal braces in the template.
# The example's last placeholder now has its closing ">" so that all three
# placeholders in the sample JSON are written consistently.
human="""
Add the word "STOP" after each markdown snippet. Example:

```json
{{"thought": "<your thoughts>",
 "action": "<tool name or Final Answer to give a final answer>",
 "action_input": "<tool parameters or the final output>"}}
```
STOP

This is my query="{input}". Write only the next step needed to solve it.
Your answer should be based in the previous tools executions, even if you think you know the answer.
Remember to add STOP after each snippet.

These were the previous steps given to solve this query and the information you already gathered:
"""

In [None]:
# Assemble the chat prompt: system instructions, optional prior conversation,
# the human request, and finally the agent's scratchpad of earlier steps.
prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    MessagesPlaceholder(variable_name="chat_history", optional=True),
    ("human", human),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

### Building the Agent

In [None]:
from langchain.agents import create_json_chat_agent, AgentExecutor
from langchain.memory import ConversationBufferMemory

In [None]:
# Build the JSON chat agent. Generation stops at the "STOP" marker the prompt
# asks the model to emit, and tool results are passed back verbatim.
agent = create_json_chat_agent(
    llm=llm,
    tools=tools,
    prompt=prompt,
    stop_sequence=["STOP"],
    template_tool_response="{observation}",
)

In [None]:
# Conversation buffer stored under the "chat_history" key, matching the
# optional placeholder of the same name in the prompt.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

In [None]:
# Conversation memory is currently disabled: the `memory=memory` keyword
# argument is left out of this call. Add it to make the executor remember
# earlier turns.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

### Tests

In [None]:
# Arithmetic question: checks that the agent routes the work to the calculator tool.
agent_executor.invoke({"input": "How much is 23 plus 17?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{"thought": "This is a mathematical problem that can be solved using the calculator tool.",
 "action": "calculator",
 "action_input": "23 + 17"}```[0m[33;1m[1;3m40[0m[32;1m[1;3m```json
{"thought": "The calculation was correct, the result of 23 plus 17 is 40.",
 "action": "Final Answer",
 "action_input": "40"}```[0m

[1m> Finished chain.[0m


{'input': 'How much is 23 plus 17?', 'output': '40'}

In [None]:
# Factual question: checks that the agent routes the lookup to the wikipedia tool.
agent_executor.invoke({"input": "What is the capital of France?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{"thought": "The capital of France is a concept that can be found on Wikipedia.",
 "action": "wikipedia",
 "action_input": "capital of France"}```[0m[36;1m[1;3mPage: Paris
Summary: Paris is the capital and most populous city of France. With an official estimated population of 2,102,650 residents as of 1 January 2023 in an area of more than 105 km2 (41 sq mi), Paris is the fourth-most populated city in the European Union and the 30th most densely populated city in the world in 2022. Since the 17th century, Paris has been one of the world's major centres of finance, diplomacy, commerce, culture, fashion, and gastronomy. For its leading role in the arts and sciences, as well as its early and extensive system of street lighting, in the 19th century, it became known as the City of Light.The City of Paris is the centre of the Île-de-France region, or Paris Region, with an official estimated population of 12,271,794 inhab

{'input': 'What is the capital of France?', 'output': 'Paris'}

In [None]:
# Factual question whose answer has to be extracted from a longer Wikipedia summary.
agent_executor.invoke({"input": "Who was the inventor of the Radio?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{"thought": "The inventor of the radio is not a simple concept. I will search for more information using the 'wikipedia' tool.",
 "action": "wikipedia",
 "action_input": "radio inventor"}
```[0m[36;1m[1;3mPage: Invention of radio
Summary: The invention of radio communication was preceded by many decades of establishing theoretical underpinnings, discovery and experimental investigation of radio waves, and engineering and technical developments related to their transmission and detection. These developments allowed Guglielmo Marconi to turn radio waves into a wireless communication system.
The idea that the wires needed for electrical telegraph could be eliminated, creating a wireless telegraph, had been around for a while before the establishment of radio-based communication. Inventors attempted to build systems based on electric conduction, electromagnetic induction, or on other theoretical ideas. Several inventor

{'input': 'Who was the inventor of the Radio?',
 'output': 'Guglielmo Marconi is the inventor of the practical radio wave–based wireless telegraph system and a Nobel laureate.'}

In [None]:
# Two-step question: a figure has to be looked up first and then doubled.
agent_executor.invoke({"input": "What is the double of the population of Madrid?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{"thought": "The population of Madrid is the information needed to find its double. I will use the wikipedia tool to find the population of Madrid.",
 "action": "wikipedia",
 "action_input": "Population of Madrid"
}```[0m[36;1m[1;3mPage: Madrid
Summary: Madrid ( mə-DRID, Spanish: [maˈðɾið] ) is the capital and most populous city of Spain. The city has almost 3.4 million inhabitants and a metropolitan area population of approximately 7 million. It is the second-largest city in the European Union (EU), and its monocentric metropolitan area is the second-largest in the EU. The municipality covers 604.3 km2 (233.3 sq mi) geographical area. Madrid lies on the River Manzanares in the central part of the Iberian Peninsula at about 650 meters above mean sea level. The capital city of both Spain and the surrounding autonomous community of Madrid (since 1983), it is also the political, economic, and cultural centre of the co

{'input': 'What is the double of the population of Madrid?',
 'output': '6,800,000'}

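The following example usually fails due to limits in the model's reasoning capacity. In the run shown below the agent answers incorrectly: Tom Hanks was born in 1956 and Kevin Costner in 1955, so Kevin Costner is the older of the two.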

In [None]:
# Comparison question: each person has to be looked up individually before
# the two results can be compared.
agent_executor.invoke({"input": "Who is older, Tom Hanks or Kevin Costner?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{"thought": "I need to find the birth years of Tom Hanks and Kevin Costner to determine who is older.",
 "action": "wikipedia",
 "action_input": "Tom Hanks"}```[0m[36;1m[1;3mPage: Tom Hanks
Summary: Thomas Jeffrey Hanks (born July 9, 1956) is an American actor and filmmaker. Known for both his comedic and dramatic roles, he is one of the most popular and recognizable film stars worldwide, and is regarded as an American cultural icon. Hanks' films have grossed more than $4.9 billion in North America and more than $9.96 billion worldwide, making him the fourth-highest-grossing actor in North America. He has received numerous honors including the AFI Life Achievement Award in 2002, the Kennedy Center Honor in 2014, the Presidential Medal of Freedom and the French Legion of Honor both in 2016, as well as the Golden Globe Cecil B. DeMille Award in 2020.Hanks made his breakthrough with leading roles in a series of comedy

{'input': 'Who is older, Tom Hanks or Kevin Costner?',
 'output': 'Tom Hanks is older than Kevin Costner.'}