Langchain supports a restricted list of opensource models. I used code llama in this case, which was one of the only models that did not ask for a meta key AND was usable without running on a GPU. We first install the necessary libraries

In [2]:
import sys
!{sys.executable} -m pip install auto-gptq
!{sys.executable} -m pip install accelerate
!{sys.executable} -m pip install optimum
!{sys.executable} -m pip install transformers
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install langchain


Collecting optimum
  Downloading optimum-1.14.1-py3-none-any.whl.metadata (17 kB)
Collecting coloredlogs (from optimum)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
     ---------------------------------------- 0.0/46.0 kB ? eta -:--:--
     ---------------------------------------- 46.0/46.0 kB ? eta 0:00:00
Collecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece]>=4.26.0->optimum)
  Downloading sentencepiece-0.1.99-cp311-cp311-win_amd64.whl (977 kB)
     ---------------------------------------- 0.0/977.5 kB ? eta -:--:--
     ------- ------------------------------ 204.8/977.5 kB 6.3 MB/s eta 0:00:01
     -------------------- ----------------- 522.2/977.5 kB 6.6 MB/s eta 0:00:01
     ----------------------- -------------- 614.4/977.5 kB 6.4 MB/s eta 0:00:01
     ----------------------------- -------- 768.0/977.5 kB 4.4 MB/s eta 0:00:01
     -------------------------------------- 977.5/977.5 kB 4.4 MB/s eta 0:00:00
Collecting protobuf (from transforme

We then need to define our model

In [7]:
import torch
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
 
MODEL_NAME = "codellama/CodeLlama-7b-hf"
#MODEL_NAME = "C:/Users/yokor/.cache/huggingface/hub/models--codellama--CodeLlama-7b-hf/snapshots/bc5283229e2fe411552f55c71657e97edf79066c"
#torch.FloatTensor(1).to('cuda')
                        
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
 
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME
)
 
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15
 
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    generation_config=generation_config,
)
 
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
llm.invoke("how many letters in the word educa?") #this prompts the llm to give a response


RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes.

Now we just have to invoke tools from LangChain. These tools will be used by Agents automatically, which means that the models will help choose which tool is best suited for which task.
We can also create new tools as shown right below; they are simply python functions:

In [8]:
from langchain.agents import tool

@tool
def get_word_length(word: str) -> int:
    """Returns the length of a word."""
    return len(word)

tools = [get_word_length]

En bas on peut voire comment définir un prompt qu'on associe à un agent. Le messageplaceholder donne un mot qui ne changent pas, ce qui permet de donner un nom à l'agent. https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/msg_prompt_templates

In [9]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

Now let us try using the tools provided by Langchain: You just need to bind them to our llm, so that they are used at the right time.

In [10]:
from langchain.tools.render import format_tool_to_openai_function

llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

In [None]:
now we can simply make an agent:

In [11]:
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

In [None]:
#we retry the same prompt as before:
agent.invoke({"input": "how many letters in the word educa?", "intermediate_steps": []})

In [None]:
#For a more complex example, we can reiterate over the same agent:
from langchain.schema.agent import AgentFinish

user_input = "how many letters in the word educa?"
intermediate_steps = []
while True:
    output = agent.invoke(
        {
            "input": user_input,
            "intermediate_steps": intermediate_steps,
        }
    )
    if isinstance(output, AgentFinish):
        final_result = output.return_values["output"]
        break
    else:
        print(f"TOOL NAME: {output.tool}")
        print(f"TOOL INPUT: {output.tool_input}")
        tool = {"get_word_length": get_word_length}[output.tool]
        observation = tool.run(output.tool_input)
        intermediate_steps.append((output, observation))
print(final_result)

In [None]:
#Using AgentExecutor, we can have the same code with a simpler interface:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

agent_executor.invoke({"input": "how many letters in the word educa?"})

In [None]:
#Here we add some memory to make the self-conversation more meaningful:
from langchain.prompts import MessagesPlaceholder

MEMORY_KEY = "chat_history"
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are very powerful assistant, but bad at calculating lengths of words.",
        ),
        MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [None]:
from langchain.schema.messages import AIMessage, HumanMessage

chat_history = []

In [None]:
agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
        "chat_history": lambda x: x["chat_history"],
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [None]:
input1 = "how many letters in the word educa?"
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=input1),
        AIMessage(content=result["output"]),
    ]
)
agent_executor.invoke({"input": "is that a real word?", "chat_history": chat_history})