# Conceitos avançados de Models

### Prompt few-shot

In [2]:
from langchain_openai import OpenAI

llm = OpenAI(model='gpt-4o-mini')

PydanticUserError: `OpenAI` is not fully defined; you should define `BaseCache`, then call `OpenAI.model_rebuild()`.

For further information visit https://errors.pydantic.dev/2.11/u/class-not-fully-defined

In [2]:
from langchain_core.messages import HumanMessage, AIMessage

mensagens = [
    HumanMessage(content='Quanto é 1 + 1?'),
    AIMessage(content='2'),
    HumanMessage(content='Quanto é 10 * 5?'),
    AIMessage(content='50'),
    HumanMessage(content='Quanto é 10 + 3?'),
]

chat.invoke(mensagens)

AIMessage(content='10 + 3 é igual a 13.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 52, 'total_tokens': 63, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-3d822bd9-58c6-4c02-94ab-88aabe0cb3bd-0', usage_metadata={'input_tokens': 52, 'output_tokens': 11, 'total_tokens': 63, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Isto é similar a formação de mensagens da api da OpenAI, mas com uma sintaxe diferente:

```python
mensagens = [
    {'role': 'user', 'content': 'Quanto é 1 + 1'},
    {'role': 'assistant', 'content': '2'},
    {'role': 'user', 'content': 'Quanto é 10 * 5'},
    {'role': 'assistant', 'content': '50'},
    {'role': 'user', 'content': 'Quanto é 10 + 3'},
]
```

## Utilizando outros modelos

In [1]:
from langchain_huggingface.chat_models.huggingface import ChatHuggingFace  # Atualizando importação!
from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint # Atualizando importação!

In [5]:
modelo = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

llm = HuggingFaceEndpoint(repo_id=modelo)
chat = ChatHuggingFace(llm=llm)

In [6]:
from langchain_core.messages import HumanMessage, AIMessage

mensagens = [
    HumanMessage(content='Quanto é 1 + 1?'),
    AIMessage(content='2'),
    HumanMessage(content='Quanto é 10 * 5?'),
    AIMessage(content='50'),
    HumanMessage(content='Quanto é 10 + 3?'),
]

chat.invoke(mensagens)

AIMessage(content=' 13\\. Se você quiser calcular outra operação, sinta-se à vontade para me perguntar! \\:)', additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=36, prompt_tokens=58, total_tokens=94), 'model': '', 'finish_reason': 'stop'}, id='run-2db74aff-7a11-46f7-aa42-f426f7382495-0')

A estrutura de chat_model utiliza a estrutura de llm como backend

In [7]:
import langchain

langchain.debug = True
chat.invoke(mensagens)
langchain.debug = False

[32;1m[1;3m[llm/start][0m [1m[llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Quanto é 1 + 1?\nAI: 2\nHuman: Quanto é 10 * 5?\nAI: 50\nHuman: Quanto é 10 + 3?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatHuggingFace] [195ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " 13\\. Se você quiser calcular outra operação, sinta-se à vontade para me perguntar! \\:)",
        "generation_info": {
          "finish_reason": "stop"
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": " 13\\. Se você quiser calcular outra operação, sinta-se à vontade para me perguntar! \\:)",
            "response_metadata": {
              "token_usage": {
                "completion_tokens": 36,
     

> Atenção, nas versões mais atuais de langchain é recomendado utilizar o método set_debug para ativar o modo de debug, da seguinte forma:

In [7]:
from langchain.globals import set_debug

set_debug(True)
chat.invoke(mensagens)
set_debug(False)

[32;1m[1;3m[llm/start][0m [1m[llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Quanto é 1 + 1?\nAI: 2\nHuman: Quanto é 10 * 5?\nAI: 50\nHuman: Quanto é 10 + 3?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatHuggingFace] [780ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " 13\\*",
        "generation_info": {
          "finish_reason": "stop"
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": " 13\\*",
            "response_metadata": {
              "token_usage": {
                "completion_tokens": 6,
                "prompt_tokens": 58,
                "total_tokens": 64
              },
              "model": "",
              "finish_reason": "stop"
            },
   

Outros modelos disponíveis:

https://python.langchain.com/docs/integrations/chat/

## Caching

### Cache em memória

In [2]:
modelo = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

llm = HuggingFaceEndpoint(repo_id=modelo)
chat = ChatHuggingFace(llm=llm)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain_core.messages import HumanMessage, SystemMessage

mensagens = [
    SystemMessage(content='Você é um assistente engraçado.'),
    HumanMessage(content='Quanto é 1 + 1?')
]

In [4]:
from langchain.cache import InMemoryCache
from langchain.globals import set_llm_cache

set_llm_cache(InMemoryCache())

Rodandando a primeira vez

In [5]:
%%time

chat.invoke(mensagens)

CPU times: user 9.41 ms, sys: 2.03 ms, total: 11.4 ms
Wall time: 1.29 s


AIMessage(content=" Well, I'm here to add a bit of humor to your day, not just computation! But since you asked, the answer is 2. Unless, of course, we're talking about the imaginary realm where 1+1 might equal a elephant or a pizza, because in the land of jokes, anything is possible!", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=70, prompt_tokens=31, total_tokens=101), 'model': '', 'finish_reason': 'stop'}, id='run-1f1f7b24-2095-45d2-8012-9d977a73c281-0')

Rodando novamente

In [6]:
%%time

chat.invoke(mensagens)

CPU times: user 496 μs, sys: 0 ns, total: 496 μs
Wall time: 522 μs


AIMessage(content=" Well, I'm here to add a bit of humor to your day, not just computation! But since you asked, the answer is 2. Unless, of course, we're talking about the imaginary realm where 1+1 might equal a elephant or a pizza, because in the land of jokes, anything is possible!", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=70, prompt_tokens=31, total_tokens=101), 'model': '', 'finish_reason': 'stop'}, id='run-1f1f7b24-2095-45d2-8012-9d977a73c281-0')

### Cache SQLite

In [7]:
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

set_llm_cache(SQLiteCache(database_path='arquivos/lancgchain_cache_db.sqlite'))

In [8]:
%%time

chat.invoke(mensagens)

CPU times: user 12.7 ms, sys: 1.06 ms, total: 13.7 ms
Wall time: 227 ms


AIMessage(content=" Well, I'm here to add a bit of humor to your day, not just computation! But since you asked, the answer is 2. Unless, of course, we're talking about the imaginary realm where 1+1 might equal a elephant or a pizza, because in the land of jokes, anything is possible!", additional_kwargs={}, response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=70, prompt_tokens=31, total_tokens=101), 'model': '', 'finish_reason': 'stop'}, id='run-2ace7517-2e29-4e5d-944d-42da8ed59c54-0')

In [9]:
%%time

chat.invoke(mensagens)

CPU times: user 2.04 ms, sys: 0 ns, total: 2.04 ms
Wall time: 1.46 ms


AIMessage(content=" Well, I'm here to add a bit of humor to your day, not just computation! But since you asked, the answer is 2. Unless, of course, we're talking about the imaginary realm where 1+1 might equal a elephant or a pizza, because in the land of jokes, anything is possible!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 70, 'prompt_tokens': 31, 'total_tokens': 101}, 'model': '', 'finish_reason': 'stop'}, id='run-2ace7517-2e29-4e5d-944d-42da8ed59c54-0')

In [10]:
%%time

chat.invoke(mensagens)

CPU times: user 1.77 ms, sys: 0 ns, total: 1.77 ms
Wall time: 1.39 ms


AIMessage(content=" Well, I'm here to add a bit of humor to your day, not just computation! But since you asked, the answer is 2. Unless, of course, we're talking about the imaginary realm where 1+1 might equal a elephant or a pizza, because in the land of jokes, anything is possible!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 70, 'prompt_tokens': 31, 'total_tokens': 101}, 'model': '', 'finish_reason': 'stop'}, id='run-2ace7517-2e29-4e5d-944d-42da8ed59c54-0')