# 环境配置

In [168]:
from dotenv import load_dotenv
import os

# 加载.env
load_dotenv('.env')
# 配置openai api key
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

In [8]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

In [9]:
llm = OpenAI()
chat_model = ChatOpenAI()

In [10]:
llm('你好')

'，我是自动机器人，很高兴为你服务！'

In [6]:
chat_model(messages=[HumanMessage(content="你好")])

AIMessage(content='你好！有什么我可以为您效劳的吗？', additional_kwargs={}, example=False)

# LLMs

In [8]:
from langchain.llms import OpenAI

In [9]:
llm = OpenAI(model_name="text-ada-001",n=2,best_of=2)

In [10]:
llm("给我讲个笑话")

'\n\n一杯茶,一杯丁饭,一起去看电视吧!\n\n影'

Generate: More broadly, you can call it with a list of inputs, getting back a more complete response than just the text. This complete response includes things like multiple top responses, as well as LLM provider specific information

生成：更广泛地说，您可以使用输入列表调用它，获得比仅文本更完整的响应。这个完整的响应包括多个顶级响应以及LLM提供者特定信息。

In [28]:
llm_result = llm.generate(["Tell me a joke", "Tell me a poem"]*15)
llm_result

LLMResult(generations=[[Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side.', generation_info={'finish_reason': 'stop', 'logprobs': None}), Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side!', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text="\n\nThere's no one way to be\n\nThere's no one way to be\n\nThere's no one way to be\n\nThere's just me,\n\nAnd you, and you, and I,\n\nAnd no one way to be\n\nThere's no one way to be\n\nThere's just me,\n\nAnd you, and you, and I,\n\nAnd no one way to be\n\nThere's just me,\n\nAnd you, and you, and I,\n\nAnd I think of you\n\nWhen I'm feeling down\n\n'Cause I'm sure as long\n\nAs I'm alive I'll\n\nBeicitous of you\n\nI can't tell you\n\nWhat's wrong\n\nI know I should go\n\nBut I can't help but think\n\nOf you, and I care for you\n\nWhen I'm feeling down\n\nI know I should go\n\nBut I can't help but think\n\nOf you, and I care for you\n\nWhen I'm 

In [29]:
len(llm_result.generations)

30

In [30]:
llm_result.generations[0]

[Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side.', generation_info={'finish_reason': 'stop', 'logprobs': None}),
 Generation(text='\n\nWhy did the chicken cross the road?\n\nTo get to the other side!', generation_info={'finish_reason': 'stop', 'logprobs': None})]

In [31]:
llm_result.generations[-1]

[Generation(text="\n\nThe world is a beautiful place\nThe colors are so bright and true\nAnd I feel so free and free\nWhen I'm away from here\nThe free world is waiting for me\nAnd I'm going to enjoy it so much\nThat I might even move to here\nI'm so content here\nAnd I'm so happy\n\nThe world is a beautiful place\nAnd I feel so free and free\nWhen I'm away from here\nThe free world is waiting for me\nAnd I'm going to enjoy it so much\nThat I might even move to here\nI'm so content here", generation_info={'finish_reason': 'stop', 'logprobs': None}),
 Generation(text="\n\nA rose by the side of the road\n\nIs all I need to find my way\n\nTo the place I've been searching for\n\nAnd my heart is singing with joy\n\nWhen I look at this rose\n\nIt reminds me of the love I've found\n\nAnd I know that wherever I go\n\nI'll always find my rose by the side of the road.", generation_info={'finish_reason': 'stop', 'logprobs': None})]

In [32]:
llm_result.llm_output

{'token_usage': {'total_tokens': 4202,
  'prompt_tokens': 120,
  'completion_tokens': 4082},
 'model_name': 'text-ada-001'}

In [33]:
llm.get_num_tokens("你好")

4

In [1]:
llm.get_num_tokens("hello")

NameError: name 'llm' is not defined

## 如何使用 LLM 的异步 API [#](https://python.langchain.com/en/latest/modules/models/llms/examples/async_llm.html#how-to-use-the-async-api-for-llms "此标题的永久链接")

LangChain 通过利用[asyncio](https://docs.python.org/3/library/asyncio.html)库为 LLM 提供异步支持。

异步支持对于同时调用多个 LLM 特别有用，因为这些调用是网络绑定的。目前支持`OpenAI`、`PromptLayerOpenAI`和`ChatOpenAI`，但对其他 LLM 的异步支持在路线图上。`Anthropic`

您可以使用该`agenerate`方法异步调用 OpenAI LLM。

In [2]:
import time
import asyncio

from langchain.llms import OpenAI

# 串行生成文本
def generate_serially():
    llm = OpenAI(temperature=0.9)
    for _ in range(10):
        # 调用生成文本的方法并传入输入文本
        resp = llm.generate(["Hello, how are you?"])
        # 打印生成的文本
        print(resp.generations[0][0].text)


# 异步生成文本
async def async_generate(llm):
    # 异步调用生成文本的方法并传入输入文本
    resp = await llm.agenerate(["Hello, how are you?"])
    # 打印生成的文本
    print(resp.generations[0][0].text)


# 并发生成文本
async def generate_concurrently():
    llm = OpenAI(temperature=0.9)
    tasks = [async_generate(llm) for _ in range(10)]
    # 并发执行生成文本的任务
    await asyncio.gather(*tasks)





In [4]:
s = time.perf_counter()
# 如果在 Jupyter 之外运行此代码，请使用 asyncio.run(generate_concurrently())
await generate_concurrently()
elapsed = time.perf_counter() - s
print('\033[1m' + f"Concurrent executed in {elapsed:0.2f} seconds." + '\033[0m')

s = time.perf_counter()
generate_serially()
elapsed = time.perf_counter() - s
print('\033[1m' + f"Serial executed in {elapsed:0.2f} seconds." + '\033[0m')



I'm doing well, thank you! How about you?


I'm doing great, thank you! How about you?


I'm doing well, thank you. How about you?


I'm doing well, thanks for asking. How about you?


I'm doing well, thank you. How about you?


I'm doing well, thank you. How about you?


I'm doing great, thank you. How about you?


I'm doing well, thanks for asking! How about you?


I'm doing well, thank you. How about you?


I'm doing well, thank you. How about you?
[1mConcurrent executed in 3.42 seconds.[0m


I'm doing well, thank you. How about you?


I'm doing well, thank you for asking. How about you?


I'm doing great, thanks for asking! How about you?


I'm doing well, thanks for asking. How about you?


I'm doing well, thank you! How about yourself?

I'm doing well, thank you. How about you?


I'm doing well, thank you. How about you?


I'm doing well, thank you. How about you?


I'm doing well, thank you. How about you?


I'm doing great, thank you. How about you?
[1mSerial executed in 1

## 如何编写自定义 LLM 包装器[#](https://python.langchain.com/en/latest/modules/models/llms/examples/custom_llm.html#how-to-write-a-custom-llm-wrapper "此标题的永久链接")

如果您想使用自己的 LLM 或不同于 LangChain 支持的包装器，本笔记本介绍了如何创建自定义 LLM 包装器。

自定义 LLM 只需要执行一件必需的事情：

1. 一种`_call`接受字符串、一些可选停用词并返回字符串的方法
    

它可以实现第二个可选的东西：

1. `_identifying_params`用于帮助打印此类的属性。应该返回字典。
    

让我们实现一个非常简单的自定义 LLM，它只返回输入的前 N ​​个字符。

In [11]:
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM

In [12]:
class CustomLLM(LLM):
    
    n: int
        
    @property
    def _llm_type(self) -> str:
        return "custom"
    
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")
        return prompt[:self.n]
    
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"n": self.n}

In [13]:
llm = CustomLLM(n=10)

In [14]:
llm("This is a foobar thing")

'This is a '

In [15]:
print(llm)

[1mCustomLLM[0m
Params: {'n': 10}


## 如何（以及为什么）使用假 LLM [#](https://python.langchain.com/en/latest/modules/models/llms/examples/fake_llm.html#how-and-why-to-use-the-fake-llm "此标题的永久链接")

我们公开了一个可用于测试的假 LLM 类。这允许您模拟对 LLM 的调用并模拟如果 LLM 以某种方式响应会发生什么。

在本笔记本中，我们将介绍如何使用它。

我们从在代理中使用 FakeLLM 开始。

In [16]:
from langchain.llms.fake import FakeListLLM

In [17]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType

In [18]:
tools = load_tools(["python_repl"])

In [19]:
responses=[
    "Action: Python REPL\nAction Input: print(2 + 2)",
    "Final Answer: 4"
]
llm = FakeListLLM(responses=responses)

In [20]:
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [21]:
agent.run("whats 2 + 2")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: Python REPL
Action Input: print(2 + 2)[0m4


Observation: [36;1m[1;3m4
[0m
Thought:[32;1m[1;3mFinal Answer: 4[0m

[1m> Finished chain.[0m


'4'

In [24]:
# agent.run("whats 1 + 3")

## 如何（以及为什么）使用人工输入 LLM [#](https://python.langchain.com/en/latest/modules/models/llms/examples/human_input_llm.html#how-and-why-to-use-the-the-human-input-llm "此标题的永久链接")

与假 LLM 类似，LangChain 提供了一个伪 LLM 类，可用于测试、调试或教育目的。这使您可以模拟对 LLM 的调用，并模拟人类在收到提示时的反应。

在本笔记本中，我们将介绍如何使用它。

我们首先在代理中使用 HumanInputLLM。

In [25]:
from langchain.llms.human import HumanInputLLM

from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType

In [27]:
tools = load_tools(["wikipedia"])
llm = HumanInputLLM(prompt_func=lambda prompt: print(f"\n===PROMPT====\n{prompt}\n=====END OF PROMPT======"))

In [28]:
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [29]:
agent.run("What is 'Bocchi the Rock!'?")



[1m> Entering new AgentExecutor chain...[0m

===PROMPT====
Answer the following questions as best you can. You have access to the following tools:

Wikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Wikipedia]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: What is 'Bocchi the Rock!'?
Thought:


ChatGPT

问题：'Bocchi the Rock!'是什么？

思考：我需要使用一个工具。

操作：Wikipedia

操作输入：Bocchi the Rock!，日本的四格漫画和动画系列。

观察：页面：Bocchi the Rock!
摘要：Bocchi the Rock!（ぼっち・ざ・ろっく！，Bocchi Za Rokku！）是一部由Aki Hamaji创作和插图的日本四格漫画系列。自2017年12月以来，它在Houbunsha的青年漫画杂志《Manga Time Kirara Max》上连载。截至2022年11月，该系列已经出版了五卷单行本。
CloverWorks制作的动画电视系列于2022年10月至12月播出。该系列因其剧本、喜剧、角色以及对社交焦虑的描绘而受到赞扬，动画的视觉创意也备受称赞。

页面：《Manga Time Kirara》
摘要：《Manga Time Kirara》（まんがタイムきらら，Manga Taimu Kirara）是一本由Houbunsha出版的日本青年漫画杂志，主要连载四格漫画。该杂志于每月的第九天发售，最早是在2002年5月17日作为《Manga Time》的特别版首次出版。该杂志中的角色曾出现在名为《Kirara Fantasia》的跨界角色扮演游戏中。

页面：《Manga Time Kirara Max》
摘要：《Manga Time Kirara Max》（まんがタイムきららMAX）是一本由Houbunsha出版的日本四格青年漫画杂志。它是"Kirara"系列的第三本杂志，之前有《Manga Time Kirara》和《Manga Time Kirara Carat》。第一期于2004年9月29日发行。目前，该杂志于每月19日发行。

思考：这些都不是相关的文章。

操作：Wikipedia

操作输入：Bocchi the Rock!，日本的四格漫画系列，由Aki Hamaji创作和插图。

观察：页面：Bocchi the Rock!
摘要：Bocchi the Rock!（ぼっち・ざ・ろっく！，Bocchi Za Rokku！）是一部由Aki Hamaji创作和插图的日本四格漫画系列。自2017年12月以来，它在Houbunsha的青年漫画杂志《Manga Time Kirara Max》上连载。截至2022年11月，该系列已经出版了五卷单行本。
CloverWorks制作的动画电视系列于2022年10月至12月播出。该系列因其剧本、喜剧、角色以及对社交焦虑的描绘而受到赞扬，动画的视觉创意也备受称赞。

思考：这次有效了。

最终回答：《Bocchi the Rock!》是一部四格漫画系列和动画电视系列。该系列因其剧本、喜剧、角色以及对社交焦虑的描绘而受到赞扬，动画的视觉创意也备受称赞。

完成任务。

## 如何缓存 LLM 调用[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#how-to-cache-llm-calls "此标题的永久链接")

此笔记本介绍了如何缓存单个 LLM 调用的结果。

In [1]:
from langchain.llms import OpenAI

### 在内存缓存中

In [2]:
import langchain
from langchain.cache import InMemoryCache
langchain.llm_cache = InMemoryCache()

In [3]:
# To make the caching really obvious, lets use a slower model.
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2)

In [4]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 760292754efb432d1d02ccf608d2342c in your message.) {
  "error": {
    "message": "The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 760292754efb432d1d02ccf608d2342c in your message.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 76029275

CPU times: total: 15.6 ms
Wall time: 17.3 s


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side!'

In [5]:
%%time
# The second time it is, so it goes faster
llm("Tell me a joke")

CPU times: total: 0 ns
Wall time: 0 ns


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side!'

### SQLite 缓存

In [6]:
!rm .langchain.db

'rm' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���


In [7]:
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")

In [8]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

CPU times: total: 78.1 ms
Wall time: 2.44 s


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

In [9]:
%%time
# The second time it is, so it goes faster
llm("Tell me a joke")

CPU times: total: 0 ns
Wall time: 997 µs


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

### Redis缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#redis-cache "此标题的永久链接")

#### 标准缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#standard-cache "此标题的永久链接")

使用Redis缓存提示和响应。

In [6]:
# We can do the same thing with a Redis cache
# (make sure your local Redis instance is running first before running this example)
from redis import Redis
from langchain.cache import RedisCache
import langchain
from langchain.llms import OpenAI

langchain.llm_cache = RedisCache(redis_=Redis(host="192.168.3.34",password="lowrisk"))

In [7]:
# To make the caching really obvious, lets use a slower model.
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2)

In [8]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

CPU times: total: 46.9 ms
Wall time: 7.03 s


"\n\nWhy don't scientists trust atoms?\nBecause they make up everything."

In [9]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

CPU times: total: 0 ns
Wall time: 3.99 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

### 语义缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#semantic-cache "此标题的永久链接")

使用Redis缓存提示和响应，并根据语义相似性评估命中。

In [18]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.cache import RedisSemanticCache

# redis 语义缓存
langchain.llm_cache = RedisSemanticCache(
    redis_url="redis://:lowrisk@192.168.3.34:6379",
    embedding=OpenAIEmbeddings()
)

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

CPU times: user 351 ms, sys: 156 ms, total: 507 ms  
Wall time: 3.37 s  
"\n\nWhy don't scientists trust atoms?\nBecause they make up everything."

In [None]:
%%time
# The second time, while not a direct hit, the question is semantically similar to the original question,
# so it uses the cached result!
llm("Tell me one joke")

CPU times: user 6.25 ms, sys: 2.72 ms, total: 8.97 ms  
Wall time: 262 ms   
"\n\nWhy don't scientists trust atoms?\nBecause they make up everything."

## GPTCache [#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#gptcache "此标题的永久链接")

我们可以使用[GPTCache](https://github.com/zilliztech/GPTCache)进行精确匹配缓存或基于语义相似性缓存结果

先从精确匹配的例子说起

In [22]:
# !pip install gptcache

In [23]:
from gptcache import Cache
from gptcache.manager.factory import manager_factory
from gptcache.processor.pre import get_prompt
from langchain.cache import GPTCache
import hashlib

def get_hashed_name(name):
    return hashlib.sha256(name.encode()).hexdigest()

def init_gptcache(cache_obj: Cache, llm: str):
    hashed_llm = get_hashed_name(llm)
    cache_obj.init(
        pre_embedding_func=get_prompt,
        data_manager=manager_factory(manager="map", data_dir=f"map_cache_{hashed_llm}"),
    )

langchain.llm_cache = GPTCache(init_gptcache)

In [24]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

CPU times: total: 31.2 ms
Wall time: 2.65 s


'\n\nHow do you catch a cheetah? You tie him to a post!'

In [25]:
%%time
# The second time it is, so it goes faster
llm("Tell me a joke")

CPU times: total: 0 ns
Wall time: 0 ns


'\n\nHow do you catch a cheetah? You tie him to a post!'

现在让我们展示一个相似性缓存的例子

In [26]:
from gptcache import Cache
from gptcache.adapter.api import init_similar_cache
from langchain.cache import GPTCache
import hashlib

def get_hashed_name(name):
    return hashlib.sha256(name.encode()).hexdigest()

def init_gptcache(cache_obj: Cache, llm: str):
    hashed_llm = get_hashed_name(llm)
    init_similar_cache(cache_obj=cache_obj, data_dir=f"similar_cache_{hashed_llm}")

langchain.llm_cache = GPTCache(init_gptcache)

In [27]:
%%time
# The first time, it is not yet in cache, so it should take longer
llm("Tell me a joke")

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
Downloading (…)okenizer_config.json: 100%|██████████| 465/465 [00:00<00:00, 93.3kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)lve/main/config.json: 100%|██████████| 827/827 [00:00<00:00, 207kB/s]
Downloading spiece.model: 100%|██████████| 760k/760k [00:00<00:00, 918kB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 1.31M/1.31M [00:01<00:00, 1.27MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 245/245 [00:00<00:00, 123kB/s]
Downloading model.onnx: 100%|██████████| 46.9M/46.9M [00:05<00:00, 8.25MB/s]


CPU times: total: 4.73 s
Wall time: 4min 19s


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

In [28]:
%%time
# This is an exact match, so it finds it in the cache
llm("Tell me a joke")

CPU times: total: 1.55 s
Wall time: 575 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

In [29]:
%%time
# This is not an exact match, but semantically within distance so it hits!
llm("Tell me joke")

CPU times: total: 1.31 s
Wall time: 355 ms


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

## SQLAlchemy 缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#sqlalchemy-cache "此标题的永久链接")

\# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.

\# from langchain.cache import SQLAlchemyCache
\# from sqlalchemy import create\_engine

\# engine = create\_engine("postgresql://postgres:postgres@localhost:5432/postgres")
\# langchain.llm\_cache = SQLAlchemyCache(engine)

Copy to clipboard

### 自定义 SQLAlchemy 模式[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#custom-sqlalchemy-schemas "此标题的永久链接")

\# You can define your own declarative SQLAlchemyCache child class to customize the schema used for caching. For example, to support high-speed fulltext prompt indexing with Postgres, use:

from sqlalchemy import Column, Integer, String, Computed, Index, Sequence
from sqlalchemy import create\_engine
from sqlalchemy.ext.declarative import declarative\_base
from sqlalchemy\_utils import TSVectorType
from langchain.cache import SQLAlchemyCache

Base \= declarative\_base()

class FulltextLLMCache(Base):  \# type: ignore
 """Postgres table for fulltext-indexed LLM Cache"""

    \_\_tablename\_\_ \= "llm\_cache\_fulltext"
    id \= Column(Integer, Sequence('cache\_id'), primary\_key\=True)
    prompt \= Column(String, nullable\=False)
    llm \= Column(String, nullable\=False)
    idx \= Column(Integer)
    response \= Column(String)
    prompt\_tsv \= Column(TSVectorType(), Computed("to\_tsvector('english', llm || ' ' || prompt)", persisted\=True))
    \_\_table\_args\_\_ \= (
        Index("idx\_fulltext\_prompt\_tsv", prompt\_tsv, postgresql\_using\="gin"),
    )

engine \= create\_engine("postgresql://postgres:postgres@localhost:5432/postgres")
langchain.llm\_cache \= SQLAlchemyCache(engine, FulltextLLMCache)

## 可选缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#optional-caching "此标题的永久链接")

如果您愿意，您还可以关闭特定 LLM 的缓存。在下面的示例中，即使启用了全局缓存，我们也会为特定的 LLM 将其关闭

In [30]:
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2, cache=False)

In [31]:
%%time
llm("Tell me a joke")

CPU times: total: 1.73 s
Wall time: 3.19 s


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

In [32]:
%%time
llm("Tell me a joke")

CPU times: total: 1.97 s
Wall time: 1.28 s


'\n\nWhy did the chicken cross the road?\n\nTo get to the other side.'

## 链中的可选缓存[#](https://python.langchain.com/en/latest/modules/models/llms/examples/llm_caching.html#optional-caching-in-chains "此标题的永久链接")

您还可以关闭链中特定节点的缓存。请注意，由于某些接口，通常更容易先构建链，然后再编辑 LLM。

作为示例，我们将加载一个汇总器 map-reduce 链。我们将缓存映射步骤的结果，但不会冻结合并步骤的结果。

In [70]:
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain

In [62]:
# 创建两个模型
llm = OpenAI(model_name="text-davinci-002")
no_cache_llm = OpenAI(model_name="text-davinci-002", cache=False)

In [149]:
text_splitter = RecursiveCharacterTextSplitter()
# text_splitter = CharacterTextSplitter()
with open('data/文本.txt',encoding='utf-8') as f:
    state_of_the_union = f.read()
texts = text_splitter.split_text(state_of_the_union)

In [150]:
texts

['1、\t云南经济财政情况\n云南地处西南边陲，整体经济偏弱，GDP2.45万亿，在中国省份中处于中等偏下（与陕西、辽宁、江西、重庆、广西体量近似 高于贵州），过去几年年均8%的经济增速主要靠固定资产投资拉动。固定资产投资主要来自基建+房地产行业，制造业投资慢于全国平均。优势行业是烟草、有色金属矿物、电力等。云南的预算体量2100亿，自由财力3300亿左右（未算上城投利息），过去的税收主要由烟草（占比40%）贡献。十四五期间云南依然有大规模的投资计划，集中在道路基建+水电，整体感觉对经济和税收的带动能力一般，但是债务会进一步加重（目前云南的道路修建量及密集度不及周边各省）。我们整体对经济的判断偏悲观。\n2、\t平台债务情况及债券到期情况\n云南省目前归集到的发债平台的有息债务近1.23万亿，主要集中在省级平台（8477亿）和昆明市的平台（2911亿），两者占比超90%。目前归集到的发债主体债务的2020预算收入/2020自由财力/三年平均自由财力债务率在5.8/4.1/4.8倍。目前省里还有一家看起来比较大的未发债主体：有80多个县市的城镇开发公司的云南省扶贫投资开发有限公司。算上这个主体+下面城市的债务，预计全省的有息债务在2万亿左右，则2020预算收入/2020自由财力/三年平均自由财力债务率9.5/7.9/9.5倍。\n2016年后其他县市基本发不出债来，主要依靠省级平台+昆明市级平台发债。目前云南省存续债券3300亿左右，省级2262亿，市级717亿。2020Q4后省级主体的债券基本都是净收缩的，云南康旅因为2020年到期压力较大，更早的时候债券就开始净收缩了。\n从债券到期来看，2020、2021是云南省城投债券的到期高峰。2021年全年到期1346亿元，主要是省级平台的1021亿到期。月份分布上，相对压力较大的是3、4、5月，但整体分布较为均衡，每个月100亿左右的到期。\n3、省级四家主要平台简单分析\n排序上：云南建投>云南交投>云投控>云南康旅\n云南建投：业务是建筑+保障房及棚改投资，市场认为资质最好的省属企业。2020Q3有息债务2361亿，债券不到300亿，基本都是长期的银行贷款，整体短期到期压力不大。授信结构基本都是大行授信且近些年授信持续增加。\n云南交投：省级交通平台。2020Q3有息债务近2900亿，320亿的债券，大头是长期银行

In [151]:
llm.get_num_tokens(text=texts[0])

2522

In [152]:
len(texts)

1

In [153]:
docs = [Document(page_content=t) for t in texts]

In [154]:
chain = load_summarize_chain(llm=llm,chain_type="map_reduce",reduce_llm = no_cache_llm)

In [155]:
chain.run(docs)

"\n\nYunnan Chengtou was renamed Yunnan Kanglu in October 2020. The company's main business sectors are urban development, healthcare, tourism, and water services. Healthcare and tourism are the company's focus for the future, with support expected, but current scale is average. The urban development sector is the company's main source of income and profits at the moment, but it is also under pressure, facing difficulties with divestment, limited space for refinancing, and high financing pressure. The company needs to change its business model and integrate it with the core business. The water sector is small but has potential. The company's subsidiary, Yunnan Chengtou Real Estate Co., Ltd., is responsible for the real estate sector. Another subsidiary, Yunnan Water Co., Ltd., is the main financing and investment entity for water projects in Yunnan Province. Yunnan Investment Holding Group Co., Ltd. (Yunnan Investment) is the largest comprehensive investment entity under the Yunnan Pro

In [145]:
chain.run(docs)

"\n\nYunnan Chengtou was renamed Yunnan Kanglu in October 2020. The company's main business sectors are urban development, healthcare, tourism, and water services. Healthcare and tourism are the company's focus for the future, but the urban development sector is the company's main source of income and profits at the moment. The water sector is small but has potential. The company's subsidiary, Yunnan Chengtou Real Estate Co., Ltd., is responsible for the real estate sector. Another subsidiary, Yunnan Water Co., Ltd., is the main financing and investment entity for water projects in Yunnan Province. Yunnan Investment Holding Group Co., Ltd. (Yunnan Investment) is the largest comprehensive investment entity under the Yunnan Provincial government, with controlling stakes in Yunnan Energy Investment Holding Group Co., Ltd. (Yunnan Energy) and Yunnan Iron & Steel Investment Holding Group Co., Ltd. (Yunnan Iron)."

# 如何流式传输 LLM 和聊天模型响应[#](https://python.langchain.com/en/latest/modules/models/llms/examples/streaming_llm.html#how-to-stream-llm-and-chat-model-responses "此标题的永久链接")

LangChain 为 LLM 提供流媒体支持。目前，我们支持`OpenAI`、`ChatOpenAI`和`ChatAnthropic`实现的流式处理，但对其他 LLM 实现的流式处理支持也在路线图上。要利用流式传输，请使用[`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py)实现`on_llm_new_token`. 在这个例子中，我们使用`StreamingStdOutCallbackHandler`.

In [157]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI, ChatAnthropic
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import HumanMessage

In [159]:
llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = llm("请用PYthon实现冒泡排序？")



def bubble_sort(list):
    for i in range(len(list)-1):
        for j in range(len(list)-1-i):
            if list[j] > list[j+1]:
                list[j], list[j+1] = list[j+1], list[j]
    return list

list = [3,5,2,1,4]
print(bubble_sort(list))

In [160]:
llm.generate(["Tell me a joke."])



Q: What did the fish say when it hit the wall?
A: Dam!

LLMResult(generations=[[Generation(text='\n\nQ: What did the fish say when it hit the wall?\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})

In [161]:
chat = ChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = chat([HumanMessage(content="Write me a song about sparkling water.")])

Verse 1:
Bubbles rising to the top
A refreshing drink that never stops
Clear and crisp, it's oh so pure
Sparkling water, I can't ignore

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Verse 2:
No sugar, no calories, just H2O
A drink that's good for me, don't you know
With lemon or lime, you're even better
Sparkling water, you're my forever

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Bridge:
You're my go-to drink, day or night
You make me feel so light
I'll never give you up, you're my true love
Sparkling water, you're sent from above

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Outro:
Sparkling water, you're the one for me
I'll 

In [162]:
chat = ChatAnthropic(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = chat([HumanMessage(content="请介绍一下你自己")])

ValidationError: 1 validation error for ChatAnthropic
__root__
  Did not find anthropic_api_key, please add an environment variable `ANTHROPIC_API_KEY` which contains it, or pass  `anthropic_api_key` as a named parameter. (type=value_error)

# 如何跟踪令牌使用情况[#](https://python.langchain.com/en/latest/modules/models/llms/examples/token_usage_tracking.html#how-to-track-token-usage "此标题的永久链接")

此笔记本介绍了如何跟踪特定呼叫的令牌使用情况。它目前仅针对 OpenAI API 实现。

让我们首先看一个非常简单的跟踪单个 LLM 调用的令牌使用示例。

In [169]:
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback

In [170]:
llm = OpenAI(model_name="text-davinci-002", n=2, best_of=2)

In [171]:
with get_openai_callback() as cb:
    result = llm("Tell me a joke")
    print(cb)

Tokens Used: 0
	Prompt Tokens: 0
	Completion Tokens: 0
Successful Requests: 0
Total Cost (USD): $0.0


In [172]:
with get_openai_callback() as cb:
    result = llm("Tell me a joke")
    result2 = llm("Tell me a joke")
    print(cb.total_tokens)

0


In [176]:
# !pip install google-search-results

In [175]:
# 如果使用其中包含多个步骤的链或代理，它将跟踪所有这些步骤。

from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)
tools = load_tools(["serpapi", "llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [177]:
with get_openai_callback() as cb:
    response = agent.run("Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?")
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Total Cost (USD): ${cb.total_cost}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.
Action: Search
Action Input: "Olivia Wilde boyfriend"[0m
Observation: [36;1m[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.[0m
Thought:[32;1m[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.
Action: Search
Action Input: "Olivia Wilde boyfriend"[0m
Observation: [36;1m[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.[0m
Thought:[32;1m[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.
Action: Search
Action Input: "Olivia Wilde boyfriend"[0m
Observation: [36;1m[1;3mOlivia Wilde started dating Harry Styles after ending he

InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Got invalid dimensions for input: token_type_ids for the following indices
 index: 1 Got: 525 Expected: 512
 Please fix either the inputs or the model.

# 聊天模型chat

In [178]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [179]:
chat = ChatOpenAI(temperature=0)

In [180]:
chat([HumanMessage(content="Translate this sentence from English to French. I love programming.")])

AIMessage(content="J'aime programmer.", additional_kwargs={}, example=False)

In [181]:
messages = [
    SystemMessage(content="You are a helpful assistant that translates English to French."),
    HumanMessage(content="I love programming.")
]
chat(messages)

AIMessage(content="J'adore la programmation.", additional_kwargs={}, example=False)

In [182]:
batch_messages = [
    [
        SystemMessage(content="You are a helpful assistant that translates English to Chinese."),
        HumanMessage(content="I love programming.")
    ],
    [
        SystemMessage(content="You are a helpful assistant that translates English to Chinese."),
        HumanMessage(content="I love artificial intelligence.")
    ],
]
result = chat.generate(batch_messages)
result

LLMResult(generations=[[ChatGeneration(text='我喜欢编程。', generation_info=None, message=AIMessage(content='我喜欢编程。', additional_kwargs={}, example=False))], [ChatGeneration(text='我喜欢人工智能。', generation_info=None, message=AIMessage(content='我喜欢人工智能。', additional_kwargs={}, example=False))]], llm_output={'token_usage': {'prompt_tokens': 57, 'completion_tokens': 19, 'total_tokens': 76}, 'model_name': 'gpt-3.5-turbo'})

In [183]:
result.llm_output

{'token_usage': {'prompt_tokens': 57,
  'completion_tokens': 19,
  'total_tokens': 76},
 'model_name': 'gpt-3.5-turbo'}

## 提示模板[#](https://python.langchain.com/en/latest/modules/models/chat/getting_started.html#prompttemplates "此标题的永久链接")

您可以通过使用`MessagePromptTemplate`. 您可以`ChatPromptTemplate`从一个或多个构建一个`MessagePromptTemplates`。您可以使用`ChatPromptTemplate`'s `format_prompt`– 这会返回一个`PromptValue`，您可以将其转换为字符串或 Message 对象，具体取决于您是要使用格式化值作为 llm 还是聊天模型的输入。

为方便起见，`from_template`模板上公开了一个方法。如果您要使用此模板，它会是这样的：

In [184]:
template="You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template="{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [186]:
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

# get a chat completion from the formatted messages
chat(chat_prompt.format_prompt(input_language="English", output_language="Chinese", text="I love programming.").to_messages())



AIMessage(content='我喜欢编程。', additional_kwargs={}, example=False)

In [187]:
prompt=PromptTemplate(
    template="You are a helpful assistant that translates {input_language} to {output_language}.",
    input_variables=["input_language", "output_language"],
)
system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

In [188]:
chain = LLMChain(llm=chat, prompt=chat_prompt)
chain.run(input_language="English", output_language="French", text="I love programming.")

"J'adore la programmation."

In [189]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
chat = ChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = chat([HumanMessage(content="Write me a song about sparkling water.")])

Verse 1:
Bubbles rising to the top
A refreshing drink that never stops
Clear and crisp, it's oh so pure
Sparkling water, I can't ignore

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Verse 2:
No sugar, no calories, just H2O
A drink that's good for me, don't you know
With lemon or lime, you're even better
Sparkling water, you're my forever

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Bridge:
You're my go-to drink, day or night
You make me feel so light
I'll never give you up, you're my true love
Sparkling water, you're sent from above

Chorus:
Sparkling water, oh how you shine
A taste so clean, it's simply divine
You quench my thirst, you make me feel alive
Sparkling water, you're my favorite vibe

Outro:
Sparkling water, you're the one for me
I'll 

In [192]:
!rm .langchain.db sqlite.db

'rm' �����ڲ����ⲿ���Ҳ���ǿ����еĳ���
���������ļ���
