## LLM请求缓存
***
- 短期缓存：`InMemoryCache`，缓存保存在内存中，内核重启后失效
- 长期缓存：`SQLiteCache`，缓存持久化到本地 SQLite 数据库文件

### 定义模型
***

In [1]:
from langchain_openai import ChatOpenAI
import os

# Chat model shared by every cell below. The API key and endpoint are read from
# the environment so no secret is hardcoded; either value is None when the
# corresponding variable is unset.
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0,
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE"),
)

### 引入依赖包
***

In [2]:
from langchain_core.globals import set_llm_cache

### 短期缓存
***

In [3]:
%%time
from langchain_core.caches import InMemoryCache

# Register an in-memory cache as the global LLM cache.
set_llm_cache(InMemoryCache())

# First call: nothing is cached yet for this prompt, so the request is sent to
# the model and the measured wall time includes the full round trip.
llm.invoke("给我讲一个笑话")

CPU times: user 22.9 ms, sys: 4.73 ms, total: 27.6 ms
Wall time: 4.25 s


AIMessage(content='好的，这是一个我最近听到的笑话：\n\n为什么电脑永远不会感冒？\n\n因为它有Windows（窗户）！', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 14, 'total_tokens': 59, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5188934a-762b-485d-a314-0c8ceefaa7fc-0', usage_metadata={'input_tokens': 14, 'output_tokens': 45, 'total_tokens': 59, 'input_token_details': {}, 'output_token_details': {}})

In [4]:
%%time
# Second call with the identical prompt: expected to hit the cache, so it
# should return much faster than the first call.
llm.invoke("给我讲一个笑话")

CPU times: user 560 μs, sys: 111 μs, total: 671 μs
Wall time: 656 μs


AIMessage(content='好的，这是一个我最近听到的笑话：\n\n为什么电脑永远不会感冒？\n\n因为它有Windows（窗户）！', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 14, 'total_tokens': 59, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5188934a-762b-485d-a314-0c8ceefaa7fc-0', usage_metadata={'input_tokens': 14, 'output_tokens': 45, 'total_tokens': 59, 'input_token_details': {}, 'output_token_details': {}})

### 长期缓存（SQLite Cache）
***

In [14]:
! pip install langchain-community

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting langchain-community
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7e/31/39c30cab465774835e2c18d3746587e6fd0c9f7265b1c6b1fcd2e1684dd2/langchain_community-0.3.17-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/b4/46/93416fdae86d40879714f72956ac14df9c7b76f7d41a4d68aa9f71a0028b/pydantic_settings-2.7.1-py3-none-any.whl (29 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Using cached https://pypi.tuna.tsinghua.edu.c

In [17]:
# Remove any SQLite cache file left by a previous run so the first call below
# is a genuine cache miss. Unlike `!rm`, this is a silent no-op when the file
# does not exist (e.g. on a fresh checkout) and does not depend on a Unix shell.
from pathlib import Path

Path(".langchain.db").unlink(missing_ok=True)

In [18]:
# Replace the global LLM cache with a SQLite-backed one; entries are written to
# the local database file ".langchain.db" rather than kept only in memory.
from langchain_community.cache import SQLiteCache

set_llm_cache(SQLiteCache(database_path=".langchain.db"))

In [19]:
%%time
# First call against the SQLite cache: expected to miss (assuming the database
# file holds no entry for this prompt), so the request is sent to the model.
llm.invoke("给我讲一个笑话")

CPU times: user 20.9 ms, sys: 14.1 ms, total: 35 ms
Wall time: 4 s


AIMessage(content='好的，这是一个我最近听到的笑话：\n\n为什么电脑永远不会感冒？\n\n因为它有Windows（窗户）！', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 14, 'total_tokens': 59, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ee976a3a-255e-48ba-9979-594e22fc117a-0', usage_metadata={'input_tokens': 14, 'output_tokens': 45, 'total_tokens': 59, 'input_token_details': {}, 'output_token_details': {}})

In [20]:
%%time
# Second call with the identical prompt: expected to be served from the SQLite
# cache, so it should return much faster than the first call.
llm.invoke("给我讲一个笑话")

CPU times: user 1.92 ms, sys: 1.06 ms, total: 2.99 ms
Wall time: 2.78 ms


AIMessage(content='好的，这是一个我最近听到的笑话：\n\n为什么电脑永远不会感冒？\n\n因为它有Windows（窗户）！', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 14, 'total_tokens': 59, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ee976a3a-255e-48ba-9979-594e22fc117a-0', usage_metadata={'input_tokens': 14, 'output_tokens': 45, 'total_tokens': 59, 'input_token_details': {}, 'output_token_details': {}})