In [1]:
!pip install gptcache

Collecting gptcache
  Downloading gptcache-0.1.43-py3-none-any.whl.metadata (24 kB)
Collecting cachetools (from gptcache)
  Using cached cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)
Downloading gptcache-0.1.43-py3-none-any.whl (131 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.5/131.5 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m0m
[?25hUsing cached cachetools-5.3.2-py3-none-any.whl (9.3 kB)
Installing collected packages: cachetools, gptcache
Successfully installed cachetools-5.3.2 gptcache-0.1.43


In [4]:
#load .env
from dotenv import load_dotenv
from langchain.globals import set_llm_cache
from langchain_community.llms import OpenAI
load_dotenv()

# To make the caching really obvious, let's use a slower model.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", n=2, best_of=2)

In [5]:
import hashlib

from gptcache import Cache
from gptcache.manager.factory import manager_factory
from gptcache.processor.pre import get_prompt
from langchain.cache import GPTCache


def get_hashed_name(name):
    """Return the SHA-256 hex digest of ``name``.

    ``name`` is encoded with ``str.encode()``'s default encoding before
    hashing; the result is a lowercase hexadecimal string.
    """
    hasher = hashlib.sha256()
    hasher.update(name.encode())
    return hasher.hexdigest()


def init_gptcache(cache_obj: Cache, llm: str):
    """Initialise ``cache_obj`` with a "map" data manager keyed to ``llm``.

    The data directory name is ``map_cache_<sha256 of llm>``, so different
    ``llm`` strings get separate directories.

    Args:
        cache_obj: The ``Cache`` instance to initialise.
        llm: String identifying the LLM (presumably supplied by the
            LangChain ``GPTCache`` wrapper -- confirm against its docs).
    """
    cache_dir = f"map_cache_{get_hashed_name(llm)}"
    data_manager = manager_factory(manager="map", data_dir=cache_dir)
    # The prompt text itself is what gets handed to the cache's embedding step.
    cache_obj.init(pre_embedding_func=get_prompt, data_manager=data_manager)


# Register a GPTCache-backed cache as the LLM cache, passing init_gptcache
# as the callable used to initialise the underlying cache object.
set_llm_cache(GPTCache(init_gptcache))

In [6]:
%%time
# First call: the prompt is not yet in the cache, so it should take longer.
llm("Tell me a joke")

CPU times: user 30.6 ms, sys: 11.4 ms, total: 42 ms
Wall time: 907 ms


"\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything."

In [7]:
%%time
# Second call with the same prompt: it is now in the cache, so it should be much faster.
llm("Tell me a joke")

CPU times: user 275 µs, sys: 14 µs, total: 289 µs
Wall time: 293 µs


"\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything."