In [None]:
from langchain.llms import OpenAI

### In-memory cache

In [None]:
import langchain
from langchain.cache import InMemoryCache
# Use an in-memory cache as langchain's global LLM cache for the cells below.
langchain.llm_cache = InMemoryCache()

In [None]:
# To make the caching really obvious, let's use a slower model.
# The same `llm` object is reused by every timing cell in this notebook.
llm = OpenAI(
    model_name="text-davinci-002",
    n=2,
    best_of=2,
)

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer.
# Compare the time reported here with the next cell.
llm("Tell me a joke")

In [None]:
%%time
# The second time the identical prompt is already in the cache, so it goes faster.
llm("Tell me a joke")

### SQLite cache

In [None]:
!rm .langchain.db

In [None]:
# We can do the same thing with a SQLite cache
# (it is pointed at the ".langchain.db" file via database_path).
from langchain.cache import SQLiteCache
langchain.llm_cache = SQLiteCache(database_path=".langchain.db")

In [None]:
%%time
# The first time, it is not yet in the SQLite cache, so it should take longer.
llm("Tell me a joke")

In [None]:
%%time
# The second time the identical prompt is in the cache, so it goes faster.
llm("Tell me a joke")

In [None]:
%%time
# Not an exact match for the cached prompt, so it should take long again.
llm("Tell me a nice joke")

### RedisSemanticCache

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.cache import RedisSemanticCache


# Switch the global LLM cache to the Redis-backed semantic cache.
# It is given the Redis URL (a server is expected at localhost:6379) and the
# embedding model to use (OpenAIEmbeddings).
langchain.llm_cache = RedisSemanticCache(
    redis_url="redis://localhost:6379",
    embedding=OpenAIEmbeddings()
)


In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer
# (assumes the Redis instance holds no entry for this prompt from an earlier run).
llm("Tell me a joke")

In [None]:
%%time
# The second time, while not a direct hit, the prompt is semantically similar to the original one,
# so it uses the cached result!
llm("Tell me one joke")

In [None]:
# explicit playing with the embeddings
import numpy as np

# Reuse the embedding model that was handed to the semantic cache.
emb = langchain.llm_cache.embedding

# pv1 / pv2 are the two joke prompts used above; pvZ is an unrelated sentence for contrast.
pv1 = np.array(emb.embed_query("Tell me a joke"), dtype=float)
pv2 = np.array(emb.embed_query("Tell me one joke"), dtype=float)
pvZ = np.array(emb.embed_query("I once saw a platypus cooking dinner"), dtype=float)

# Self dot products (squared norms). The `%` operator is required here:
# passing the value as a second print() argument would leave "%.4f" unformatted.
print('pv1 . pv1 = %.4f' % np.dot(pv1, pv1))
print('pv2 . pv2 = %.4f' % np.dot(pv2, pv2))
print('pvZ . pvZ = %.4f' % np.dot(pvZ, pvZ))
print('')
# Cross dot products: similar prompts vs. an unrelated sentence.
print('pv1 . pv2 = %.4f' % np.dot(pv1, pv2))
print('pv1 . pvZ = %.4f' % np.dot(pv1, pvZ))

### GPTCache

#### Exact match

In [None]:
import gptcache
from gptcache.processor.pre import get_prompt
from gptcache.manager.factory import get_data_manager
from langchain.cache import GPTCache

# Give every cache its own data file, so that caches created for different
# llm models do not affect each other.
i = 0
file_prefix = "data_map"


def init_gptcache_map(cache_obj: gptcache.Cache):
    """Initialise ``cache_obj`` with a data manager backed by its own file.

    The file name is built from ``file_prefix`` and the module-level counter
    ``i``; the counter is incremented after each initialisation so the next
    cache gets a different file.
    """
    global i
    map_file = f'{file_prefix}_{i}.txt'
    manager = get_data_manager(data_path=map_file)
    cache_obj.init(pre_embedding_func=get_prompt, data_manager=manager)
    i += 1


# Install the GPTCache wrapper as the global LLM cache.
langchain.llm_cache = GPTCache(init_gptcache_map)

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer.
llm("Tell me a joke")

In [None]:
%%time
# The second time the identical prompt is in the cache, so it goes faster.
llm("Tell me a joke")

In [None]:
# Some inspection to play with this gptcache hidden in the belly of the langchain stuff.
# `gptcache_dict` is keyed by the stringified LLM parameters, so a hard-coded key
# breaks as soon as any parameter or default changes. Take the first entry instead
# (presumably the only one at this point, since a single llm has been used with this cache).
gpCache = list(langchain.llm_cache.gptcache_dict.values())[0]
# Force file write :)
gpCache.flush()

# In this case you can check that...
gpcache = gpCache  # same object; both names kept for later cells that may use either
gpcache.embedding_func('aaa')
# ... this is the identity (exact cache!)

#### Similarity caching

In [None]:
import gptcache
from gptcache.processor.pre import get_prompt
from gptcache.manager.factory import get_data_manager
from langchain.cache import GPTCache
from gptcache.manager import get_data_manager, CacheBase, VectorBase
from gptcache import Cache
from gptcache.embedding import Onnx
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

# Avoid multiple caches using the same file, causing different llm model caches to affect each other
i = 0
file_prefix = "data_map"
llm_cache = Cache()  # NOTE(review): not referenced by any other code visible in this notebook


def init_gptcache_map(cache_obj: gptcache.Cache):
    """Initialise ``cache_obj`` as a similarity cache with its own storage files.

    Prompts are embedded with the Onnx model; entries are stored in a SQLite
    database and their vectors in a faiss index. Both file names are built from
    ``file_prefix`` and the module-level counter ``i`` so that every cache gets
    separate storage; ``i`` is incremented after each initialisation.
    """
    global i
    onnx = Onnx()
    # Per-cache storage locations. Without these, every cache would fall back to
    # the library's default sqlite/faiss paths and share the same files.
    sql_url = f'sqlite:///./{file_prefix}_{i}.db'
    index_path = f'{file_prefix}_{i}.index'
    cache_base = CacheBase('sqlite', sql_url=sql_url)
    vector_base = VectorBase('faiss', dimension=onnx.dimension, index_path=index_path)
    data_manager = get_data_manager(cache_base, vector_base, max_size=10, clean_size=2)
    cache_obj.init(
        pre_embedding_func=get_prompt,
        embedding_func=onnx.to_embeddings,
        data_manager=data_manager,
        similarity_evaluation=SearchDistanceEvaluation(),
    )
    i += 1

langchain.llm_cache = GPTCache(init_gptcache_map)

In [None]:
# this is what the similarity cache uses under the hood
# (the same Onnx().to_embeddings that is passed as embedding_func when the cache is initialised):
from gptcache.embedding import Onnx
Onnx().to_embeddings('Today is a sunny day.')
# a (768,) ndarray

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer.
llm("Tell me a joke")

In [None]:
%%time
# This is an exact match of the previous prompt, so it finds it in the cache.
llm("Tell me a joke")

In [None]:
%%time
# This is not an exact match, but semantically within distance of the cached prompt, so it hits!
llm("Tell me joke")

### SQLAlchemy

In [None]:
# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.

from langchain.cache import SQLAlchemyCache
from sqlalchemy import create_engine

import os

# Connection URL for the cache database. Set LLM_CACHE_DB_URL to point at your own
# server and keep real credentials out of the notebook; the fallback is the
# local demo Postgres instance this notebook was written against.
engine = create_engine(
    os.environ.get("LLM_CACHE_DB_URL", "postgresql://postgres:cachepwd@172.17.0.2:5432/postgres")
)
langchain.llm_cache = SQLAlchemyCache(engine)

In [None]:
%%time
# The first time, it is not yet in cache, so it should take longer
# (assumes the database holds no entry for this prompt from an earlier run).
llm("Tell me a joke")

In [None]:
%%time
# This is an exact match of the previous prompt, so it finds it in the cache.
llm("Tell me a joke")

In [None]:
%%time
# This is not an exact match of the cached prompt.
# NOTE(review): the previous comment claimed a semantic hit, but SQLAlchemyCache
# presumably matches prompts exactly (as the SQLite cache above does), so this
# should be a miss and take long again — confirm with the timing.
llm("Tell me joke")

##### SQLAlchemy with custom schema

In [None]:
# You can define your own declarative SQLAlchemy model class and pass it to SQLAlchemyCache to customize the schema used for caching. For example, to support high-speed fulltext prompt indexing with Postgres, use:

from sqlalchemy import Column, Integer, String, Computed, Index, Sequence
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils import TSVectorType
from langchain.cache import SQLAlchemyCache

Base = declarative_base()


class FulltextLLMCache(Base):  # type: ignore
    """Postgres table for fulltext-indexed LLM Cache.

    Declarative model for the ``llm_cache_fulltext`` table. Besides the plain
    cache columns it defines a computed tsvector column over ``llm`` and
    ``prompt``, with a GIN index on that column.
    """

    __tablename__ = "llm_cache_fulltext"
    # Surrogate primary key, fed by the 'cache_id' sequence.
    id = Column(Integer, Sequence('cache_id'), primary_key=True)
    # Prompt text; required.
    prompt = Column(String, nullable=False)
    # String identifying the LLM; required.
    # NOTE(review): presumably the stringified LLM parameters — confirm against SQLAlchemyCache.
    llm = Column(String, nullable=False)
    # NOTE(review): presumably the position of this response among the generations
    # returned for one prompt — confirm against SQLAlchemyCache.
    idx = Column(Integer)
    # Cached response text.
    response = Column(String)
    # Stored (persisted) computed column: English tsvector of "llm || ' ' || prompt".
    prompt_tsv = Column(TSVectorType(), Computed("to_tsvector('english', llm || ' ' || prompt)", persisted=True))
    # GIN index on the tsvector column.
    __table_args__ = (
        Index("idx_fulltext_prompt_tsv", prompt_tsv, postgresql_using="gin"),
    )

import os

# Connection URL for the cache database. Set LLM_CACHE_DB_URL to point at your own
# server and keep real credentials out of the notebook; the fallback is the
# local demo Postgres instance this notebook was written against.
engine = create_engine(
    os.environ.get("LLM_CACHE_DB_URL", "postgresql://postgres:cachepwd@172.17.0.2:5432/postgres")
)
# Pass the custom model so the cache uses the fulltext-indexed table.
langchain.llm_cache = SQLAlchemyCache(engine, FulltextLLMCache)

Note: this schema uses the Postgres [TSVECTOR](https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#full-text-search) type (not investigated in depth here), which is the basis for full-text search. It does not seem to be related to "vector search", however: out of the box, cache lookups are not based on semantic similarity.

In [None]:
%%time
# First call against the custom-schema cache.
# NOTE(review): the previous comment claimed a semantic hit, but the note above says
# this cache does not do semantic similarity, so this is presumably a miss unless the
# "llm_cache_fulltext" table already holds this exact prompt — confirm with the timing.
llm("Tell me joke")

In [None]:
%%time
# A different prompt from the previous cell.
# NOTE(review): the previous comment claimed a semantic hit; with no semantic matching
# in this cache, this is presumably a miss too unless the "llm_cache_fulltext" table
# already holds this exact prompt — confirm with the timing.
llm("Tell me a joke")