# Working with Redis for Caching

This notebook demonstrates how to use the RedisCache and RedisSemanticCache classes from the langchain-redis package to implement caching for LLM responses using Redis.

# installation

In [6]:
# %pip install ipywidgets
# %pip install langchain-core
# %pip install langchain-redis
# %pip install langchain-openai
# %pip install redis
#%pip install langchain

In [13]:
#%pip install langchain_community

# importing required libraries

In [1]:
import os
import time
import redis

#from langchain.globals import set_llm_cache
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_redis import RedisCache, RedisSemanticCache

In [12]:
from langchain_community.cache import InMemoryCache
import langchain

langchain.llm_cache = InMemoryCache()

In [2]:
from dotenv import load_dotenv

# Load .env file
load_dotenv()


True

# setting up  redis connection

In [3]:
REDIS_URL = "redis://default:lo1hLY1c0erZN4pLWIFmJeB2UGaVTY7e@redis-16842.c80.us-east-1-2.ec2.cloud.redislabs.com:16842"
# redis_client = redis.from_url(REDIS_URL)
# redis_client.ping()

In [4]:
redis_client = redis.from_url(REDIS_URL)
redis_client.ping()

True

# this means redis db is  working fine

In [5]:
openai_api_key = os.getenv("OPENAI_API_KEY")

# Using Redis as a Standard Cache

In [7]:
import time
import langchain

from langchain_community.cache import RedisCache, RedisSemanticCache
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

#REDIS_URL = "redis://localhost:6379"  # change if needed

# Use a chat model (recommended in new LangChain)
llm = ChatOpenAI(temperature=0)  # uses OPENAI_API_KEY from env

def execute_with_timing(prompt: str):
    start_time = time.time()
    response = llm.invoke(prompt)  # returns an AIMessage
    end_time = time.time()
    return response.content, end_time - start_time




In [8]:
REDIS_URL

'redis://default:lo1hLY1c0erZN4pLWIFmJeB2UGaVTY7e@redis-16842.c80.us-east-1-2.ec2.cloud.redislabs.com:16842'

In [9]:
REDIS_URL = "redis://default:lo1hLY1c0erZN4pLWIFmJeB2UGaVTY7e@redis-16842.c80.us-east-1-2.ec2.cloud.redislabs.com:16842"

redis_client = redis.Redis.from_url(
    REDIS_URL,
    decode_responses=True,
)

In [10]:
redis_cache = RedisCache(redis_=redis_client)

langchain.llm_cache = redis_cache

# traditional cache

In [11]:


prompt = "Explain the concept of caching in three sentences."

# First call (not cached)
result1, time1 = execute_with_timing(prompt)
print("First call (not cached):")
print(f"{result1}\nTime: {time1:.2f} seconds\n")

# Second call (should be cached)
result2, time2 = execute_with_timing(prompt)
print("Second call (cached):")
print(f"{result2}\nTime: {time2:.2f} seconds\n")

print(f"Speed improvement: {time1 / time2:.2f}x faster\n")

# # Clear the cache
# redis_cache.clear()
# print("Cache cleared (standard Redis cache)\n")




First call (not cached):
Caching is the process of storing frequently accessed data in a temporary storage area to improve performance. When a user requests data that has been previously cached, it can be retrieved quickly without having to access the original source. Caching helps reduce load times and improve overall system efficiency by reducing the need to repeatedly fetch data from its original location.
Time: 1.05 seconds

Second call (cached):
Caching is the process of storing frequently accessed data in a temporary storage area to improve performance. When a user requests data that is already cached, it can be retrieved quickly without having to access the original source. Caching helps reduce load times and improve overall system efficiency by reducing the need to repeatedly fetch data from its original location.
Time: 1.13 seconds

Speed improvement: 0.92x faster



# using rediscache as a semantic cache

In [12]:
embeddings = OpenAIEmbeddings()
semantic_cache = RedisSemanticCache(
    redis_url=REDIS_URL, embedding=embeddings, score_threshold=0.2
)



In [13]:
langchain.llm_cache = semantic_cache

In [14]:
langchain.llm_cache

<langchain_community.cache.RedisSemanticCache at 0x1f522afd160>

In [15]:
# Original prompt
original_prompt = "What is the capital of France?"
result1, time1 = execute_with_timing(original_prompt)
print(f"Original query:\nPrompt: {original_prompt}\n")
print(f"{result1}\nTime: {time1:.2f} seconds\n")

# Semantically similar prompt
similar_prompt = "Can you tell me the capital city of France?"
result2, time2 = execute_with_timing(similar_prompt)
print(f"Similar query:\nPrompt: {similar_prompt}\n")
print(f"{result2}\nTime: {time2:.2f} seconds\n")

print(f"Speed improvement: {time1 / time2:.2f}x faster")

Original query:
Prompt: What is the capital of France?

The capital of France is Paris.
Time: 0.73 seconds

Similar query:
Prompt: Can you tell me the capital city of France?

The capital city of France is Paris.
Time: 1.64 seconds

Speed improvement: 0.45x faster


In [16]:
# Semantically similar prompt
similar_prompt_2 = "Hey , quickly tell e capital cty of france?"
result3, time3 = execute_with_timing(similar_prompt_2)
print(f"Similar query:\nPrompt: {similar_prompt_2}\n")
print(f"{result3}\nTime: {time3:.2f} seconds\n")

print(f"Speed improvement: {time1 / time3:.2f}x faster")

Similar query:
Prompt: Hey , quickly tell e capital cty of france?

The capital city of France is Paris.
Time: 0.65 seconds

Speed improvement: 1.13x faster


In [17]:
similar_prompt_3 = "Hey , quickly tell e capital cty of france?"
result4, time4 = execute_with_timing(similar_prompt_3)
print(f"Similar query:\nPrompt: {similar_prompt_3}\n")
print(f"{result4}\nTime: {time4:.2f} seconds\n")

print(f"Speed improvement: {time1 / time4:.2f}x faster")

Similar query:
Prompt: Hey , quickly tell e capital cty of france?

The capital city of France is Paris.
Time: 0.69 seconds

Speed improvement: 1.06x faster
