# L4: Enhancing Cache Effectiveness

In this lab, you’ll learn several techniques to make your cache more accurate—like threshold tuning, cross-encoders, LLM checks, and fuzzy matching.


<p style="background-color:#fff6e4; padding:15px; border-width:3px; border-color:#f5ecda; border-style:solid; border-radius:6px"> ⏳ <b>Note <code>(Kernel Starting)</code>:</b> This notebook takes about 30 seconds to be ready to use. You may start and watch the video while you wait.</p>

In [1]:
# Warning control: install a global filter that silences every Python warning
# so that library warnings do not clutter the cell outputs below.
import warnings
warnings.filterwarnings('ignore')

## Setup Environment

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from cache.config import config
from cache.config import load_openai_key
from cache.cross_encoder import CrossEncoder
from cache.evals import CacheEvaluator
from cache.faq_data_container import FAQDataContainer
from cache.fuzzy_cache import FuzzyCache
from cache.llm_evaluator import LLMEvaluator
from cache.wrapper import SemanticCacheWrapper

# Set up plotting style: matplotlib's built-in "default" style sheet plus
# seaborn's "husl" color palette for the plots produced later on.
plt.style.use("default")
sns.set_palette("husl")

# Reached only once every import earlier in this cell has succeeded.
print("üì¶ Libraries and evaluation utilities imported successfully")

üì¶ Libraries and evaluation utilities imported successfully


<div style="background-color:#fff6ff; padding:13px; border-width:3px; border-color:#efe6ef; border-style:solid; border-radius:6px">
<p> 💻 &nbsp; <b>Access <code>requirements.txt</code> and <code>helper.py</code> files:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Open"</em>.</p>

<p> ⬇ &nbsp; <b>Download Notebooks:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Download as"</em> and select <em>"Notebook (.ipynb)"</em>.</p>
</div>

## Load data and setup cache

In [3]:
# Build the semantic cache from the shared project config. Per the log output
# below, this loads the redis/langcache-embed-v1 embedding model and connects
# to an existing Redis-backed index.
cache_wrapper = SemanticCacheWrapper.from_config(config)

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: redis/langcache-embed-v1


✅ Redis is running and accessible!


INFO:redisvl.index.index:Index already exists, not overwriting.


In [4]:
# Load the FAQ data; the container exposes `faq_df` (the entries later used to
# fill the cache) and `test_df` (the queries used for evaluation).
data_container = FAQDataContainer()

# Plain Python list of the test questions to run against the cache.
test_queries = data_container.test_df["question"].tolist()

Loaded 8 FAQ entries
Loaded 80 test queries


In [5]:
# Fill the cache with the FAQ entries, then look up every test query.
# NOTE(review): 0.3 looks like a hand-picked starting threshold; the threshold
# sweep in the next section searches for a better value — confirm.
cache_wrapper.hydrate_from_df(data_container.faq_df)
cache_results = cache_wrapper.check_many(
    test_queries, distance_threshold=0.3
)

# Score the lookups against the labels produced by the data container.
evaluator = CacheEvaluator(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)
evaluator.report_metrics()

## Threshold sweep

In [6]:
# Retrieve with distance_threshold=1 so the evaluator can sweep over candidate
# thresholds afterwards.
# NOTE(review): presumably 1 is loose enough that no candidate is filtered
# out at retrieval time — confirm against the distance metric in use.
cache_results = cache_wrapper.check_many(
    test_queries, distance_threshold=1
)
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

# Sweep thresholds, selecting by F1 score, and plot F1 / precision / recall.
evaluator.report_threshold_sweep(
    metric_to_maximize="f1_score",
    metrics_to_plot=["f1_score", "precision", "recall"],
)

## Cross Encoder Reranking

In [7]:
# Initialize cross encoder for reranking
# (the progress bars in the output show the model files being downloaded).
cross_encoder = CrossEncoder("Alibaba-NLP/gte-reranker-modernbert-base")

# Register cross encoder as a reranker with the cache wrapper
cache_wrapper.register_reranker(cross_encoder.create_reranker())

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/598M [00:00<?, ?B/s]

In [8]:
# Test cross encoder directly (for demonstration).
# NOTE(review): presumably the i-th text of the first list is scored against
# the i-th text of the second list — the three scores in the output are
# consistent with that (high for the two related pairs, low for the unrelated
# one). Confirm against CrossEncoder.predict.
cross_encoder.predict(
    [
        "what is the capital of China?",
        "how to implement quick sort in python?",
        "how to implement quick sort in python?",
    ],
    ["Beijing", "Introduction of quick sort", "The weather is nice today"],
)

[0.8946054577827454, 0.9212159514427185, 0.15752527117729187]

In [9]:
# Two sentences that share the word "bank" but use it in different senses.
sentences = [
    "The bank raised its interest rates.",
    "The river overflowed near the bank after heavy rain."
]

# Distance between the two sentences according to the cache wrapper ...
langcache_distance = cache_wrapper.pair_distance(
    sentences[0], sentences[1]
)
# ... and according to the cross encoder.
cross_encoder_distance = cross_encoder.pair_distance(
    sentences[0], sentences[1]
)

# Display both distances side by side as the cell output.
langcache_distance, cross_encoder_distance

(0.46540407730002653, 0.6095241606235504)

In [10]:
# Repeat the full-retrieval sweep, now fetching up to 10 candidates per query
# and asking for the reranker's distance.
# NOTE(review): presumably use_reranker_distance=True makes the registered
# reranker's score replace the embedding distance in the results — confirm in
# cache.wrapper.
cache_results = cache_wrapper.check_many(
    test_queries,
    distance_threshold=1,
    num_results=10,
    use_reranker_distance=True,
)
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

# Threshold sweep using the evaluator's default metric settings.
evaluator.report_threshold_sweep()

## LLM Reranking in a Tiered System

In [11]:
# Make the OpenAI API key available to the LLM reranker used below.
# `load_openai_key` is imported together with the other `cache` utilities in
# the setup cell, keeping all imports in one place.
load_openai_key()

> OpenAI API key is already loaded in the environment


In [12]:
# Build the LLM-based evaluator.
# NOTE(review): presumably GPT-backed, judging by the constructor name and the
# OpenAI requests logged by the next cell — confirm in cache.llm_evaluator.
llm = LLMEvaluator.construct_with_gpt()

# Drop the previously registered reranker and register the LLM one instead.
cache_wrapper.clear_reranker()
cache_wrapper.register_reranker(llm.create_reranker(batch_size=4))

In [13]:
# Tiered lookup: request only the single nearest cached entry within the
# distance threshold; the OpenAI requests logged below show the registered
# LLM reranker being invoked during this call.
# NOTE(review): 0.2828 is a magic number — presumably taken from an earlier
# threshold sweep; confirm and consider naming it as a constant.
cache_results = cache_wrapper.check_many(
    test_queries,
    distance_threshold=0.2828,
    num_results=1,
    show_progress=True,
)

  0%|          | 0/80 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://jupyter-api-proxy.internal.dlai/rev-proxy/openai/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST ht

In [14]:
# Evaluate the results of the tiered (embedding + LLM reranker) lookup.
# NOTE(review): `from_full_retrieval` is used here although `cache_results`
# was last retrieved with distance_threshold=0.2828 rather than 1 — confirm
# this constructor is the intended one for thresholded results.
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

evaluator.report_metrics()

<p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
&nbsp; <b>Different Run Results:</b> The output visualizations generated may differ from those shown in the video.</p>

## Fuzzy Matching

In [15]:
def fuzzify_string(str, its=3):
    """Return a copy of ``str`` with random adjacent-character swaps applied.

    Each iteration picks a random position and swaps the character there with
    its right-hand neighbour, simulating a typing transposition. More
    iterations produce a more heavily scrambled string; the result always has
    the same length and the same multiset of characters as the input.

    Randomness comes from NumPy's global RNG (``np.random.randint``), so call
    ``np.random.seed(...)`` beforehand for reproducible output.

    Args:
        str: The text to perturb. (The name shadows the built-in ``str``; it is
            kept unchanged so existing keyword callers keep working.)
        its: Number of swap iterations to perform. Values <= 0 leave the text
            unchanged.

    Returns:
        The perturbed string. Inputs shorter than two characters are returned
        unchanged because there is no adjacent pair to swap.
    """
    chars = list(str)

    # Guard: with fewer than two characters np.random.randint(0, len - 1)
    # would be called with an empty range and raise ValueError.
    if len(chars) < 2:
        return str

    # Work on one mutable list instead of re-splitting/re-joining per swap.
    last_swappable = len(chars) - 1
    for _ in range(its):
        # randint's upper bound is exclusive, so pos + 1 is always in range.
        pos = np.random.randint(0, last_swappable)
        chars[pos], chars[pos + 1] = chars[pos + 1], chars[pos]

    return "".join(chars)

In [16]:
# Seed NumPy's global RNG (used by fuzzify_string) so the generated typos,
# and therefore the metrics computed from them, are identical on every run.
np.random.seed(42)

# For every FAQ question, create one fuzzed variant per difficulty level
# (number of random adjacent-character swaps), from barely altered to fully
# scrambled.
fuzzy_queries = []
# Set of (fuzzed query, original question) pairs used later to decide whether
# a retrieval returned the question the fuzzed query was derived from.
valid_query_map = set()
for q in data_container.faq_df["question"].tolist():
    for difficulty in [2, 3, 4, 10, 10000]:
        new_entry = fuzzify_string(q, difficulty)
        fuzzy_queries.append(new_entry)
        valid_query_map.add((new_entry, q))

In [17]:
# Preview the first five fuzzed queries, i.e. the five difficulty levels
# generated for the first FAQ question.
fuzzy_queries[:5]

['Hwo d oI get a refund?',
 'How do I get  arefund?',
 'Ho wd oI gte a refun?d',
 'o Hwdo I egt ra efund?',
 'de do HItwrufg  oa ?en']

In [18]:
# Build a fuzzy-matching cache over the same FAQ entries and look up every
# fuzzed query. `FuzzyCache` is imported together with the other `cache`
# utilities in the setup cell, keeping all imports in one place.
fuzzy_cache = FuzzyCache()
fuzzy_cache.hydrate_from_df(data_container.faq_df)
fuzzy_retrievals = fuzzy_cache.check_many(fuzzy_queries)

In [19]:
# Preview the retrieval results for the first five fuzzy queries.
fuzzy_retrievals[:5]

[(Query: 'Hwo d oI get a refund?', Matches: ['How do I get a refund?']),
 (Query: 'How do I get  arefund?', Matches: ['How do I get a refund?']),
 (Query: 'Ho wd oI gte a refun?d', Matches: ['How do I get a refund?']),
 (Query: 'o Hwdo I egt ra efund?', Matches: ['How do I get a refund?']),
 (Query: 'de do HItwrufg  oa ?en', Matches: ['How do I get a refund?'])]

In [20]:
# Ground-truth label per fuzzy retrieval: True only when there is at least one
# match and the top match's prompt is the question that the fuzzy query is
# paired with in `valid_query_map`; retrievals without matches are False.
valid_fuzzy_labeling = [
    len(retrieval.matches) > 0
    and (retrieval.query, retrieval.matches[0].prompt) in valid_query_map
    for retrieval in fuzzy_retrievals
]

In [21]:
# Evaluate the fuzzy cache retrievals against the labels computed above.
# NOTE(review): 0.6 is passed as the decision threshold; the scale of the
# distances FuzzyCache reports is not visible here — confirm the value is
# appropriate for that scale.
CacheEvaluator.from_full_retrieval(
    true_labels=valid_fuzzy_labeling,
    cache_results=fuzzy_retrievals,
).report_metrics(distance_threshold=0.6)

In [22]:
# Clean up at the end of the lab by clearing the wrapper's underlying cache.
# NOTE(review): presumably this removes the hydrated FAQ entries from the
# Redis index — confirm in cache.wrapper.
cache_wrapper.cache.clear()