# L4: Enhancing Cache Effectiveness

In this lab, you’ll learn several techniques to make your cache more accurate—like threshold tuning, cross-encoders, LLM checks, and fuzzy matching.


<p style="background-color:#fff6e4; padding:15px; border-width:3px; border-color:#f5ecda; border-style:solid; border-radius:6px"> ⏳ <b>Note <code>(Kernel Starting)</code>:</b> This notebook takes about 30 seconds to be ready to use. You may start and watch the video while you wait.</p>

In [None]:
# Warning control
import warnings
# Install a global "ignore" filter so warnings raised by later cells are not
# shown in the notebook output.
warnings.filterwarnings('ignore')

## Setup Environment

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to the local `cache` package are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from cache.cross_encoder import CrossEncoder
from cache.faq_data_container import FAQDataContainer
from cache.llm_evaluator import LLMEvaluator
from cache.wrapper import SemanticCacheWrapper
from cache.evals import CacheEvaluator
from cache.config import config

# Set up plotting style: matplotlib's "default" style sheet plus seaborn's
# "husl" colour palette for every figure drawn later in the notebook.
plt.style.use("default")
sns.set_palette("husl")

# Confirmation message; reaching this line means every import above succeeded.
print("📦 Libraries and evaluation utilities imported successfully")

<div style="background-color:#fff6ff; padding:13px; border-width:3px; border-color:#efe6ef; border-style:solid; border-radius:6px">
<p> 💻 &nbsp; <b>Access <code>requirements.txt</code> and <code>helper.py</code> files:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Open"</em>.</p>

<p> ⬇ &nbsp; <b>Download Notebooks:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Download as"</em> and select <em>"Notebook (.ipynb)"</em>.</p>
</div>

## Load data and setup cache

In [None]:
# Build the semantic cache wrapper from the `config` object imported from
# `cache.config`; every semantic lookup below goes through this wrapper.
cache_wrapper = SemanticCacheWrapper.from_config(config)

In [None]:
# Data holder for the lab; this notebook reads its `faq_df` (used to fill the
# caches) and `test_df` (source of the evaluation questions) attributes.
data_container = FAQDataContainer()

# Plain Python list of the test questions used for every semantic-cache lookup.
test_queries = data_container.test_df["question"].tolist()

In [None]:
# Fill the cache from the FAQ frame, then look up every test query with a
# distance threshold of 0.3 as the baseline.
# NOTE(review): re-running this cell calls hydrate_from_df again — confirm that
# does not insert duplicate entries.
cache_wrapper.hydrate_from_df(data_container.faq_df)
cache_results = cache_wrapper.check_many(
    test_queries, distance_threshold=0.3
)

# Labels are produced by the data container from these same lookup results and
# paired with them for scoring.
evaluator = CacheEvaluator(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)
evaluator.report_metrics()

## Threshold sweep

In [None]:
# Repeat the lookup with distance_threshold=1 and rebuild the evaluator through
# `from_full_retrieval` so the sweep below can score different cut-offs.
# NOTE(review): presumably a threshold of 1 is loose enough to return a
# candidate for every query — confirm against check_many.
cache_results = cache_wrapper.check_many(
    test_queries, distance_threshold=1
)
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

# Sweep thresholds, selecting on F1 while also plotting precision and recall.
evaluator.report_threshold_sweep(
    metric_to_maximize="f1_score",
    metrics_to_plot=["f1_score", "precision", "recall"],
)

## Cross Encoder Reranking

In [None]:
# Initialize cross encoder for reranking
cross_encoder = CrossEncoder("Alibaba-NLP/gte-reranker-modernbert-base")

# Register cross encoder as a reranker with the cache wrapper
cache_wrapper.register_reranker(cross_encoder.create_reranker())

In [None]:
# Test cross encoder directly (for demonstration)
cross_encoder.predict(
    [
        "what is the capital of China?",
        "how to implement quick sort in python?",
        "how to implement quick sort in python?",
    ],
    ["Beijing", "Introduction of quick sort", "The weather is nice today"],
)

In [None]:
# Two sentences that share the word "bank" but use it in unrelated senses.
sentences = [
    "The bank raised its interest rates.",
    "The river overflowed near the bank after heavy rain.",
]

# Distance between the pair as reported by the cache wrapper and by the cross
# encoder; the two sentences are passed positionally in list order.
langcache_distance = cache_wrapper.pair_distance(*sentences)
cross_encoder_distance = cross_encoder.pair_distance(*sentences)

# Show both distances side by side as the cell output.
(langcache_distance, cross_encoder_distance)

In [None]:
# Look up every test query again, this time asking for 10 results per query at
# distance_threshold=1 with use_reranker_distance=True.
# NOTE(review): presumably this makes the returned distances come from the
# registered cross-encoder reranker instead of the vector search — confirm.
cache_results = cache_wrapper.check_many(
    test_queries,
    distance_threshold=1,
    num_results=10,
    use_reranker_distance=True,
)
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

# Sweep thresholds with the evaluator's default metric settings.
evaluator.report_threshold_sweep()

## LLM Reranking in a Tiered System

In [None]:
from cache.config import load_openai_key

# Load the OpenAI API key through the project helper, so no key is hardcoded in
# the notebook. NOTE(review): where the helper reads the key from is not
# visible here.
load_openai_key()

In [None]:
# LLM-based evaluator created through the project's GPT constructor.
llm = LLMEvaluator.construct_with_gpt()

# Swap rerankers: clear whatever is currently registered (the cross encoder
# from the previous section) and register the LLM reranker instead.
# NOTE(review): batch_size=4 presumably controls how many candidates go into
# one LLM call — confirm against LLMEvaluator.create_reranker.
cache_wrapper.clear_reranker()
cache_wrapper.register_reranker(llm.create_reranker(batch_size=4))

In [None]:
# Look up every test query with the LLM reranker registered, requesting a single
# result per query and displaying progress while the batch runs.
# NOTE(review): 0.2828 is a magic number — presumably the threshold selected by
# an earlier sweep; confirm and consider naming it as a constant.
cache_results = cache_wrapper.check_many(
    test_queries,
    distance_threshold=0.2828,
    num_results=1,
    show_progress=True,
)

In [None]:
# Score the LLM-reranked lookups against labels derived from the same results.
# NOTE(review): these results come from a thresholded (0.2828), single-result
# lookup, whereas the other `from_full_retrieval` calls use
# distance_threshold=1 — confirm this constructor is the intended one here.
evaluator = CacheEvaluator.from_full_retrieval(
    true_labels=data_container.label_cache_hits(cache_results),
    cache_results=cache_results,
)

evaluator.report_metrics()

<p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
&nbsp; <b>Different Run Results:</b> The output visualizations generated may differ from those shown in the video.</p>

## Fuzzy Matching

In [None]:
def fuzzify_string(str, its=3):
    """Return ``str`` with ``its`` random adjacent-character swaps applied.

    Each iteration draws a position with ``np.random.randint`` and swaps the
    character there with its right-hand neighbour, simulating typos. The
    characters themselves are never added or removed, only reordered.

    Args:
        str: Text to perturb. (The name shadows the builtin; it is kept so
            existing keyword callers continue to work.)
        its: Number of swaps to perform. Zero or a negative value leaves the
            text unchanged.

    Returns:
        The perturbed text. Inputs shorter than two characters are returned
        unchanged because they contain no adjacent pair to swap.
    """
    # With fewer than two characters np.random.randint(0, len - 1) would be
    # called with low >= high and raise ValueError, so bail out early.
    if len(str) < 2:
        return str

    # Convert once and swap in place instead of rebuilding the list and the
    # string on every iteration (its can be as large as 10000).
    chars = list(str)
    upper = len(chars) - 1  # exclusive bound: positions 0 .. len-2 are valid
    for _ in range(its):
        pos = np.random.randint(0, upper)
        chars[pos], chars[pos + 1] = chars[pos + 1], chars[pos]
    return "".join(chars)

In [None]:
# Seed NumPy's global RNG so the random swaps made by fuzzify_string — and the
# metrics computed from them — are identical on every Restart & Run All.
np.random.seed(42)

# Perturbed questions, in generation order (five per FAQ question).
fuzzy_queries = []
# Set of (perturbed_query, original_question) pairs used later to decide whether
# a retrieved prompt is the correct one for a given perturbed query.
valid_query_map = set()
for q in data_container.faq_df["question"].tolist():
    # Number of adjacent-character swaps, from light typos to heavy scrambling.
    for difficulty in [2, 3, 4, 10, 10000]:
        new_entry = fuzzify_string(q, difficulty)
        fuzzy_queries.append(new_entry)
        valid_query_map.add((new_entry, q))

In [None]:
# Preview the first five perturbed queries (all generated from the first FAQ question).
fuzzy_queries[:5]

In [None]:
from cache.fuzzy_cache import FuzzyCache

# Separate fuzzy-matching cache, filled from the same FAQ frame that was used
# for the semantic cache.
fuzzy_cache = FuzzyCache()
fuzzy_cache.hydrate_from_df(data_container.faq_df)
# Look up every perturbed query using the cache's default lookup settings.
fuzzy_retrievals = fuzzy_cache.check_many(fuzzy_queries)

In [None]:
# Preview the lookup results for the first five perturbed queries.
fuzzy_retrievals[:5]

In [None]:
# One boolean per fuzzy lookup: True only when the lookup returned at least one
# match and its top match's prompt is the question the query was derived from.
valid_fuzzy_labeling = [
    len(retrieval.matches) > 0
    and (retrieval.query, retrieval.matches[0].prompt) in valid_query_map
    for retrieval in fuzzy_retrievals
]

In [None]:
# Score the fuzzy lookups against the labels built from valid_query_map and
# report metrics at a distance threshold of 0.6.
# NOTE(review): 0.6 is a magic number; the distance scale used by FuzzyCache is
# not visible here — confirm the value is appropriate.
CacheEvaluator.from_full_retrieval(
    true_labels=valid_fuzzy_labeling,
    cache_results=fuzzy_retrievals,
).report_metrics(distance_threshold=0.6)

In [None]:
# Clean up: clear the wrapper's underlying cache object at the end of the lab.
# NOTE(review): presumably this removes the entries added by hydrate_from_df — confirm.
cache_wrapper.cache.clear()