# L6: Vector Quantization

<p style="background-color:#fff6e4; padding:15px; border-width:3px; border-color:#f5ecda; border-style:solid; border-radius:6px"> ⏳ <b>Note <code>(Kernel Starting)</code>:</b> This notebook takes about 30 seconds to be ready to use. You may start and watch the video while you wait.</p>

In [1]:
import warnings
warnings.filterwarnings('ignore')

<p style="background-color:#fff6e4; padding:15px; border-width:3px; border-color:#f5ecda; border-style:solid; border-radius:6px"> ⏳ <b>Note <code>(Loading the collection)</code>:</b> The following code block might take a few minutes to complete.</p>

In [2]:
from qdrant_client import QdrantClient, models

# Connect to the local Qdrant instance. The long timeout leaves room for the
# snapshot download and recovery performed below.
client = QdrantClient(url="http://localhost:6333", timeout=600)

# Drop any previous copy of the collection, then rebuild it from the
# published snapshot so the notebook always starts from the same state.
client.delete_collection(collection_name="wands-products")
client.recover_snapshot(
    collection_name="wands-products",
    location="https://storage.googleapis.com/deeplearning-course-c1/snapshots/wands-products.snapshot",
)

# Fetch the collection metadata and display it as the cell output.
collection = client.get_collection(collection_name="wands-products")
collection

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=85988, points_count=42994, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors={'product_description': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None), 'product_name': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None)}, shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=2, max_segment_size=None, memmap_thresh

<p style="background-color:#fff6ff; padding:15px; border-width:3px; border-color:#efe6ef; border-style:solid; border-radius:6px"> 💻 &nbsp; <b>Access <code>requirements.txt</code> and <code>helper.py</code> files:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Open"</em>. For more help, please see the <em>"Appendix - Tips and Help"</em> Lesson.</p>

## Test queries

In [3]:
from sentence_transformers import SentenceTransformer

# Embedding model used to turn the text queries into vectors.
# NOTE(review): the collection stores 384-dimensional vectors, so this model
# is presumably the one that produced them - confirm against the snapshot.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
import pandas as pd

# Load the WANDS queries (tab-separated file), indexed by their query id.
queries_df = pd.read_csv(
    "shared_data/WANDS/query.csv",
    sep="\t",
    index_col="query_id",
)

# Embed all queries in a single call and store each vector as a plain
# Python list in a new column.
queries_df["query_embedding"] = model.encode(
    queries_df["query"].tolist()
).tolist()

# Preview a few rows. The fixed seed makes the preview identical on every
# "Restart & Run All" instead of showing a different random sample each time.
queries_df.sample(n=5, random_state=42)

Unnamed: 0_level_0,query,query_class,query_embedding
query_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
143,queen wingback chair,Accent Chairs,"[0.06874385476112366, 0.01463179662823677, -0...."
394,gray leather cocktail table,Coffee & Cocktail Tables,"[0.004707918036729097, 0.01947840489447117, -0..."
27,outdoor welcome rug,Door Mats,"[0.03681795299053192, 0.09876515716314316, 0.0..."
245,anchor decor,Wall Décor,"[-0.004735210444778204, 0.07532159239053726, 0..."
133,outdoor light fixtures,Outdoor Wall Lights,"[0.06439870595932007, 0.02422916516661644, 0.0..."


In [5]:
from collections import defaultdict
from ranx import Qrels

# Build the ground truth: for every query, the 50 nearest products returned
# by an exact search on the "product_name" vectors.
knn_qrels_dict = defaultdict(dict)

# Iterate only over the column that is actually used. The loop variable is
# deliberately not called `id`, which would shadow the builtin for the rest
# of the kernel session.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
        search_params=models.SearchParams(
            exact=True,  # enable exact search
        ),
    )

    for point in results:
        document_id = f"doc_{point.id}"
        # The conversion to integer is required because ranx expects integers.
        # NOTE(review): int() truncates, so a score below 0.01 becomes 0 -
        # confirm how ranx treats a zero relevance if such scores can occur.
        knn_qrels_dict[query_id][document_id] = int(point.score * 100)

qrels = Qrels(knn_qrels_dict)
qrels

DictType[unicode_type,DictType[[unichr x 9],int64]<iv=None>]<iv=None>({query_0: {doc_7465: 91, doc_9234: 82, doc_42329: 81, doc_24010: 81, doc_18273: 81, doc_18276: 80, doc_25431: 80, doc_18272: 78, doc_36910: 78, doc_18277: 78, doc_19456: 77, doc_24006: 76, doc_40996: 76, doc_18274: 75, doc_18275: 75, doc_24008: 75, doc_18270: 75, doc_24009: 75, doc_26069: 75, doc_42330: 75, doc_31556: 75, doc_4410: 75, doc_7506: 74, doc_6168: 74, doc_4034: 74, doc_26070: 74, doc_28058: 73, doc_18271: 73, doc_26068: 73, doc_18158: 73, doc_15612: 73, doc_6982: 73, doc_28687: 73, doc_12409: 73, doc_2187: 72, doc_251: 72, doc_33689: 72, doc_39461: 72, doc_33690: 71, doc_31557: 71, doc_26071: 71, doc_31555: 70, doc_6167: 70, doc_39429: 70, doc_39428: 69, doc_9207: 69, doc_8994: 69, doc_975: 69, doc_24007: 68, doc_28059: 68}, query_1: {doc_33698: 83, doc_22679: 81, doc_5173: 81, doc_33290: 81, doc_608: 81, doc_20513: 81, doc_40418: 77, doc_5205: 77, doc_5132: 76, doc_26951: 76, doc_607: 76, doc_35905: 76, 

## Product Quantization (PQ)

In [6]:
# Switch the collection to Product Quantization with the X64 compression
# ratio. `always_ram=True` asks Qdrant to keep the quantized vectors in RAM
# (see the Qdrant quantization guide).
client.update_collection(
    collection_name="wands-products",
    quantization_config=models.ProductQuantization(
        product=models.ProductQuantizationConfig(
            always_ram=True,
            compression=models.CompressionRatio.X64,
        ),
    ),
)

True

In [7]:
import time

# Wait until the collection reports GREEN again after the configuration
# change. The original loop polled forever; bound the wait (10 minutes) and
# fail fast on RED so a stuck or failed optimization cannot hang the notebook.
wait_deadline = time.monotonic() + 600.0

time.sleep(1.0)
collection = client.get_collection("wands-products")
while collection.status != models.CollectionStatus.GREEN:
    if collection.status == models.CollectionStatus.RED:
        raise RuntimeError(
            "Collection 'wands-products' entered RED status while waiting "
            "for it to become GREEN"
        )
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            "Collection 'wands-products' did not become GREEN within 600 "
            f"seconds (last status: {collection.status})"
        )
    time.sleep(1.0)
    collection = client.get_collection("wands-products")

collection

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=85988, points_count=42994, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors={'product_description': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None), 'product_name': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None)}, shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=2, max_segment_size=None, memmap_thresh

In [8]:
import numpy as np

# Time a quantized search (no rescoring) for every query and collect the
# returned scores as a ranx-style {query_id: {doc_id: score}} mapping.
# Assumes the preceding cells left the collection configured with product
# quantization.
response_times = []
pq_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # Measure the initial time. perf_counter is the clock the standard
    # library recommends for measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=False  # Disable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    # Store the response time in the list
    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        pq_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.001871973095330759

In [9]:
from ranx import Run, evaluate

# Wrap the collected search results in a named ranx Run ...
product_name_pq_run = Run(run=pq_run_dict, name="product_name_pq")

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_pq_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.6445833333333333

In [10]:
# Same timed search as before, but with rescoring enabled. Assumes the
# collection is still configured with product quantization.
response_times = []
pq_rescore_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # perf_counter is the clock the standard library recommends for
    # measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=True  # Enable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        pq_rescore_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.0018279000292144095

In [11]:
# Wrap the rescored results in a named ranx Run ...
product_name_pq_rescore_run = Run(
    run=pq_rescore_run_dict,
    name="product_name_pq_rescore",
)

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_pq_rescore_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.8301666666666667

## Scalar Quantization (SQ)

In [12]:
# Switch the collection to Scalar Quantization with INT8 components.
# `always_ram=True` asks Qdrant to keep the quantized vectors in RAM
# (see the Qdrant quantization guide).
client.update_collection(
    collection_name="wands-products",
    quantization_config=models.ScalarQuantization(
        scalar=models.ScalarQuantizationConfig(
            always_ram=True,
            type=models.ScalarType.INT8,
        ),
    ),
)

True

In [13]:
# Wait until the collection reports GREEN again after the configuration
# change. The original loop polled forever; bound the wait (10 minutes) and
# fail fast on RED so a stuck or failed optimization cannot hang the notebook.
wait_deadline = time.monotonic() + 600.0

time.sleep(1.0)
collection = client.get_collection("wands-products")
while collection.status != models.CollectionStatus.GREEN:
    if collection.status == models.CollectionStatus.RED:
        raise RuntimeError(
            "Collection 'wands-products' entered RED status while waiting "
            "for it to become GREEN"
        )
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            "Collection 'wands-products' did not become GREEN within 600 "
            f"seconds (last status: {collection.status})"
        )
    time.sleep(1.0)
    collection = client.get_collection("wands-products")

collection

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=85988, points_count=42994, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors={'product_description': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None), 'product_name': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None)}, shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=2, max_segment_size=None, memmap_thresh

In [14]:
# Time a quantized search (no rescoring) for every query and collect the
# returned scores as a ranx-style {query_id: {doc_id: score}} mapping.
# Assumes the preceding cells left the collection configured with scalar
# quantization.
response_times = []
sq_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # perf_counter is the clock the standard library recommends for
    # measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=False  # Disable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        sq_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.0018092734071615268

In [15]:
# Wrap the collected search results in a named ranx Run ...
product_name_sq_run = Run(run=sq_run_dict, name="product_name_sq")

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_sq_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.9986666666666667

In [16]:
# Same timed search as before, but with rescoring enabled. Assumes the
# collection is still configured with scalar quantization.
response_times = []
sq_rescore_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # perf_counter is the clock the standard library recommends for
    # measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=True  # Enable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        sq_rescore_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.0018514601953332507

In [17]:
# Wrap the rescored results in a named ranx Run ...
product_name_sq_rescore_run = Run(
    run=sq_rescore_run_dict,
    name="product_name_sq_rescore",
)

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_sq_rescore_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.9988333333333334

## Binary Quantization (BQ)

In [18]:
# Switch the collection to Binary Quantization. `always_ram=True` asks
# Qdrant to keep the quantized vectors in RAM (see the Qdrant quantization
# guide).
client.update_collection(
    collection_name="wands-products",
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(always_ram=True),
    ),
)

True

In [19]:
# Wait until the collection reports GREEN again after the configuration
# change. The original loop polled forever; bound the wait (10 minutes) and
# fail fast on RED so a stuck or failed optimization cannot hang the notebook.
wait_deadline = time.monotonic() + 600.0

time.sleep(1.0)
collection = client.get_collection("wands-products")
while collection.status != models.CollectionStatus.GREEN:
    if collection.status == models.CollectionStatus.RED:
        raise RuntimeError(
            "Collection 'wands-products' entered RED status while waiting "
            "for it to become GREEN"
        )
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            "Collection 'wands-products' did not become GREEN within 600 "
            f"seconds (last status: {collection.status})"
        )
    time.sleep(1.0)
    collection = client.get_collection("wands-products")

collection

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=85988, points_count=42994, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors={'product_description': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None), 'product_name': VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None)}, shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=2, max_segment_size=None, memmap_thresh

In [20]:
# Time a quantized search (no rescoring) for every query and collect the
# returned scores as a ranx-style {query_id: {doc_id: score}} mapping.
# Assumes the preceding cells left the collection configured with binary
# quantization.
response_times = []
bq_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # perf_counter is the clock the standard library recommends for
    # measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=False  # Disable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        bq_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.00176750802929746

In [21]:
# Wrap the collected search results in a named ranx Run ...
product_name_bq_run = Run(run=bq_run_dict, name="product_name_bq")

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_bq_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.7828333333333333

In [22]:
# Same timed search as before, but with rescoring enabled. Assumes the
# collection is still configured with binary quantization.
response_times = []
bq_rescore_run_dict = defaultdict(dict)

# The loop variable is not called `id` so the builtin is left untouched.
for query_idx, query_embedding in queries_df["query_embedding"].items():
    query_id = f"query_{query_idx}"

    # perf_counter is the clock the standard library recommends for
    # measuring short durations.
    start_time = time.perf_counter()

    results = client.search(
        collection_name="wands-products",
        query_vector=models.NamedVector(
            name="product_name",
            vector=query_embedding,
        ),
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(
                rescore=True  # Enable re-scoring using the original vectors
            )
        ),
        with_vectors=False,
        with_payload=False,
        limit=50,
    )

    response_times.append(time.perf_counter() - start_time)

    for point in results:
        document_id = f"doc_{point.id}"
        bq_rescore_run_dict[query_id][document_id] = point.score

# Mean response time in seconds, displayed as the cell output
np.mean(response_times)

0.0017910990190406059

In [23]:
# Wrap the rescored results in a named ranx Run ...
product_name_bq_rescore_run = Run(
    run=bq_rescore_run_dict,
    name="product_name_bq_rescore",
)

# ... and score it against the exact-kNN ground truth.
evaluate(
    run=product_name_bq_rescore_run,
    qrels=qrels,
    metrics=["precision@25"],
)

0.9484999999999999

In [24]:
from ranx import compare

# Side-by-side report of all six runs (each quantization method, without and
# with rescoring) on the same metric and the same ground truth.
compare(
    qrels=qrels,
    metrics=["precision@25"],
    runs=[
        # Product Quantization
        product_name_pq_run,
        product_name_pq_rescore_run,
        # Scalar Quantization
        product_name_sq_run,
        product_name_sq_rescore_run,
        # Binary Quantization
        product_name_bq_run,
        product_name_bq_rescore_run,
    ],
)

#    Model                    P@25
---  -----------------------  ---------
a    product_name_pq          0.645
b    product_name_pq_rescore  0.830ᵃᵉ
c    product_name_sq          0.999ᵃᵇᵉᶠ
d    product_name_sq_rescore  0.999ᵃᵇᵉᶠ
e    product_name_bq          0.783ᵃ
f    product_name_bq_rescore  0.948ᵃᵇᵉ