# [Q-DRANT](https://qdrant.tech/documentation/quickstart/)

## Setup

### Run QDrant Using Docker

```sh
docker run -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_storage:/qdrant/storage:z \
    --name qdrant-db --rm \
    qdrant/qdrant
```

In [1]:
# Built-in library
from pathlib import Path
import re
import json
from typing import Any, Literal, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import numpy.typing as npt
from pprint import pprint
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

custom_theme = Theme(
    {
        "white": "#FFFFFF",  # Bright white
        "info": "#00FF00",  # Bright green
        "warning": "#FFD700",  # Bright gold
        "error": "#FF1493",  # Deep pink
        "success": "#00FFFF",  # Cyan
        "highlight": "#FF4500",  # Orange-red
    }
)
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
np.set_printoptions(precision=4)

# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=1_000)

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """This is used to up a number of directories.

    Params:
    -------
    go_up: int, default=1
        This indicates the number of times to go back up from the current directory.

    Returns:
    --------
    None
    """
    import os
    import sys

    CONST: str = "../"
    NUM: str = CONST * go_up

    # Goto the previous directory
    prev_directory = os.path.join(os.path.dirname(__name__), NUM)
    # Get the 'absolute path' of the previous directory
    abs_path_prev_directory = os.path.abspath(prev_directory)

    # Add the path to the System paths
    sys.path.insert(0, abs_path_prev_directory)
    print(abs_path_prev_directory)

In [3]:
go_up_from_current_directory(go_up=2)

from QA_and_RAG import PACKAGE_ROOT_PATH
from QA_and_RAG.src.utils.utilities import ProcessFiles
from config import config, settings

/Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects


In [4]:
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [5]:
model_name_or_path: str = "all-MiniLM-L6-v2"
encoder: SentenceTransformer = SentenceTransformer(model_name_or_path)

encoder.get_sentence_embedding_dimension()

384

### Add Dataset

In [6]:
fp: str = "../data/flat_files/sample-data.csv"
df: pl.DataFrame = pl.read_csv(fp)
# .with_columns(
#     metadata=pl.concat_str(
#         ["name", "sex", "ticket"],
#         separator=" || ",
#     )
# )

sample_df: pl.DataFrame = df.sample(df.shape[0], seed=1)
sample_df.head()

title,description,genre,artist,year
str,str,str,str,i64
"""'Imagine'""","""'A peaceful anthem advocating for world peace'""","""'Rock'""","""'John Lennon'""",1971
"""'Billie Jean'""","""'A pop classic with a memorable bassline'""","""'Pop'""","""'Michael Jackson'""",1982
"""'Juicy'""","""'A classic East Coast rap anthem'""","""'Rap'""","""'The Notorious B.I.G.'""",1994
"""'Still D.R.E.'""","""'A comeback rap song with a iconic beat'""","""'Rap'""","""'Dr. Dre'""",1999
"""'Sweet Child O' Mine'""","""'A hard rock anthem with a distinctive opening riff'""","""'Rock'""","""'Guns N' Roses'""",1987


In [7]:
documents: list[dict[str, Any]] = sample_df.to_dicts()
sample_df.to_dicts()[:2]

[{'title': "'Imagine'",
  'description': "'A peaceful anthem advocating for world peace'",
  'genre': "'Rock'",
  'artist': "'John Lennon'",
  'year': 1971},
 {'title': "'Billie Jean'",
  'description': "'A pop classic with a memorable bassline'",
  'genre': "'Pop'",
  'artist': "'Michael Jackson'",
  'year': 1982}]

### Create Client And Collection

In [13]:
from qdrant_client.http.exceptions import ResponseHandlingException

In [17]:
client = QdrantClient(url="http://localhost:6333")

try:
    client.get_collections().collections
    print("Qdrant server is running.")
except ResponseHandlingException as e:
    print(f"Qdrant server is not running. Error: {e}")

Qdrant server is running.


In [20]:
collection_name: str = "music_collection"


if collection_name in client.get_collections().collections:
    print(f"Collection '{collection_name}' already exists.")

In [25]:
client.get_collections().model_dump()

dir(client.get_collections())

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__class_vars__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_pydantic_core_schema__',
 '__get_pydantic_json_schema__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__pretty__',
 '__private_attributes__',
 '__pydantic_complete__',
 '__pydantic_computed_fields__',
 '__pydantic_core_schema__',
 '__pydantic_custom_init__',
 '__pydantic_decorators__',
 '__pydantic_extra__',
 '__pydantic_fields__',
 '__pydantic_fields_set__',
 '__pydantic_generic_metadata__',
 '__pydantic_init_subclass__',
 '__pydantic_parent_namespace__',
 '__pydantic_post_init__',
 '__pydantic_private__',
 '__pydantic_root_model__',
 '__pydantic_serializer__',
 '__pydantic_validator__',

In [18]:
collection_name: str = "music_collection"
embedding_size: int = encoder.get_sentence_embedding_dimension()

client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=embedding_size, distance=models.Distance.COSINE
    ),
)

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `music_collection` already exists!"},"time":0.009734788}'

In [None]:

# Create collection
collection_name: str = "music_collection"
embedding_size: int = encoder.get_sentence_embedding_dimension()

client.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=embedding_size, distance=models.Distance.COSINE
    ),
)

### Upload Data to Qdrant

In [None]:
res = encoder.encode("This is a test.")

res.tolist()[:2]

In [None]:
def embed_document(document: str) -> list[float]:
    """Embed a document using an embedding model."""
    return encoder.encode(document).tolist()


len(embed_document("This is a test."))

In [None]:
client.upsert(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=idx, vector=embed_document(doc["description"]), payload=doc
        )
        for idx, doc in enumerate(documents)
    ],
)

### Query The Vector Database

In [None]:
query: str = "rap battle"

hits = client.query_points(
    collection_name=collection_name, query=embed_document(query), limit=3
)
hits.points

In [None]:
print(f"Query: {query}")
for hit in hits.points:
    console.print(hit.payload, "score: ", hit.score)

#### Add Filters

- Narrow down the search results by adding filters to the query.

In [None]:
query_filter = models.Filter(
    must=[models.FieldCondition(key="year", range=models.Range(gte=2_000))]
)
# OR
query_filter = {
    "must": [
        {"key": "year", "range": {"gte": 2_000}},
    ]
}

hits = client.query_points(
    collection_name=collection_name,
    query=embed_document(query),
    query_filter=query_filter,
    limit=3,
)
hits

In [None]:
print(f"Query: {query}")
for hit in hits.points:
    console.print(hit.payload, "score: ", hit.score)