# [Qdrant](https://qdrant.tech/documentation/quickstart/)

## Setup

### Run Qdrant Using Docker

```sh
docker run -p 6333:6333 -p 6334:6334 \
    -v $(pwd)/qdrant_storage:/qdrant/storage:z \
    --name qdrant-db --rm \
    qdrant/qdrant
```

In [1]:
# Built-in library
from pathlib import Path
import re
import json
from typing import Any, Literal, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import numpy.typing as npt
from pprint import pprint
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

custom_theme = Theme(
    {
        "white": "#FFFFFF",  # Bright white
        "info": "#00FF00",  # Bright green
        "warning": "#FFD700",  # Bright gold
        "error": "#FF1493",  # Deep pink
        "success": "#00FFFF",  # Cyan
        "highlight": "#FF4500",  # Orange-red
    }
)
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
np.set_printoptions(precision=4)

# Pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=1_000)

warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# auto reload imports
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory first on `sys.path`.

    The ancestor is resolved relative to the current working directory, its
    absolute path is moved to the front of `sys.path` and then printed.
    Calling the function repeatedly (e.g. re-running a notebook cell) keeps a
    single entry for that path instead of stacking duplicates.

    Params:
    -------
    go_up: int, default=1
        Number of directory levels to climb from the current working
        directory. Values <= 0 resolve to the current working directory.

    Returns:
    --------
    None
    """
    import os
    import sys

    # Build "./../.." style relative path; with no levels it is just ".".
    relative_target: str = os.path.join(os.curdir, *([os.pardir] * go_up))
    abs_target: str = os.path.abspath(relative_target)

    # Drop any earlier occurrence (in place, so other references to the list
    # stay valid) before putting the path at the highest import priority.
    sys.path[:] = [entry for entry in sys.path if entry != abs_target]
    sys.path.insert(0, abs_target)
    print(abs_target)

In [3]:
go_up_from_current_directory(go_up=2)

from QA_and_RAG import PACKAGE_ROOT_PATH
from QA_and_RAG.src.utils.utilities import ProcessFiles
from config import config, settings

/Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects


In [5]:
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [6]:
model_name_or_path: str = "all-MiniLM-L6-v2"
encoder: SentenceTransformer = SentenceTransformer(
    model_name_or_path, cache_folder=config.QA_and_RAG.encoder_model.cache_folder
)

encoder.get_sentence_embedding_dimension()

384

### Add Dataset

In [None]:
fp: str = "../data/vector_db_data/music-sample-data.csv"
df: pl.DataFrame = pl.read_csv(fp)
# .with_columns(
#     metadata=pl.concat_str(
#         ["name", "sex", "ticket"],
#         separator=" || ",
#     )
# )

sample_df: pl.DataFrame = df.sample(df.shape[0], seed=1)
sample_df.head()

In [None]:
documents: list[dict[str, Any]] = sample_df.to_dicts()
sample_df.to_dicts()[:2]

### Create Client And Collection

In [8]:
from qdrant_client.http.exceptions import ResponseHandlingException

In [None]:
# host: str = "0.0.0.0"
# port: int = 6333
# api_key: str | None = None
# client = QdrantClient(url="http://localhost:6333")

client: QdrantClient = QdrantClient(
    host=settings.QDRANT_HOST,
    port=settings.QDRANT_PORT,
    api_key=settings.QDRANT_API_KEY.get_secret_value(),
    https=False,
)


try:
    client.get_collections().collections
    print("Qdrant server is running.")
except ResponseHandlingException as e:
    print(f"Qdrant server is not running. Error: {e}")

In [None]:
# Create the collection only when the server does not have it yet
collection_name: str = "music_collection"
# Vector size must match the dimension produced by the embedding model.
embedding_size: int = encoder.get_sentence_embedding_dimension()

# Ask about this one collection directly (same call the next cell uses)
# instead of listing every collection and scanning the names.
if client.collection_exists(collection_name=collection_name):
    print(f"Collection '{collection_name}' already exists.")

else:
    print(f"Creating collection '{collection_name}'.")
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(
            size=embedding_size, distance=models.Distance.COSINE
        ),
    )

In [None]:
if client.collection_exists(collection_name=collection_name):
    print("Database exists")
else:
    print("Database does not exist")

### Upload Data to Qdrant

In [None]:
res = encoder.encode("This is a test.")

res.tolist()[:2]

In [None]:
def embed_document(document: str) -> list[float]:
    """Encode `document` with the notebook-level `encoder` and return it as a list."""
    embedding = encoder.encode(document)
    return embedding.tolist()


len(embed_document("This is a test."))

In [None]:
client.upsert(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=idx, vector=embed_document(doc["description"]), payload=doc
        )
        for idx, doc in enumerate(documents)
    ],
)

### Query The Vector Database

In [None]:
query: str = "rap battle"

hits = client.query_points(
    collection_name=collection_name, query=embed_document(query), limit=3
)
hits.points

In [None]:
print(f"Query: {query}")
for hit in hits.points:
    console.print(hit.payload, "score: ", hit.score)

#### Add Filters

- Narrow down the search results by adding filters to the query.

In [None]:
query_filter = models.Filter(
    must=[models.FieldCondition(key="year", range=models.Range(gte=2_000))]
)
# OR
query_filter = {
    "must": [
        {"key": "year", "range": {"gte": 2_000}},
    ]
}

hits = client.query_points(
    collection_name=collection_name,
    query=embed_document(query),
    query_filter=query_filter,
    limit=3,
)
hits

In [None]:
print(f"Query: {query}")
for hit in hits.points:
    console.print(hit.payload, "score: ", hit.score)

## Putting It Together

In [4]:
from QA_and_RAG.src.db_utils import VectorDBManager


files_dir: str = f"{config.QA_and_RAG.path}/data/vector_db_data/"
vector_db_manager: VectorDBManager = VectorDBManager(files_dir=files_dir)
vector_db_manager.run()

Processing file: /Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects/QA_and_RAG/data/vector_db_data/music-sample-data.csv
Qdrant server is running.
Collection 'music-sample-data' already exists.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Data from 'music-sample-data' is saved into the vector database.
All csv/parquet files are saved into the vector database.


('',
 [{'role': 'assistant',
   'content': 'Uploaded files are ready. Please ask your question'}])

In [None]:
# vector_db_manager.encoder

vector_db_manager._get_collection_names()

In [None]:
# To Do
# 1. Add chunking to the vector store
# 2. Add filters to the vector store
# 3. Create the RAG pipeline.

In [125]:
class Singleton:
    """Class that only ever hands out one shared instance.

    `__new__` builds the instance on first use and returns the cached object
    on every later call; `__init__` keeps the `value` given the first time.
    """

    # The shared instance; stays None until the first construction.
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is not None:
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, value):
        # `__init__` runs on every `Singleton(...)` call, so an already
        # initialised instance must be left alone.
        if hasattr(self, "value"):
            return
        self.value = value

    def __repr__(self):
        return "{}({})".format(self.__class__.__name__, self.value)


# Usage
# a = Singleton("first")
# b = Singleton("second")
# print(a.value)  # Prints: "first"
# print(b.value)  # Prints: "first"
# print(a is b)  # Prints: True

In [None]:
Singleton("a")

In [162]:
from typing import ClassVar, Self


def owner_info() -> tuple[str, int]:
    """Return a hard-coded `(owner name, count)` pair."""
    owner_name: str = "Donald"
    count: int = 0
    return owner_name, count


class Dog:
    _instance: None | Self = None
    _owner: str | None = None
    _num_of_dogs: int | None = None

    def __new__(cls, *args, **kwargs) -> Self:
        if cls._instance is None:
            print(f"Creating a new {cls.__name__} instance.")
            cls._instance = super().__new__(cls)
            cls._instance._get_owner_info()
        return cls._instance

    def __init__(self, name: str, age: int) -> None:
        if not hasattr(self, "name"):
            self.name = name
        if not hasattr(self, "age"):
            self.age = age

        Dog._num_of_dogs += 1

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, age={self.age})"

    @classmethod
    def _get_owner_info(cls) -> str:
        cls._owner, cls._num_of_dogs = owner_info()

    def talk(self) -> None:
        print("Dog is making a woof sound!")

    def walk(self) -> None:
        print("Dog is walking.")

    def get_owner_info(self) -> str:
        return f"The owner is {Dog._owner} and he has {Dog._num_of_dogs} dogs."

    @property
    def owner(self) -> str:
        return self._owner

    @property
    def num_of_dogs(self) -> int:
        return self._num_of_dogs


class Cat:
    _num_of_cats: ClassVar[int] = 0
    _owner: str | None = None

    def __init__(self, name: str, age: int) -> None:
        self.name = name
        self.age = age
        Cat._num_of_cats += 1

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, age={self.age})"

    @classmethod
    def _get_owner_info(cls) -> str:
        cls._owner, cls._num_of_cats = owner_info()

    def talk(self) -> None:
        print("Cat is making a woof sound!")

    def walk(self) -> None:
        print("Cat is walking.")

    def get_owner_info(self) -> str:
        return f"The owner is {Cat._owner} and he has {Cat._num_of_cats} Cats."

    @property
    def owner(self) -> str:
        return self._owner

    @property
    def num_of_cats(self) -> int:
        return self._num_of_cats

In [None]:
dog_1: Dog = Dog(name="a", age=2)
dog_1.talk()
print(dog_1)
dog_1.get_owner_info()

In [None]:
dog_2: Dog = Dog(name="alk", age=4)
dog_2.talk()
print(dog_2)
dog_2.get_owner_info()

In [None]:
dog_1 == dog_2

In [None]:
print(dog_1, dog_2)

In [None]:
from typing import ClassVar, Self


def owner_info() -> tuple[str, int]:
    """Return a hard-coded `(owner name, count)` pair."""
    owner_name: str = "Donald"
    count: int = 0
    return owner_name, count


class Dog:
    _instance: None | Self = None
    _owner: str | None = None
    _num_of_dogs: int | None = None

    def __new__(cls, *args, **kwargs) -> Self:
        if cls._instance is None:
            print(f"Creating a new {cls.__name__} instance.")
            cls._instance = super().__new__(cls)
            cls._instance._get_owner_info()
        return cls._instance

    def __init__(self, name: str, age: int) -> None:
        self.name = name
        self.age = age
        Dog._num_of_dogs += 1

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, age={self.age})"

    @classmethod
    def _get_owner_info(cls) -> str:
        cls._owner, cls._num_of_dogs = owner_info()

    def talk(self) -> None:
        print("Dog is making a woof sound!")

    def walk(self) -> None:
        print("Dog is walking.")

    def get_owner_info(self) -> str:
        return f"The owner is {Dog._owner} and he has {Dog._num_of_dogs} dogs."

    @property
    def owner(self) -> str:
        return self._owner

    @property
    def num_of_dogs(self) -> int:
        return self._num_of_dogs


# Usage
dog_1: Dog = Dog(name="a", age=2)
dog_1.talk()
print(dog_1)
dog_1.get_owner_info()
# Result:
# Creating a new Dog instance.
# Dog is making a woof sound!
# Dog(name=a, age=2)
# 'The owner is Donald and he has 1 dogs.'


dog_2: Dog = Dog(name="alk", age=4)
dog_2.talk()
print(dog_2)
dog_2.get_owner_info()
# Result:
# Dog is making a woof sound!
# Dog(name=alk, age=4)
# 'The owner is Donald and he has 2 dogs.'

print(dog_1, dog_2)
# Result:
# Dog(name=alk, age=4) Dog(name=alk, age=4)

In [None]:
cat_1: Cat = Cat(name="a", age=2)
# `Cat` exposes `talk()`; it has no `bark()` method, so calling `bark()`
# here raised AttributeError.
cat_1.talk()
cat_1.get_owner_info()


cat_2: Cat = Cat(name="b", age=5)
cat_2.talk()
cat_2.get_owner_info()


# `Cat` is not a singleton, so these are two distinct objects.
cat_1 == cat_2

In [112]:
from typing import ClassVar, Self


def owner_info() -> tuple[str, int]:
    """Return a hard-coded `(owner name, count)` pair."""
    owner_name: str = "Donald"
    count: int = 0
    return owner_name, count


class Dog:
    _instance: None | Self = None
    _owner: str | None = None
    _num_of_dogs: int | None = 0

    def __init__(self, name: str, age: int) -> None:
        self.name = name
        self.age = age
        Dog._num_of_dogs += 1

    def __new__(cls, *args, **kwargs) -> Self:
        if cls._instance is None:
            print(f"Creating a new {cls.__name__} instance.")
            cls._instance = super().__new__(cls)
            cls._instance._get_owner_info()
        return cls._instance

    @classmethod
    def _get_owner_info(cls) -> str:
        cls._owner, cls._num_of_dogs = owner_info()

    def bark(self) -> None:
        print(
            f"{self.__class__.__name__}(name: {self.name}, age: {self.age}) is making a woof sound!"
        )

    def walk(self) -> None:
        print(
            f"{self.__class__.__name__}(name: {self.name},  age: {self.age}) is walking."
        )

    def get_owner_info(self) -> str:
        print(
            f"Getting owner info of {self.__class__.__name__}(name: {self.name},  age: {self.age}) ..."
        )
        return f"The owner is {Dog._owner} and he has {Dog.num_of_dogs} dogs."

    @property
    def owner(self) -> str:
        return self._owner

    @property
    def num_of_dogs(self) -> int:
        return self._num_of_dogs

In [None]:
dog_1: Dog = Dog(name="Rex", age=3)
dog_1.bark()
dog_1.get_owner_info()

In [None]:
Dog._owner

In [None]:
# dog_1.get_owner_info()
dog_1.owner

In [None]:
dog_2: Dog = Dog(name="Buddy", age=5)
dog_2.bark()
dog_2.get_owner_info()

In [None]:
dog_1.get_owner_info()