# [Qdrant Quickstart](https://qdrant.tech/documentation/quickstart/)

In [1]:
# Built-in library
from pathlib import Path
import re
import json
from typing import Any, Literal, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
import numpy.typing as npt
from pprint import pprint
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

# Named colour styles made available to the rich console created below.
custom_theme = Theme(
    dict(
        white="#FFFFFF",  # Bright white
        info="#00FF00",  # Bright green
        warning="#FFD700",  # Bright gold
        error="#FF1493",  # Deep pink
        success="#00FFFF",  # Cyan
        highlight="#FF4500",  # Orange-red
    )
)
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings: print array values with 4 digits of precision
np.set_printoptions(precision=4)

# Pandas settings: raise the row / column / column-width display limits
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings: raise the string-length / column / row display limits
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)
pl.Config.set_tbl_rows(n=1_000)

# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation notices from libraries used further down.
# Consider narrowing the filter to specific categories or modules.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black

# Auto-reload imported modules so edits to project code are picked up
# without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory at the front of `sys.path`.

    Params:
    -------
    go_up: int, default=1
        Number of directory levels to climb from the current working
        directory. Values of 0 or less select the working directory itself.

    Returns:
    --------
    None
        `sys.path` is updated in place and the resolved absolute path is
        printed.
    """
    import os
    import sys

    # Anchor explicitly on the working directory. The previous
    # `os.path.dirname(__name__)` always produced an empty string (a module
    # name holds no path separator), so the working directory was already
    # the effective starting point.
    target_dir: str = os.path.abspath(
        os.path.join(os.getcwd(), *([os.pardir] * go_up))
    )

    # Re-running the cell used to push a duplicate entry each time; drop any
    # earlier copies so the path appears exactly once, at the front.
    sys.path[:] = [entry for entry in sys.path if entry != target_dir]
    sys.path.insert(0, target_dir)
    print(target_dir)

In [3]:
# Add the directory two levels above the working directory to sys.path
# before the project-local imports below run.
# NOTE(review): presumably that directory is the repository root that
# contains `QA_and_RAG` and `config` — confirm.
go_up_from_current_directory(go_up=2)

from QA_and_RAG import PACKAGE_ROOT_PATH
from QA_and_RAG.src.utils.utilities import ProcessFiles
from config import config, settings

/Users/neidu/Desktop/Projects/Personal/My_Projects/Gen-AI-Projects


In [4]:
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

In [13]:
# Sentence-transformers model used to turn text into embedding vectors.
model_name_or_path: str = "all-MiniLM-L6-v2"
encoder: SentenceTransformer = SentenceTransformer(model_name_or_path)

# Display the embedding dimension reported by the model; the same call is
# used later to size the collection's vectors.
encoder.get_sentence_embedding_dimension()

384

### Add Dataset

In [18]:
# Read the Titanic CSV and add a `metadata` column that joins the name, sex
# and ticket columns with " || "; this is the text that gets embedded later.
fp: str = "../data/flat_files/titanic-data.csv"
df: pl.DataFrame = pl.read_csv(fp).with_columns(
    pl.concat_str(["name", "sex", "ticket"], separator=" || ").alias("metadata")
)

# Fixed seed so the same 20 rows are drawn on every run.
sample_df: pl.DataFrame = df.sample(n=20, seed=1)
sample_df

pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,metadata
i64,i64,str,str,f64,i64,i64,str,f64,str,str,str,i64,str,str
3,0,"""Keane, Mr. Andrew ""Andy""""","""male""",,0,0,"""12460""",7.75,,"""Q""",,,,"""Keane, Mr. Andrew ""Andy"" || male || 12460"""
2,0,"""Hickman, Mr. Lewis""","""male""",32.0,2,0,"""S.O.C. 14879""",73.5,,"""S""",,256.0,"""West Hampstead, London / Neepawa, MB""","""Hickman, Mr. Lewis || male || S.O.C. 14879"""
3,1,"""Tenglin, Mr. Gunnar Isidor""","""male""",25.0,0,0,"""350033""",7.7958,,"""S""","""13 15""",,,"""Tenglin, Mr. Gunnar Isidor || male || 350033"""
2,0,"""Pernot, Mr. Rene""","""male""",,0,0,"""SC/PARIS 2131""",15.05,,"""C""",,,,"""Pernot, Mr. Rene || male || SC/PARIS 2131"""
3,0,"""Kelly, Mr. James""","""male""",34.5,0,0,"""330911""",7.8292,,"""Q""",,70.0,,"""Kelly, Mr. James || male || 330911"""
3,0,"""Davison, Mr. Thomas Henry""","""male""",,1,0,"""386525""",16.1,,"""S""",,,"""Liverpool, England Bedford, OH""","""Davison, Mr. Thomas Henry || male || 386525"""
3,1,"""de Messemaeker, Mrs. Guillaume Joseph (Emma)""","""female""",36.0,1,0,"""345572""",17.4,,"""S""","""13""",,"""Tampico, MT""","""de Messemaeker, Mrs. Guillaume Joseph (Emma) || female || 345572"""
1,0,"""Robbins, Mr. Victor""","""male""",,0,0,"""PC 17757""",227.525,,"""C""",,,,"""Robbins, Mr. Victor || male || PC 17757"""
1,1,"""Peuchen, Major. Arthur Godfrey""","""male""",52.0,0,0,"""113786""",30.5,"""C104""","""S""","""6""",,"""Toronto, ON""","""Peuchen, Major. Arthur Godfrey || male || 113786"""
3,0,"""Gallagher, Mr. Martin""","""male""",25.0,0,0,"""36864""",7.7417,,"""Q""",,,"""New York, NY""","""Gallagher, Mr. Martin || male || 36864"""


In [19]:
# One dict per sampled row; each dict is later stored as a point payload.
documents: list[dict[str, Any]] = sample_df.to_dicts()
# Preview the first two records from the list just built instead of
# converting the frame a second time.
documents[:2]

[{'pclass': 3,
  'survived': 0,
  'name': 'Keane, Mr. Andrew "Andy"',
  'sex': 'male',
  'age': None,
  'sibsp': 0,
  'parch': 0,
  'ticket': '12460',
  'fare': 7.75,
  'cabin': None,
  'embarked': 'Q',
  'boat': None,
  'body': None,
  'home.dest': None,
  'metadata': 'Keane, Mr. Andrew "Andy" || male || 12460'},
 {'pclass': 2,
  'survived': 0,
  'name': 'Hickman, Mr. Lewis',
  'sex': 'male',
  'age': 32.0,
  'sibsp': 2,
  'parch': 0,
  'ticket': 'S.O.C. 14879',
  'fare': 73.5,
  'cabin': None,
  'embarked': 'S',
  'boat': None,
  'body': 256,
  'home.dest': 'West Hampstead, London / Neepawa, MB',
  'metadata': 'Hickman, Mr. Lewis || male || S.O.C. 14879'}]

### Create Client And Collection

In [32]:
# Connect to the Qdrant instance expected at this local URL.
client = QdrantClient(url="http://localhost:6333")

# Create collection
collection_name: str = "titanic"
embedding_size: int = encoder.get_sentence_embedding_dimension()

# `recreate_collection` is deprecated in qdrant-client; do the same
# drop-then-create sequence explicitly.
# WARNING: this deletes any existing collection with the same name, so
# re-running the cell starts from an empty collection.
if client.collection_exists(collection_name=collection_name):
    client.delete_collection(collection_name=collection_name)

# Vector size must match the encoder's embedding dimension; similarity is
# measured with cosine distance.
client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=embedding_size, distance=models.Distance.COSINE
    ),
)

True

### Upload Data to Qdrant

In [25]:
# Scratch check: encode a single test sentence with the encoder.
res = encoder.encode("This is a test.")

In [35]:
# Show the first two components of the test embedding as plain Python values.
res.tolist()[:2]

[0.03359076753258705, 0.010512457229197025]

In [37]:
def embed_document(document: str) -> list[float]:
    """Encode a piece of text with the notebook-level `encoder`.

    Params:
    -------
    document: str
        The text to embed.

    Returns:
    --------
    list[float]
        The encoder output converted with `.tolist()`.
    """
    embedding = encoder.encode(document)
    return embedding.tolist()


# Sanity check: display the length of the vector produced for a test sentence.
len(embed_document("This is a test."))

384

In [36]:
# Build one point per document: the id is the document's position in the
# list, the vector embeds its `metadata` text, and the payload is the full row.
titanic_points: list[models.PointStruct] = [
    models.PointStruct(
        id=idx,
        vector=embed_document(doc["metadata"]),
        payload=doc,
    )
    for idx, doc in enumerate(documents)
]

# Insert (or overwrite by id) the points in the collection.
client.upsert(collection_name=collection_name, points=titanic_points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
### Query 