# QDRANT - Feedback Collection Quickstart

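A quickstart notebook for Qdrant: it pulls labelled feedback from BigQuery, loads it into a Qdrant collection, and runs a sample top-k search against that collection.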

In [None]:
import os
from pprint import pprint
from src.utils.bigquery import query_bigquery
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance

from src.collection.set_collection import (
    create_vectors_from_data,
    create_collection,
    upsert_to_collection_from_vectors,
)
from src.sql_queries import query_labelled_feedback
from src.collection.query_collection import get_top_k_results

# Runtime configuration, read from the environment. A variable that is not
# set resolves to None.
PUBLISHING_PROJECT_ID = os.environ.get("PUBLISHING_PROJECT_ID")
LABELLED_FEEDBACK_DATASET = os.environ.get("LABELLED_FEEDBACK_DATASET")
PUBLISHING_VIEW = os.environ.get("PUBLISHING_VIEW")
OPENAI_LABELLED_FEEDBACK_TABLE = os.environ.get("OPENAI_LABELLED_FEEDBACK_TABLE")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")
HF_MODEL_NAME = os.environ.get("HF_MODEL_NAME")

In [None]:
# Build the labelled-feedback query: the SQL template carries "@..."
# placeholders that are swapped for the configured table and view names.
labelled_table = str(OPENAI_LABELLED_FEEDBACK_TABLE)
publishing_view = str(PUBLISHING_VIEW)
query_read = query_labelled_feedback.replace("@labelled_feedback_table", labelled_table)
query_read = query_read.replace("@PUBLISHING_VIEW", publishing_view)

# Run the query against BigQuery and keep the returned records
docs = query_bigquery(PUBLISHING_PROJECT_ID, LABELLED_FEEDBACK_DATASET, query_read)

In [None]:
# Create the Qdrant client, pointed at "localhost" on port 6333.
# The later cells reuse this `client` for collection creation, upserts,
# deletion and search.
client = QdrantClient("localhost", port=6333)

In [None]:
# Name of the collection to work with, taken from the COLLECTION_NAME setting.
collection_name = COLLECTION_NAME

# Create the collection through the project helper, using the dot-product
# distance metric.
# NOTE(review): size=768 is presumably the vector dimensionality — confirm it
# matches the embeddings that get upserted.
create_collection(client, collection_name, size=768, distance_metric=Distance.DOT)

In [None]:
# Convert the records pulled from BigQuery (`docs`) into PointStructs for upsertion.
# NOTE(review): id_key / embedding_key presumably name the record fields that
# hold the point id and the vector — verify against create_vectors_from_data.
points_to_upsert = create_vectors_from_data(
    docs, id_key="feedback_record_id", embedding_key="embeddings"
)

In [None]:
# Sanity check: show the type of the first converted point.
type(points_to_upsert[0])

In [None]:
# Upsert the converted points into the collection named by `collection_name`,
# using the shared Qdrant `client`.
upsert_to_collection_from_vectors(client, collection_name, data=points_to_upsert)

In [None]:
# Remove a collection that is no longer needed. Replace the placeholder value
# below with the name of the collection to delete.
stale_collection_name = "stale_collection_name"
client.delete_collection(collection_name=stale_collection_name)

In [None]:
# Load the sentence-transformers model named by the HF_MODEL_NAME setting.
# It is used below to embed the search keyword for the query.
model = SentenceTransformer(HF_MODEL_NAME)

In [None]:
# Embed the search keyword "tax". It is passed as a one-element list, and the
# search cell below uses the first entry of the result as the query vector.
query_embedding = model.encode(["tax"])

In [None]:
# Show the length of the encoded output.
# NOTE(review): a one-element list was encoded, so this is presumably the
# number of encoded inputs (1) rather than the embedding dimension — confirm.
len(query_embedding)

In [None]:
# Show the container type returned by model.encode.
type(query_embedding)

In [None]:
# Filter settings kept for reference; they are commented out and are not
# passed to the search below.
# filter_key = "subject_page_path"
# filter_values = None
# filter_values = ["/government/publications/childcare-service-compensation"]

# Only one keyword was encoded, so its embedding is the first entry.
query_vector = query_embedding[0]
search_result = get_top_k_results(client, collection_name, query_vector, k=5)

In [None]:
# Turn every search hit into a plain dict so it can be printed readably
results = list(map(dict, search_result))

In [None]:
# Pretty-print the search hits that were converted to dicts.
pprint(results)