In [2]:
from conf.constants import QDRANT_KEY, QDRANT_URL
from openai import OpenAI
from qdrant_client import QdrantClient

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# create an embedding using openai
def get_embedding(text, model="text-embedding-ada-002"):
   text = text.replace("\n", " ")
   resp = OpenAI().embeddings.create(input = [text], model=model)
   return resp.data[0].embedding

# query the vector store
def query_qdrant(query, collection_name, top_k=5):
    
    embedded_query = get_embedding(text=query)

    qdrant_client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_KEY,
    )
    
    query_results = qdrant_client.search(
        collection_name=collection_name,
        query_vector=(embedded_query),
        limit=top_k,
    )
    
    return query_results


In [19]:
COLLECTION = "quarkus_reference"
QUERY = "JMS"
NUM_RESULTS = 100
query_results = query_qdrant(        
    query=QUERY, 
    collection_name=COLLECTION,
    top_k=NUM_RESULTS
    )

df = pd.DataFrame(columns=['id', 'score', 'page_ref', 'entities', 'content', 'content_size'])

for i, article in enumerate(query_results):    
    data = {
        "id": article.id, 
        "score": round(article.score, 3),
        "page_ref": article.payload["metadata"]["page_number"],
        "entities": article.payload["metadata"]["entities"],
        "content": article.payload["page_content"],              
        "content_size": len(article.payload["page_content"])  
    }
    df_new_rows = pd.DataFrame(data, index=[i])
    df = pd.concat([df, df_new_rows])    

large_items = df[df["content_size"]>4000]
large_items.head()


Unnamed: 0,id,score,page_ref,entities,content,content_size


In [18]:
# delete the large items
from qdrant_client.models import PointIdsList
client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_KEY,
    )

client.delete(
    collection_name=COLLECTION,
    points_selector= PointIdsList(
        points=["45bc76b7-77c7-4225-8aa0-1a1631c3f447"],
    ),
)

UpdateResult(operation_id=210, status=<UpdateStatus.COMPLETED: 'completed'>)