In [1]:
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer


In [2]:
QDRANT_CONNECTION = 'http://20.163.60.98:6333'
EMBEDDING_MODEL = 'all-MiniLM-L6-v2'
COLLECTION_NAME = 'GRC_Documents_Large'

In [3]:
model = SentenceTransformer(EMBEDDING_MODEL)
qdrant_client = QdrantClient(url=QDRANT_CONNECTION)

In [4]:
def query_qdrant(query: str):
    """
    Query the Qdrant database with a given query string.
    """
    # Create embeddings for the query
    query_embedding = model.encode(query).tolist()
    
    # Perform the search in Qdrant
    results = qdrant_client.query_points(
        collection_name=COLLECTION_NAME,
        query=query_embedding,
        limit=300,
        with_payload=True,
    )
    
    return results

In [5]:
collection_info = qdrant_client.get_collection(COLLECTION_NAME)
print(f"Vectors count: {collection_info.vectors_count}")
print(f"Points count: {collection_info.points_count}")
print(f"Status: {collection_info.status}")

Vectors count: None
Points count: 2566618
Status: green


In [10]:
# Example query
query = "How did PG&E plan for the 2023 year?"
response = query_qdrant(query)
# Inspect the response object
distances = [point.score for point in response.points]
print(response.points[0])
for point in response.points[0:10]:  # Display first 10 results
    print(f"ID: {point.id}, Score: {point.score}\nPayload: {point.payload['text']}, Chunk Length: {len(point.payload['text'])}\n\n")
#     # You can access other attributes of the point as needed



id='63450258-b988-5cb1-bc54-e963feec5d29' version=968 score=0.7568855 payload={'chunk_index': 76, 'document_id': '562082344', 'proceeding_id': 'A2504004', 'source_url': 'https://docs.cpuc.ca.gov/PublishedDocs/Efile/G000/M562/K082/562082344.PDF', 'published_date': '04/09/2025', 'year': 0, 'title': 'Application filed by Pacific Gas and Electric Company on 04/09/2025 Conf# 217053', 'doc_type': 'Application', 'text': 'the United States Securities and Exchange Commission on February 13, 2025 for the period \nending December 31, 2024.  PG&E believes that its utility operations will continue to generate \nsubstantial cash with which to fund its construction activities, including the Project.'} vector=None shard_key=None order_value=None
ID: 63450258-b988-5cb1-bc54-e963feec5d29, Score: 0.7568855
Payload: the United States Securities and Exchange Commission on February 13, 2025 for the period 
ending December 31, 2024.  PG&E believes that its utility operations will continue to generate 
substa