In [57]:
import streamlit as st
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, SparseVector, SparseIndexParams, PayloadSchemaType, PointStruct
from qdrant_client.http import exceptions as qdrant_exceptions

# CONFIG: qdrant
api_key = st.secrets["QDRANT_API_KEY"]
url = st.secrets["QDRANT_URL"]

### Check the Connection to Qdrant

In [53]:
client = QdrantClient(
    url=url,
    api_key=api_key,
)

try:
    '''usage'''
    content = client.get_collections()
    print(content)
except qdrant_exceptions.UnexpectedResponse as e:
    # Check if the error is a 404 Not Found
    if "404" in str(e):
        print("The server returned a 404 Not Found error, which indicates the server is active but could not find the requested URL or endpoint. This might be due to a wrong URL, an incorrect path, or a resource that doesn't exist.")
    else:
        # Re-raise the error if it's not a 404
        raise
except Exception as e:
    # Handle any other exceptions that may occur
    print(f"An unexpected error occurred: {e}")

collections=[CollectionDescription(name='ask_pdf_pages'), CollectionDescription(name='ASK_vectorstore'), CollectionDescription(name='ask_pdf_docs')]


### Example: Add a PDF to Qdrants by hand

### Add a fictional record for ask_pdf_doc

In [70]:
pdf_id = "002552f4-ccde-4518-bb5d-e861c7188653"


def check_id_exists_in_qdrant(pdfs_collection_name, pdf_id):

    try:
        existing_data = client.retrieve(
            collection_name=pdfs_collection_name,
            ids=[pdf_id],
            with_payload=False,  # change to True to see the payload
            with_vectors=False,  # change to True to see the vectors
        )

        return len(existing_data) != 0  # Returns False if 0, else True

    except Exception as e:
        print(f"Error checking for the ID in Qdrant: {e}")
        return False


check_id_exists_in_qdrant("ask_pdf_docs", pdf_id)

False

In [68]:
pdf_id = "002552f4-ccde-4518-bb5d-e861c7188653"

sample_dense_vector = [0] * 1536

sample_sparse_values = [1.0, 0.5, 0.8]
sample_sparse_indices = [0, 1, 2]
# sample_sparse_vector = {"indices": sample_sparse_indices, "values": sample_sparse_values}

### Add a fictional record for ask_pdf_doc

In [69]:
from datetime import datetime


# Example document metadata for ask_pdf_docs
document_metadata = {
    "title": "Fictional Coast Guard Directive",
    "leadership_scope": "1_National",
    "creation_date": datetime(2023, 1, 15).isoformat(),  # Example timestamp
    "effective_date": datetime(2023, 2, 1).isoformat(),
    "upsert_date": datetime.now().isoformat(),  # Current timestamp for upsert date
    # 10 years in the future
    "expiration_date": datetime(2033, 2, 1).isoformat(),
    "aux_specific": True,
    "public_release": False,
    "publication_number": "COMDTINST_M1000.6A",
    "source": "uscg.mil",
    "organization": "CG-BSX-1",
    "curator": "Smith",
    "file_name": "fictional_directive.pdf",
    "summary": "A one paragraph summary of the document",
}

# Insert the fictional document metadata into the ask_pdf_docs collection
client.upsert(
    collection_name="ask_pdf_pages",
    points=[
        PointStruct(
            id=pdf_id,
            payload=document_metadata,
            vector={
                "text-dense": sample_dense_vector,
                "text-sparse": SparseVector(
                    indices=sample_sparse_indices,
                    values=sample_sparse_values
                )
            }
        )
    ]
)

UpdateResult(operation_id=26, status=<UpdateStatus.COMPLETED: 'completed'>)

### Add a fictional record for ask_pdf_pages

In [74]:

page_metadata = {
    "content": "Nice job, Drew. This is the content of page 1",
    "title": "Important Coast Guard Document",
    "page_number": 1,
    "publication_number": "COMDTINST_M9999.6X",
    # Link to the parent document (UUID)
    "pdf_doc_id": "002552f4-ccde-4518-bb5d-e861c7188653",
}


client.upsert(
    collection_name="ask_pdf_pages",
    points=[
        PointStruct(
            id=pdf_id,
            payload=document_metadata,
            vector={
                "text-dense": sample_dense_vector,
                "text-sparse": SparseVector(
                    indices=sample_sparse_indices,
                    values=sample_sparse_values
                )
            }
        )
    ]
)


'''for just a single vector and not hybrid search
client.upsert(
    collection_name="ask_pdf_pages",
    points=[{
        "vector": sample_dense_vector,  # Page embedding vector
        "payload": page_metadata
    }]
)
'''

'for just a single vector and not hybrid search\nclient.upsert(\n    collection_name="ask_pdf_pages",\n    points=[{\n        "vector": sample_dense_vector,  # Page embedding vector\n        "payload": page_metadata\n    }]\n)\n'

### See some records

In [77]:
all_records = client.scroll(
    collection_name="ask_pdf_pages",
    limit=100000,
    with_payload=True,  # change to True to see the payload
    with_vectors=False  # change to True to see the vectors
)

print(f"""Number of records in collection "{"ask_pdf_pages"}": {len(all_records[0])}
      
The first record is:""")
(all_records[0])  # see the first point#

Number of records in collection "ask_pdf_pages": 1
      
The first record is:


[Record(id='002552f4-ccde-4518-bb5d-e861c7188653', payload={'title': 'Fictional Coast Guard Directive', 'leadership_scope': '1_National', 'creation_date': '2023-01-15T00:00:00', 'effective_date': '2023-02-01T00:00:00', 'upsert_date': '2024-09-30T13:56:32.745126', 'expiration_date': '2033-02-01T00:00:00', 'aux_specific': True, 'public_release': False, 'publication_number': 'COMDTINST_M1000.6A', 'source': 'uscg.mil', 'organization': 'CG-BSX-1', 'curator': 'Smith', 'file_name': 'fictional_directive.pdf', 'summary': 'A one paragraph summary of the document'}, vector=None, shard_key=None, order_value=None)]