In [27]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
import openai
import yaml



In [28]:
# Load settings from config.yaml. Later cells read the keys
# "OPENAI_API_KEY", "qdrant_URL" and "qdrant_API_KEY" from PARAM.
with open("config.yaml", "r") as stream:
    try:
        PARAM = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Show the parse error, then stop: swallowing it would leave PARAM
        # undefined and make the next cell fail with an unrelated NameError.
        print(exc)
        raise

In [29]:
# Model name used as the default `model` argument of get_embedding.
EMBEDDING_MODEL = "text-embedding-ada-002"
# The API key is read from the loaded config rather than written in the notebook.
openai.api_key = PARAM["OPENAI_API_KEY"]

In [30]:
def get_embedding(text, model=EMBEDDING_MODEL):
    """Return the embedding of ``text`` produced by ``openai.Embedding.create``.

    Newline characters in ``text`` are replaced with spaces before the
    request is made.

    Args:
        text: String to embed.
        model: Embedding model name; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    first_item = response['data'][0]
    return first_item['embedding']

In [31]:
# Embed one sample outcome-measure description. The text is a single string;
# adjacent literals are concatenated by Python, one literal per measure.
embedding = get_embedding(
    "Document effects of Gaucher disease in different systems of body, including nervous system, liver, and spleen.,"
    "Document adverse events subjects experience on enzyme replacement therapy,"
    "Document adverse events of subjects on substrate reduction therapy,"
    "Document long-term complications in Gaucher Disease.,"
    "Change in 36-Item Short Form Survey (SF-36) collected every 6 months/1 year.,"
    "Change in Small Fiber Neuropathy Screening List (SFNSL) collected every 6 months/1 year.,"
    "Document number of subjects experiencing neurological symptoms related to Gaucher, by using Neurological Follow-up exam,"
    "Change in Parkinson's checklist collected every 6 months/1 year"
)

In [32]:
# Show the embedding's length; the collection created below is configured with size=1536.
len(embedding)

1536

In [33]:
# Create the Qdrant client from the URL and API key stored in the loaded config.
qdrant_client = QdrantClient(
    url=PARAM["qdrant_URL"], 
    api_key=PARAM["qdrant_API_KEY"],
)

In [49]:
# Set up the "outcome_measures" collection with 1536-dimensional vectors
# (the length reported by len(embedding)) compared by cosine distance.
# NOTE(review): recreate_collection presumably replaces any existing collection
# of the same name, discarding its points — confirm before re-running this cell.
qdrant_client.recreate_collection(
    collection_name="outcome_measures",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

True

In [50]:
# Toy corpus: document key -> comma-joined outcome-measure text.
# Each value is one string built from adjacent literals, one literal per measure.
database = {
    "doc 1": (
        "Document effects of Gaucher disease in different systems of body, including nervous system, liver, and spleen.,"
        "Document adverse events subjects experience on enzyme replacement therapy,"
        "Document adverse events of subjects on substrate reduction therapy,"
        "Document long-term complications in Gaucher Disease.,"
        "Change in 36-Item Short Form Survey (SF-36) collected every 6 months/1 year.,"
        "Change in Small Fiber Neuropathy Screening List (SFNSL) collected every 6 months/1 year.,"
        "Document number of subjects experiencing neurological symptoms related to Gaucher, by using Neurological Follow-up exam,"
        "Change in Parkinson's checklist collected every 6 months/1 year"
    ),
    "doc 2": (
        "Measure cardiovascular stability and electrical brain activity during slow induction with sevoflurane.,"
        "Use cardiovascular and electrical brain measurements to limit amount of sevoflurane and predict individual sensitivity."
    ),
}



In [51]:

# Embed every document in `database` and upsert it into "outcome_measures".
# The point id is the document's position in the dict; the document key is
# kept in the payload under "snomed".
qdrant_client.upsert(
    collection_name="outcome_measures",
    points=[
        PointStruct(
            id=point_id,
            vector=get_embedding(doc_text),
            payload={"snomed": doc_key},
        )
        for point_id, (doc_key, doc_text) in enumerate(database.items())
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [None]:
def query_vector_db(text, collection_name, top_k=5):
    """Embed ``text`` and return the closest points from a Qdrant collection.

    Args:
        text: Free-text query; embedded with ``get_embedding``.
        collection_name: Name of the collection to search.
        top_k: Maximum number of hits to request (default 5).

    Returns:
        Whatever ``qdrant_client.search`` returns for the query.
    """
    embedding = get_embedding(text)
    # The embedding goes in as `query_vector`, matching the direct search call
    # elsewhere in this notebook, and `top_k` sets the limit instead of a
    # hard-coded 5.
    hits = qdrant_client.search(
        collection_name=collection_name,
        query_vector=embedding,
        limit=top_k,
    )
    return hits

In [52]:
# Short free-text query, embedded with the same get_embedding helper (and
# default model) that was used for the stored documents.
query = "Gaucher disease in different systems of body"
query_vector = get_embedding(query)

In [53]:
# Search "outcome_measures" for the points closest to the query embedding.
# Only the two documents in `database` were upserted, so fewer than `limit`
# hits come back.
hits = qdrant_client.search(
    collection_name="outcome_measures",
    query_vector=query_vector,
    limit=5  # Return 5 closest points
)

In [57]:
# Display the scored points returned by the search.
hits

[ScoredPoint(id=0, version=0, score=0.87408036, payload={'snomed': 'doc 1'}, vector=None),
 ScoredPoint(id=1, version=0, score=0.7137416, payload={'snomed': 'doc 2'}, vector=None)]

In [55]:
# Payload of the first hit; its "snomed" entry holds the document key from `database`.
hits[0].payload

{'snomed': 'doc 1'}