# QDRANT EMBEDDINGS

* Fastembed with Qdrant: https://github.com/qdrant/fastembed
* Qdrant API reference: https://api.qdrant.tech/api-reference/collections/get-collection

In [66]:
from dotenv import load_dotenv
load_dotenv()
import os
import pandas as pd
from openai import OpenAI
from qdrant_client import QdrantClient, models
import pandas as pd 
import time
import numpy as np

GPT_MODEL = "gpt-4o"
EMBEDDING_MODEL = "text-embedding-ada-002"
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

cluster_name = "m47_testing_cluster"
qdrant_client = QdrantClient(
    url=os.getenv("QDRANT_CLUSTER_URL"), 
    api_key=os.getenv("QDRANT_API_KEY"),
)

### 1. **Create collection:**

In [59]:
collection_name = "voicebot_test_collection"
vector_size = 384 # text-embedding-ada-002: 1536


try:    
    qdrant_client.delete_collection(collection_name=collection_name)
except Exception as e:
    print(e)

qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config={
        "qa_pairs": models.VectorParams(size=vector_size, distance=models.Distance.COSINE) #COSINE, EUCLID, DOT, MANHATTAN
    },
)

True

### 2. **Populate Collection:**

In [57]:
knowledge_base = pd.read_csv("knowledge_base.csv")
knowledge_base

Unnamed: 0,Question,Answer
0,I have a question about an open repair request...,We would like to advise you to use the request...
1,I have a question about an outstanding invoice...,Have you used our HIS system and/or the invoic...
2,"Hello, my name is XXX from the car dealership ...","I'm sorry, I can't help you with this question..."
3,"Hello, my name is XXX from the car dealership ...",We would like to advise you to use the enquiry...
4,I have a question about the repair of a differ...,Please use the postmaster for this enquiry or ...
5,I have a question about a part of the contract?,"I'm sorry, I can't help you with this question..."
6,"Hello, my name is XXX and my petrol card no lo...","If you have any questions about fuel cards, pl..."
7,"Hello, my name is XXX and I have a question ab...",Thank you for your enquiry. If you have any qu...
8,"Hello, my name is XXX and I have received a ca...","I'm sorry, I can't help you with this question..."
9,I have a question about an outstanding repair ...,"I'm sorry, I can't help you with this question..."


**With OpenAI Embeddings:**

In [58]:
start_time=time.time()

qdrant_client.upsert(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=id,
            vector={
                "qa_pairs": openai_client.embeddings.create(input = f"{qa_pair.Question} {qa_pair.Answer}",model = EMBEDDING_MODEL).data[0].embedding,
            },
            payload={"question": qa_pair.Question,
                     "answer": qa_pair.Answer},
        )
        for id, qa_pair in knowledge_base.iterrows()
    ],
)

end_time = time.time()
latency_per_vector = (end_time-start_time)/len(knowledge_base)
print(f"Avg. Latency per embedding ({len(knowledge_base)} emb.): {latency_per_vector}")


Avg. Latency per embedding (13 emb.): 0.4667730148021991


**With Fast Embeddings:**
* Check supported models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/#supported-text-embedding-models)

In [60]:
from fastembed import TextEmbedding

model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5") #dim = 384

start_time=time.time()

qdrant_client.upsert(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=id,
            vector={
                "qa_pairs": list(model.embed(documents=f"{qa_pair.Question} {qa_pair.Answer}"))[0].tolist(),
            },
            payload={"question": qa_pair.Question,
                     "answer": qa_pair.Answer},
        )
        for id, qa_pair in knowledge_base.iterrows()
    ],
)

end_time = time.time()
latency_per_vector = (end_time-start_time)/len(knowledge_base)
print(f"Avg. Latency per embedding ({len(knowledge_base)} emb.): {latency_per_vector}")

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 48099.82it/s]


Avg. Latency per embedding (13 emb.): 0.062459542201115534


### 3. **Search:**

In [61]:
QUESTION = "I have a question about an open repair request"

**With OpenAI Embeddings:**

In [51]:
start_time = time.time()
query_vector = openai_client.embeddings.create(input = f"{QUESTION}",model = EMBEDDING_MODEL).data[0].embedding
end_time=time.time()
latency = end_time-start_time
print(f"Latency embed query: {latency:.3f} seconds")

start_time = time.time()
top_k_vectors = qdrant_client.search(
    collection_name=collection_name,
    # query_filter=models.Filter(
    #     must=[
    #         models.FieldCondition(
    #             key="city",
    #             match=models.MatchValue(
    #                 value="London",
    #             ),
    #         )
    #     ]
    # ),
    query_vector=("qa_pairs", query_vector),
    limit=3,
)
end_time=time.time()
latency = end_time-start_time
print(f"Latency Search Index: {latency:.3f} seconds")

for vector in top_k_vectors:
    print(f"Score: {vector.score}\n\t{vector.payload}")


Latency embed query: 0.315 seconds
Latency Search Index: 0.230 seconds
Score: 0.8768959
	{'question': 'I have a question about an open repair request for one of our customers.', 'answer': 'We would like to advise you to use the request process and the systems provided by us (iSport). This will document your response to the rejected enquiry.'}
Score: 0.83825827
	{'question': 'I have a question about an outstanding repair enquiry under a warranty insurance policy for one of our customers.', 'answer': "I'm sorry, I can't help you with this question. Would you like to be forwarded to the agent? "}
Score: 0.81944686
	{'question': 'I have a question about a rejected invoice under a warranty insurance policy for one of our customers.', 'answer': "I'm sorry, I can't help you with this question. Would you like to be forwarded to the agent? "}


**With Fast Embeddings:**
* Check supported models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/#supported-text-embedding-models)

In [63]:
start_time = time.time()
query_vector = list(model.embed(documents=f"{QUESTION}"))[0].tolist()
end_time=time.time()
latency = end_time-start_time
print(f"Latency embed query: {latency:.3f} seconds")

start_time = time.time()
top_k_vectors = qdrant_client.search(
    collection_name=collection_name,
    # query_filter=models.Filter(
    #     must=[
    #         models.FieldCondition(
    #             key="city",
    #             match=models.MatchValue(
    #                 value="London",
    #             ),
    #         )
    #     ]
    # ),
    query_vector=("qa_pairs", query_vector),
    limit=3,
)
end_time=time.time()
latency = end_time-start_time
print(f"Latency Search Index: {latency:.3f} seconds")

for vector in top_k_vectors:
    print(f"Score: {vector.score}\n\t{vector.payload}")


Latency embed query: 0.027 seconds
Latency Search Index: 0.226 seconds
Score: 0.8667942
	{'question': 'I have a question about an open repair request for one of our customers.', 'answer': 'We would like to advise you to use the request process and the systems provided by us (iSport). This will document your response to the rejected enquiry.'}
Score: 0.77080995
	{'question': 'I have a question about an outstanding repair enquiry under a warranty insurance policy for one of our customers.', 'answer': "I'm sorry, I can't help you with this question. Would you like to be forwarded to the agent? "}
Score: 0.73349786
	{'question': 'I have a question about the repair of a different manufacturer? ', 'answer': 'Please use the postmaster for this enquiry or send an email to technik-nkm@vwfs.com.'}


## Comparison with current approach

In [71]:
query_vector = openai_client.embeddings.create(input = f"{QUESTION}",model = EMBEDDING_MODEL).data[0].embedding
top_n = 3

Load embeddings

In [72]:
embeddings = []
for id, qa_pair in knowledge_base.iterrows():
    embeddings.append({
        "Question": qa_pair.Question,
        "Answer": qa_pair.Answer,
        "EmbeddedText": f"{qa_pair.Question} {qa_pair.Answer}",
        "Embeddings": openai_client.embeddings.create(input = f"{qa_pair.Question} {qa_pair.Answer}",model = EMBEDDING_MODEL).data[0].embedding
    })


Compute top_k

In [75]:
embeddings_values = [embedding["Embeddings"] for embedding in embeddings]

start_time=time.time()
scores = [np.dot(query_vector, emb) for emb in embeddings_values]
top_indices = np.argsort(scores)[-top_n:][::-1]
end_time=time.time()
latency = end_time-start_time
print(f"Latency Search Index: {latency:.3f} seconds")

top_k_vectors = [{"qa_pair": embeddings[i], "score": scores[i]} for i in top_indices]

for vector in top_k_vectors:
    print(f"Score: {vector['score']}\n\t{vector['qa_pair']}")


            

Latency Search Index: 0.003 seconds
Score: 0.8768958847398984
	{'Question': 'I have a question about an open repair request for one of our customers.', 'Answer': 'We would like to advise you to use the request process and the systems provided by us (iSport). This will document your response to the rejected enquiry.', 'EmbeddedText': 'I have a question about an open repair request for one of our customers. We would like to advise you to use the request process and the systems provided by us (iSport). This will document your response to the rejected enquiry.', 'Embeddings': [-0.01758730039000511, 0.021847577765583992, -0.00550285866484046, -0.007032189052551985, -0.009128190577030182, 0.02404599077999592, 0.0168499443680048, -0.010336634702980518, 0.007462313398718834, 0.005376552231609821, 0.013456742279231548, -0.004745020531117916, 0.02489258348941803, -0.017669228836894035, -0.013477223925292492, 0.009824582375586033, 0.028811492025852203, -0.01817445270717144, -0.009531005285680294,