In [1]:
from conf.constants import QDRANT_KEY, QDRANT_URL
from openai import OpenAI
from qdrant_client import QdrantClient

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# Create an embedding using OpenAI.
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the OpenAI embedding vector for ``text``.

    Newlines in ``text`` are replaced by single spaces before the request
    is sent. A new ``OpenAI()`` client is constructed on every call.

    Args:
        text: String to embed.
        model: Name of the OpenAI embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # Splitting on "\n" and re-joining with " " swaps every newline for a space.
    single_line = " ".join(text.split("\n"))
    client = OpenAI()
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Query the vector store.
def query_qdrant(query, collection_name, top_k=5):
    """Embed ``query`` and search a Qdrant collection for the closest vectors.

    Args:
        query: Text to search for; it is embedded with ``get_embedding``.
        collection_name: Name of the Qdrant collection to search.
        top_k: Maximum number of hits to request from Qdrant.

    Returns:
        The value returned by ``QdrantClient.search`` for the embedded query.
    """
    embedded_query = get_embedding(text=query)

    client = QdrantClient(
        QDRANT_URL,
        api_key=QDRANT_KEY,
    )
    try:
        return client.search(
            collection_name=collection_name,
            query_vector=embedded_query,
            limit=top_k,
        )
    finally:
        # A fresh client is created on every call, so release its connection
        # here instead of leaving one open client per executed query.
        client.close()


In [25]:
# Search parameters for this run.
COLLECTION = "rhaetor.github.io_components"
QUERY = "JMS"
NUM_RESULTS = 5

query_results = query_qdrant(
    query=QUERY,
    collection_name=COLLECTION,
    top_k=NUM_RESULTS,
)

# List results ordered by score (rows keep the order returned by the search).
# Collect one record per hit and build the frame once: growing a DataFrame
# with pd.concat inside a loop copies every previous row on each iteration,
# and building each row via pd.DataFrame(data, index=[i]) fails when a
# payload value is list-like.
records = [
    {
        "id": article.id,
        "score": round(article.score, 3),
        "page_ref": article.payload["metadata"]["page_number"],
        "entities": article.payload["metadata"]["entities"],
        "content": article.payload["page_content"],
    }
    for article in query_results
]

# Passing `columns` keeps the expected layout even when there are no hits.
df = pd.DataFrame(
    records,
    columns=['id', 'score', 'page_ref', 'entities', 'content'],
)

# General structure

In [26]:
# Show at most NUM_RESULTS rows of the results table.
display(df.head(NUM_RESULTS))

Unnamed: 0,id,score,page_ref,entities,content
0,418e16a3-6cb6-472e-96bf-058c2dfc6a0a,0.86,_next_sjms-component.html.txt_10,0. Camel component name: SJMS\n1. Relevant tec...,"""For example, the following route shows how yo..."
1,5f0085d1-fb26-4de0-9f65-9d011f961727,0.855,_next_sjms-component.html.txt_0,0. Camel component name: Simple JMS\n1. Releva...,"""Article source: https://rhaetor.github.io/rh-..."
2,761548f4-5682-4f44-9c6e-265d39cdf7cc,0.854,_next_jms-component.html.txt_23,0. Camel component name: JMS\n1. Relevant tech...,"""Set to true, if you want to send message usin..."
3,4436ca1f-815f-4a39-9c6d-28ee4697b485,0.854,_next_sjms-component.html.txt_4,0. Camel component name: JMS\n1. Relevant tech...,"""Specifies whether to test the connection on s..."
4,f7f034db-e06a-46da-b680-0af935fe9ee4,0.854,_next_sjms2-component.html.txt_10,0. Camel component name: SJMS2\n1. Relevant te...,"""For example, the following route shows how yo..."


# Sizings

In [27]:
# Report the size of each retrieved chunk.
contents = df['content'].tolist()
for i, item in enumerate(contents):
    # len(item) counts characters; encode first so the number printed really
    # is a byte count, as the label says (identical for pure-ASCII text).
    size_in_bytes = len(item.encode("utf-8"))
    print("Item", str(i), ": ", size_in_bytes, "bytes \n")

Item 0 :  2400 bytes 

Item 1 :  2347 bytes 

Item 2 :  2477 bytes 

Item 3 :  2166 bytes 

Item 4 :  2408 bytes 



# Entities

In [28]:
# Print the extracted entity summary stored with each hit.
entities = df['entities'].tolist()
for i, item in enumerate(entities):
    # The spacing reproduces print()'s default single-space separator.
    print(f"Item {i} : \n {item} \n\n")



Item 0 : 
 0. Camel component name: SJMS
1. Relevant technical concepts: JMS endpoints, dynamic destinations, transacted mode
3. Third-party services or tools: Not specified 


Item 1 : 
 0. Camel component name: Simple JMS
1. Relevant technical concepts: JMS, Spring JMS, Maven
3. Third-party services or tools: Spring Framework, JTA Transaction 


Item 2 : 
 0. Camel component name: JMS
1. Relevant technical concepts: JMSPriority, JMSDeliveryMode, JMSExpiration, request/reply over JMS, message priority
3. Third-party services or tools: Not specified 


Item 3 : 
 0. Camel component name: JMS
1. Relevant technical concepts: JMS Consumer, Asynchronous Processing, Client ID, Concurrent Consumers, Durable Subscription
3. Third-party services or tools: Apache ActiveMQ 


Item 4 : 
 0. Camel component name: SJMS2
1. Relevant technical concepts mentioned: JMS, JMS endpoint URL, CamelJmsDestinationName header, toD, transacted, JMS Session
3. Thirdparty services or tools mentioned: Not specifie