In [68]:
from conf.constants import QDRANT_KEY, QDRANT_URL
from openai import OpenAI
from qdrant_client import QdrantClient

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# create an embedding using openai
def get_embedding(text, model="text-embedding-ada-002", client=None):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Newlines in ``text`` are replaced with single spaces before the request
    is sent.

    Args:
        text: The string to embed.
        model: Name of the embedding model passed to the API.
        client: Optional client object exposing ``embeddings.create``. When
            omitted, a fresh ``OpenAI()`` client is built for this call (the
            previous behaviour); pass one in to reuse a single client across
            many calls instead of constructing a new one each time.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    if client is None:
        client = OpenAI()
    single_line = text.replace("\n", " ")
    resp = client.embeddings.create(input=[single_line], model=model)
    return resp.data[0].embedding

# query the vector store
def query_qdrant(query, collection_name, top_k=5):
    """Run a vector search for ``query`` against a Qdrant collection.

    The query text is embedded with ``get_embedding``; a client is then built
    from ``QDRANT_URL`` / ``QDRANT_KEY`` and its ``search`` method is called
    with the resulting vector.

    Args:
        query: Text to search for.
        collection_name: Name of the collection to search.
        top_k: Value passed as the search ``limit``.

    Returns:
        Whatever ``QdrantClient.search`` returns for the request.
    """
    # Embed first, then connect, keeping the original call order.
    vector = get_embedding(text=query)
    client = QdrantClient(QDRANT_URL, api_key=QDRANT_KEY)

    return client.search(
        collection_name=collection_name,
        query_vector=vector,
        limit=top_k,
    )


In [75]:
COLLECTION = "rhaetor.github.io_components"
QUERY = "enterprise integration pattern, idempotent consumer"

query_results = query_qdrant(
    query=QUERY,
    collection_name=COLLECTION,
    top_k=10,
)

# Tabulate the hits, one row per hit, in the order the search returned them.
# The rows are collected first and the DataFrame is built in a single call:
# concatenating onto an (initially empty) frame inside the loop re-copies the
# whole frame on every iteration.
rows = [
    {
        "id": article.id,
        "score": round(article.score, 3),
        "page_ref": article.payload["metadata"]["page_number"],
        "entities": article.payload["metadata"]["entities"],
        "content": article.payload["page_content"],
    }
    for article in query_results
]

# Passing `columns` keeps the expected header even when there are no hits.
df = pd.DataFrame(rows, columns=['id', 'score', 'page_ref', 'entities', 'content'])

# General structure

In [76]:
# Preview the first five rows of the results table.
display(df.head(5))

Unnamed: 0,id,score,page_ref,entities,content
0,26553dab-aaed-48a7-b62b-24cbcbebadc2,0.89,_eips_idempotentConsumer-eip.html.txt_0,0. Camel component name: Idempotent Consumer\n...,"""Article source: https://rhaetor.github.io/rh-..."
1,c4cabb1e-ecb4-4c8d-8175-86a94dd7a9f6,0.855,_eips_eventDrivenConsumer-eip.html.txt,0. Camel component name: Event Driven Consumer...,"""Article source: https://rhaetor.github.io/rh-..."
2,a51abebc-7765-41e1-8b1b-f16317b31c92,0.846,_next_sql-component.html.txt_10,0. Camel component name: SQL\n1. Relevant tech...,"""When using the SQL component as consumer, you..."
3,13379a08-c043-4df9-ba30-a50b841fe6cb,0.835,_next_jpa-component.html.txt_10,0. What's the Camel component name: JPA Compon...,"""<camelContext xmlns=""http://camel.apache.org/..."
4,1a446cf5-75f4-4966-adc7-d0306bfe0c80,0.833,_next_file-component.html.txt_25,0. Camel component name: Not specified\n1. Rel...,"""<!-- define our store as a plain spring bean ..."


# Sizings

In [77]:
contents = df['content'].tolist()
for i, item in enumerate(contents):
    # len() on a str counts characters, not bytes; measure the UTF-8 encoded
    # length so the printed "bytes" label is accurate for non-ASCII content.
    print("Item", str(i), ": ", len(item.encode("utf-8")), "bytes \n")

Item 0 :  2415 bytes 

Item 1 :  1000 bytes 

Item 2 :  2462 bytes 

Item 3 :  2287 bytes 

Item 4 :  2485 bytes 

Item 5 :  2166 bytes 

Item 6 :  2462 bytes 

Item 7 :  547 bytes 

Item 8 :  2460 bytes 

Item 9 :  2499 bytes 



# Entities

In [78]:
entities = df['entities'].tolist()
# Print the complete entity summary of every hit, one block per item.
for idx, entity_text in enumerate(entities):
    print(f"Item {idx} : \n {entity_text} \n\n")



Item 0 : 
 0. Camel component name: Idempotent Consumer
1. Relevant technical concepts: EIP patterns, Message Filter, correlation key, idempotent repository, completion behavior
3. Thirdparty services or tools: Not specified 


Item 1 : 
 0. Camel component name: Event Driven Consumer
1. Relevant technical concepts mentioned: EIP patterns, default consumer model, event based, alternative consumer mode, Processor interface, Message Endpoint
3. Thirdparty services or tools mentioned: JMS queue 


Item 2 : 
 0. Camel component name: SQL
1. Relevant technical concepts mentioned: Consumer, Producer, SQL queries, Dynamic query parameters, IN queries, Simple expression, Message body, Headers, JDBC based idempotent repository
3. Third-party services or tools mentioned: None specified 


Item 3 : 
 0. What's the Camel component name: JPA Component
1. What are relevant technical concepts mentioned: Idempotent consumer, JPA EntityManager, Spring Transaction Management
3. What thirdparty services 