In [15]:
from conf.constants import QDRANT_KEY, QDRANT_URL
from openai import OpenAI
from qdrant_client import QdrantClient

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# create an embedding using openai
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the OpenAI embedding for ``text``.

    Newlines in ``text`` are replaced with spaces before the request is made.
    A fresh ``OpenAI`` client is constructed on every call.

    Args:
        text: The string to embed.
        model: Name of the embedding model passed to the embeddings endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    flattened = text.replace("\n", " ")
    client = OpenAI()
    # The input is sent as a one-element list, so only the first result is relevant.
    response = client.embeddings.create(model=model, input=[flattened])
    first_item = response.data[0]
    return first_item.embedding

# query the vector store
def query_qdrant(query, collection_name, top_k=5):
    """Embed ``query`` and search a Qdrant collection with the resulting vector.

    The query is embedded first via ``get_embedding``; only then is a new
    ``QdrantClient`` created from ``QDRANT_URL`` / ``QDRANT_KEY``.

    Args:
        query: Free-text query to embed and search with.
        collection_name: Name of the Qdrant collection to search.
        top_k: Maximum number of hits to request (passed as ``limit``).

    Returns:
        Whatever ``QdrantClient.search`` returns for the request.
    """
    vector = get_embedding(text=query)

    client = QdrantClient(QDRANT_URL, api_key=QDRANT_KEY)

    return client.search(
        collection_name=collection_name,
        query_vector=vector,
        limit=top_k,
    )


In [21]:
COLLECTION = "quarkus_getting_started"
QUERY = "deploy Camel Quarkus application to Openshift"
NUM_RESULTS = 5
query_results = query_qdrant(
    query=QUERY,
    collection_name=COLLECTION,
    top_k=NUM_RESULTS,
)

# Tabulate the hits, one row per hit, in the order the search returned them.
# All rows are collected first and the frame is built once: growing a frame
# with pd.concat inside a loop copies it on every iteration and concatenates
# against an empty frame, which pandas flags with a FutureWarning.
records = [
    {
        "id": article.id,
        "score": round(article.score, 3),
        "page_ref": article.payload["metadata"]["page_number"],
        "entities": article.payload["metadata"]["entities"],
        "content": article.payload["page_content"],
    }
    for article in query_results
]

# Passing `columns` keeps the column order and still yields an empty,
# correctly-shaped frame when the search returns no hits.
df = pd.DataFrame(records, columns=['id', 'score', 'page_ref', 'entities', 'content'])

print("done")

done


# General structure

In [22]:
# Render the first NUM_RESULTS rows of the results table as a rich table.
display(df.head(NUM_RESULTS))

Unnamed: 0,id,score,page_ref,entities,content
0,5855fdf2-fd3d-4141-b3b4-1db8c598a208,0.854,0,0. Camel component name: Quarkus\n1. Relevant ...,"""Red Hat build of Apache Camel\n \n4.0\nGettin..."
1,1f8cdc54-a422-49bd-bab1-3010c20525f6,0.849,24,0. Camel component name: Not specified\n1. Rel...,"""CHAPTER 2. DEPLOYING QUARKUS APPLICATIONS\nYo..."
2,f0eab9fc-d602-42ed-9635-c835cf20e266,0.837,2,0. What's the Camel component name: Red Hat bu...,"""Red Hat build of Apache Camel\n \n4.0\n \nGet..."
3,fd70aeaf-eeb0-4289-8b84-93adec37ae0e,0.836,7,1. What's the Camel component name: Not specif...,"""PREFACE\nMAKING OPEN SOURCE MORE INCLUSIVE\nR..."
4,3edcbb48-2ee4-4685-9fc2-5a8b4156b452,0.835,23,0. Camel component name: Not specified\n1. Rel...,"""Camel Quarkus executes the production of bean..."


# Sizings

In [27]:
contents = df['content'].tolist()
for i, item in enumerate(contents):
    # len() on a str counts code points, not bytes; measure the UTF-8 encoded
    # length so the printed number matches its "bytes" label.
    size_in_bytes = len(item.encode("utf-8"))
    print(f"Item {i} :  {size_in_bytes} bytes \n")

Item 0 :  2400 bytes 

Item 1 :  2347 bytes 

Item 2 :  2477 bytes 

Item 3 :  2166 bytes 

Item 4 :  2408 bytes 



# Entities

In [18]:
# Print the extracted-entities text stored with each hit, one block per hit.
entities = df['entities'].tolist()
for position, entity_text in enumerate(entities):
    print(f"Item {position} : \n {entity_text!s} \n\n")



Item 0 : 
 0. Camel component name: Quarkus
1. Relevant technical concepts: Not specified
3. Thirdparty services or tools: Red Hat 


Item 1 : 
 0. Camel component name: Not specified
1. Relevant technical concepts: OpenShift, Docker build strategy, S2I Binary, Source S2I, Quarkus applications, OpenShift build strategies, Quarkus native executables, deployment strategy
3. Thirdparty services or tools: Not specified 


Item 2 : 
 0. What's the Camel component name: Red Hat build of Apache Camel for Quarkus
1. What are relevant technical concepts mentioned: Quarkus
3. What thirdparty services or tools are mentioned: Not specified 


Item 3 : 
 1. What's the Camel component name: Not specified
2. What are relevant technical concepts mentioned: Quarkus
3. What thirdparty services or tools are mentioned: Red Hat 


Item 4 : 
 0. Camel component name: Not specified
1. Relevant technical concepts mentioned: Quarkus, Camel QuarkusTestSupport, JUnit Jupiter, adviceWith, @Produces, Configuring r