In [5]:
import html
from os import getenv

from dotenv import load_dotenv
from pinecone.grpc import PineconeGRPC as Pinecone


# Load environment variables via python-dotenv before the getenv() lookups in
# the cells below read the API keys and index names.
load_dotenv()

True

In [6]:
# getenv() returns None for an unset variable. Fail here with a message that
# names the missing settings instead of handing None to the Pinecone client.
_required_env = ("PINECONE_API_KEY", "INDEX_NAME_MD", "INDEX_NAME_NORMAL")
_missing_env = [name for name in _required_env if not getenv(name)]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

pc = Pinecone(api_key=getenv("PINECONE_API_KEY"))

# Handles for the two indexes compared in this notebook: get_response() queries
# index_md when use_md=True and index_normal otherwise.
index_md = pc.Index(getenv("INDEX_NAME_MD"))
index_normal = pc.Index(getenv("INDEX_NAME_NORMAL"))

In [9]:
from openai import AzureOpenAI

# Azure OpenAI client; endpoint, key and API version are all read from the
# environment rather than being written into the notebook.
client = AzureOpenAI(
    azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=getenv("AZURE_OPENAI_API_VERSION"),
    api_key=getenv("AZURE_OPENAI_API_KEY"),
)

# Passed as `model=` to client.embeddings.create in convert_text_to_embedding.
embedding_deployment = "text-embedding-3-large"


In [10]:
def convert_text_to_embedding(text: str) -> list:
    """
    Embed a piece of text with the Azure OpenAI embeddings endpoint.

    Args:
        text (str): The text to embed.

    Returns:
        list: The embedding of the first item in the API response.
    """
    # Request a 3072-dimensional vector from the configured deployment.
    api_response = client.embeddings.create(
        model=embedding_deployment,
        input=text,
        dimensions=3072,
    )
    # Only one input string is sent, so only the first result is read.
    first_item = api_response.data[0]
    return first_item.embedding

In [11]:
# Sanity check: embed a short string and show the vector length, which should
# equal the dimensions=3072 requested inside convert_text_to_embedding.
len(convert_text_to_embedding(text="Hi"))

3072

In [13]:
def get_response(
    query: str,
    use_md: bool = False,
    top_k: int = 10,
    namespace: str = "crawl-content",
    priority: int = 2,
):
    """
    Embed a query and run a similarity search against one of the two indexes.

    Args:
        query (str): Text to search for; embedded with
            ``convert_text_to_embedding`` before querying.
        use_md (bool): If True, query ``index_md``; otherwise ``index_normal``.
        top_k (int): Number of matches to request. Defaults to 10.
        namespace (str): Namespace passed to the query. Defaults to
            "crawl-content".
        priority (int): Value used in the ``priority`` metadata filter
            (``{"priority": {"$eq": priority}}``). Defaults to 2.

    Returns:
        The object returned by ``index.query``, unchanged.
    """
    index = index_md if use_md else index_normal

    return index.query(
        vector=convert_text_to_embedding(text=query),
        top_k=top_k,
        namespace=namespace,
        include_metadata=True,
        filter={"priority": {"$eq": priority}},
        timeout=30,
    )

In [61]:
from IPython.display import HTML, Markdown

In [64]:
query = "Who is the CEO"

# Ask both indexes the same question so the retrieved chunks can be compared.
md_resp = get_response(query=query, use_md=True)
norm_resp = get_response(query=query, use_md=False)

# Keep only metadata and score for each match. A match is dropped only when its
# score is missing; a legitimate score of 0.0 is kept.
md_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in md_resp.matches
    if match.get("score") is not None
]
norm_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in norm_resp.matches
    if match.get("score") is not None
]

# Render one colour-coded list per index; the markup is identical for both, so
# it is built in a single loop instead of being written out twice.
for label, results in (("Markdown", md_results), ("Normal", norm_results)):
    rows = []
    for item in results:
        score = item["score"]
        # Color based on score
        color = 'green' if score >= 0.60 else 'orange' if score >= 0.5 else 'red'
        # chunk_text is stored content (presumably crawled pages, given the
        # "crawl-content" namespace). Escape it so markup inside a chunk cannot
        # break or inject into the rendered HTML; tolerate a missing key.
        chunk_text = html.escape(str(item["metadata"].get("chunk_text", "")))
        rows.append(
            f"<li><b>{chunk_text}</b>: <span style='color: {color};'>{score:.2f}</span></li>"
        )

    html_content = (
        "<div style='font-family: Arial, sans-serif;'>"
        "<h3>Text and Scores</h3>"
        "<ul style='list-style-type: none;'>"
        + "".join(rows)
        + "</ul></div>"
    )

    display(Markdown(f"### Q: {query} ({label})"))
    # Display the HTML
    display(HTML(html_content))

### Q: Who is the CEO (Markdown)

### Q: Who is the CEO (Normal)

In [65]:
query = "Tell me about the database, is it HIPAA compliant"

# Ask both indexes the same question so the retrieved chunks can be compared.
md_resp = get_response(query=query, use_md=True)
norm_resp = get_response(query=query, use_md=False)

# Keep only metadata and score for each match. A match is dropped only when its
# score is missing; a legitimate score of 0.0 is kept.
md_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in md_resp.matches
    if match.get("score") is not None
]
norm_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in norm_resp.matches
    if match.get("score") is not None
]

# Render one colour-coded list per index; the markup is identical for both, so
# it is built in a single loop instead of being written out twice.
for label, results in (("Markdown", md_results), ("Normal", norm_results)):
    rows = []
    for item in results:
        score = item["score"]
        # Color based on score
        color = 'green' if score >= 0.60 else 'orange' if score >= 0.5 else 'red'
        # chunk_text is stored content (presumably crawled pages, given the
        # "crawl-content" namespace). Escape it so markup inside a chunk cannot
        # break or inject into the rendered HTML; tolerate a missing key.
        chunk_text = html.escape(str(item["metadata"].get("chunk_text", "")))
        rows.append(
            f"<li><b>{chunk_text}</b>: <span style='color: {color};'>{score:.2f}</span></li>"
        )

    html_content = (
        "<div style='font-family: Arial, sans-serif;'>"
        "<h3>Text and Scores</h3>"
        "<ul style='list-style-type: none;'>"
        + "".join(rows)
        + "</ul></div>"
    )

    display(Markdown(f"### Q: {query} ({label})"))
    # Display the HTML
    display(HTML(html_content))

### Q: Tell me about the database, is it HIPAA compliant (Markdown)

### Q: Tell me about the database, is it HIPAA compliant (Normal)

In [67]:
query = "What is Pravin Wilfred known for"

# Ask both indexes the same question so the retrieved chunks can be compared.
md_resp = get_response(query=query, use_md=True)
norm_resp = get_response(query=query, use_md=False)

# Keep only metadata and score for each match. A match is dropped only when its
# score is missing; a legitimate score of 0.0 is kept.
md_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in md_resp.matches
    if match.get("score") is not None
]
norm_results = [
    {"metadata": match.get("metadata") or {}, "score": match.get("score")}
    for match in norm_resp.matches
    if match.get("score") is not None
]

# Render one colour-coded list per index; the markup is identical for both, so
# it is built in a single loop instead of being written out twice.
for label, results in (("Markdown", md_results), ("Normal", norm_results)):
    rows = []
    for item in results:
        score = item["score"]
        # Color based on score
        color = 'green' if score >= 0.60 else 'orange' if score >= 0.5 else 'red'
        # chunk_text is stored content (presumably crawled pages, given the
        # "crawl-content" namespace). Escape it so markup inside a chunk cannot
        # break or inject into the rendered HTML; tolerate a missing key.
        chunk_text = html.escape(str(item["metadata"].get("chunk_text", "")))
        rows.append(
            f"<li><b>{chunk_text}</b>: <span style='color: {color};'>{score:.2f}</span></li>"
        )

    html_content = (
        "<div style='font-family: Arial, sans-serif;'>"
        "<h3>Text and Scores</h3>"
        "<ul style='list-style-type: none;'>"
        + "".join(rows)
        + "</ul></div>"
    )

    display(Markdown(f"### Q: {query} ({label})"))
    # Display the HTML
    display(HTML(html_content))

### Q: What is Pravin Wilfred known for (Markdown)

### Q: What is Pravin Wilfred known for (Normal)