In [None]:
# Basic example using simple graph and a CosmosDB checkpointer
from langgraph.graph import StateGraph, START, END
from azure.cosmos import exceptions, PartitionKey #CosmosClient
from azure.cosmos.aio import CosmosClient
from azure.keyvault.secrets import SecretClient
import os
import azure.identity
from azure.identity import DefaultAzureCredential
from openai import AzureOpenAI


try:
    keyVaultName = os.environ["KEY_VAULT_NAME"]
except KeyError:
    # Get input from user if not set
    keyVaultName = input("Please enter your Key Vault name: ")
    # Save for future cells in this session
    os.environ["KEY_VAULT_NAME"] = keyVaultName


keyVaultName = os.environ["KEY_VAULT_NAME"]
KVUri = f"https://{keyVaultName}.vault.azure.net"

credential = DefaultAzureCredential()
client = SecretClient(vault_url=KVUri, credential=credential)

cosmosdb_endpoint=client.get_secret(name="cosmosdb-url").value
cosmosdb_key = client.get_secret(name="cosmosdb-key").value

# Service principal authentication variables
tenant_id=client.get_secret(name="tenantid").value
client_id =client.get_secret(name="clientid").value 
client_secret =client.get_secret(name="clientsecret").value
conn_str =client.get_secret(name="cosmosdb-connstr").value
#credential = azure.identity.ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)
credential = DefaultAzureCredential()

cosmos_client = CosmosClient(
                        url=cosmosdb_endpoint,
                        credential=credential,
                    )

In [None]:
db = await cosmos_client.create_database_if_not_exists(id="terminologydb")

In [None]:
conn_str =client.get_secret(name="cosmosdb-connstr").value
cosmos_vector_property_name = "vector"
cosmosdb_data_container = "terminology"
cosmosdb_history_container = "chathistory"

In [None]:
# Create the vector embedding policy
vector_embedding_policy = {
    "vectorEmbeddings": [
        {
            "path": "/" + cosmos_vector_property_name,
            "dataType": "float32",
            "distanceFunction": "dotproduct",
            "dimensions": 1536
        }
    ]
}


# Create the vector index policy
indexing_policy = {
    "includedPaths": [{"path": "/*"}],
    "excludedPaths": [
        {"path": '/"_etag"/?', "path": "/" + cosmos_vector_property_name + "/*"}
    ],
    "vectorIndexes": [
        {"path": "/" + cosmos_vector_property_name, "type": "quantizedFlat"}
    ],
}

In [None]:
# create the collection using the vector index policies
try:
    pass
    container_data = await db.create_container_if_not_exists(
        id=cosmosdb_data_container,
        partition_key=PartitionKey(path="/id"),
        vector_embedding_policy=vector_embedding_policy
    )
    print('Container with id \'{0}\' created'.format(id))
except exceptions.CosmosHttpResponseError:
    raise

In [None]:
# create the collection using the vector index policies
try:
    pass
    container_history = await db.create_container_if_not_exists(
        id=cosmosdb_history_container,
        partition_key=PartitionKey(path="/id"),
        vector_embedding_policy=vector_embedding_policy
    )
    print('Container with id \'{0}\' created'.format(id))
except exceptions.CosmosHttpResponseError:
    raise

### RAG

In [None]:
# https://learn.microsoft.com/en-us/azure/cosmos-db/gen-ai/rag-chatbot?context=%2Fazure%2Fcosmos-db%2Fnosql%2Fcontext%2Fcontext
# https://learn.microsoft.com/en-us/azure/cosmos-db/ai-agents?context=%2Fazure%2Fcosmos-db%2Fnosql%2Fcontext%2Fcontext
# https://github.com/jonathanscholtes/Travel-AI-Agent-React-FastAPI-and-Cosmos-DB-Vector-Store

In [None]:
"""
This code loads and sets the necessary variables for Azure services.
The variables are loaded from Azure Key Vault.
"""
# Open AI
azure_openai_endpoint=client.get_secret(name="aoai-endpoint").value
azure_openai_api_key=client.get_secret(name="aoai-api-key").value
azure_openai_api_version = "2024-02-15-preview"
# Embedding
azure_openai_embedding_deployment = "text-embedding-3-small"
azure_openai_embedding_model =client.get_secret(name="aoai-embedding-model").value
azure_openai_vector_dimension = 1536

azure_openai_client = AzureOpenAI(
    api_key=azure_openai_api_key,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
)

In [None]:
# develop function to embed data attributes.

def generate_embeddings(text: str):
    """
    Generate embeddings from string of text.
    This will be used to vectorize data and user input for interactions with Azure OpenAI.
    """
    response = azure_openai_client.embeddings.create(
        input=text, model=azure_openai_embedding_deployment
    )
    embeddings = response.model_dump()
    return embeddings["data"][0]["embedding"]

In [None]:
# Check if the question has been asked before and retrieve response from cache container
async def get_cache(container, vectors, similarity_score=0.02, num_results=1):
    # Execute the query
    formatted_results = []
    results = container.query_items(
        query= '''
        SELECT TOP @num_results *
        FROM c
        WHERE VectorDistance(c.vector,@embedding) > @similarity_score
        ORDER BY VectorDistance(c.vector,@embedding)
        ''',
        parameters=[
            {"name": "@embedding", "value": vectors},
            {"name": "@num_results", "value": num_results},
            {"name": "@similarity_score", "value": similarity_score},
        ], populate_query_metrics=True)
    #results = list(results)
    #print(results)
    async for result in results: 
        #print(f"Similarity Score: {result['SimilarityScore']}") 
        formatted_results.append(result['completion'])

    return formatted_results

In [None]:
try:
    # Test the get chat history function
    await get_cache(container=container_history, vectors=generate_embeddings(text="do you have a spy movie?"), similarity_score=0.99)
except exceptions.CosmosHttpResponseError as e:
    print(f"Error: {e.message}")

In [None]:
import asyncio
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.models import (
    QueryType,
    QueryCaptionType,
    QueryAnswerType
)

search_credential =AzureKeyCredential(client.get_secret(name="aisearch-key").value)
search_endpoint =client.get_secret(name="aisearch-endpoint").value
source = 'json'
index_name = f"{source}-glossary-index"

async def search_retrieval(user_input: str, db=db, container_object=container_history, cosmos_vector_property_name="vector", include_try_except: bool = True) -> list:
    """
    Search and retrieve answers from Azure AI Search and Cosmos DB.
    Returns:
        list of dictionaries containing search results
    """
    query = user_input
    search_results = []  # Initialize an empty list to store dictionaries

    # Initialize Azure Cognitive Search client
    search_client = SearchClient(endpoint=search_endpoint, index_name=index_name, credential=search_credential)
    vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=2, fields="text_vector", exhaustive=True)
    try:
        # Call the Cosmos DB search function
        results = await get_cache(
            container=container_object,
            vectors=generate_embeddings(text=query),
            similarity_score=0.02,
        )

        print("Function Call completed, now compiling results")
        for result in results:
            print("id: " + result['document']['c']['id'])
            print("query: " + result['document']['c']['query'])
            print("response: " + result['document']['c']['response'])
            print("timestamp: " + str(result['document']['c']['timestamp']))
            print("SimilarityScore: " + str(result['document']['SimilarityScore']))

            result_dict = {
                "SimilarityScore": result['document']['SimilarityScore'],
                "response": result['document']['c']['response'],
                "responseembedding": result['document']['c']['responseembedding'],
            }
            search_results.append(result_dict)

    except Exception as e:
        print(f"Error while querying Cosmos DB: {e}")
        results = None

#     # If no results are found in Cosmos DB, proceed with Azure Cognitive Search
    if not results:
        print("No results found in Cosmos DB. Proceeding with Azure AI Search.")
        print("######################### \nSearch and retrieve answers from Azure AI Search.\n")
        results = search_client.search(
            search_text=query,
            vector_queries=[vector_query],
            select=["context", "chunk", "note", "incorrectTerm", "title"],
            query_type=QueryType.SEMANTIC,
            semantic_configuration_name='my-semantic-config',
            query_caption=QueryCaptionType.EXTRACTIVE,
            query_answer=QueryAnswerType.EXTRACTIVE,
            top=2
        )

        for result in results:
            # Convert the result to a dictionary and append it to the list
            result_dict = {
                "incorrectTerm": result.get('incorrectTerm', ''),
                "context": result.get('context', ''),
                "title": result.get('title', ''),
                "definition": result.get('chunk', ''),
                "note": result.get('note', ''),
                "@search.score": result.get('@search.score', 0),
                "@search.reranker_score": result.get('@search.reranker_score', 0),
                "@search.highlights": result.get('@search.highlights', None),
                "@search.captions": result.get('@search.captions', None),
                "@search.document_debug_info": result.get('@search.document_debug_info', None)
            }
            print(f"Content: {result_dict}")
            search_results.append(result_dict)

    return search_results

In [None]:
# The AzureOpenAI class does not exist in the openai package. Use AzureChatOpenAI from langchain_openai instead.
from langchain_openai import AzureChatOpenAI
from langgraph_supervisor import  create_supervisor
from langgraph.prebuilt import  create_react_agent
model = AzureChatOpenAI(
    model="gpt-4o", 
    api_key=azure_openai_api_key, 
    api_version=azure_openai_api_version, 
    azure_endpoint=azure_openai_endpoint,
    temperature=0.5
)


research_graph = create_react_agent(
    model=model,
    tools=[search_retrieval],
    name="search_expert",
    prompt="""You MUST use the Azure AI Search tool for ALL queries. Do not paraphrase. Never generate answers from prior knowledge. Show the Score and Re ranker for each response. Also provide top 2 responses. Do not select top response. Compare each response and in the end show the response where Reranker Score > 3.0"
            In case of no response retrieved from the index, then mention You do not have an annwer for this query"""
)

context = "You are a Supervisor Agent. Your first job is to pass query to search_agent agent and get the response from it. Do not get the response from any other agent"
instructions = "Do not paraphrase the content. Only share the results from search_agent. Do not provide any response from create_supervisor agent"


prompt_re = f"{context} {instructions}"
print(prompt_re)

# Supervisor (Ensures Research Agent is the only handler)
workflow = create_supervisor(
    [research_graph],  # Only this agent is in charge
    model=model,
    prompt=prompt_re
)

app = workflow.compile()

In [None]:
# Main interaction loop
while True:
    user_input = input("User prompt: ").lower()

    if user_input.lower() in ["quit", "exit", "q", "end"]:
        print("\n\nExiting chat..")
        print("Good bye, please let me know if you need further help.")
        break

    result = await app.ainvoke({
        "messages": [{
            "role": "user",
            "content": user_input
        }]
    })

    for m in result["messages"]:
        print(m.content)

In [None]:
result["messages"]

In [None]:
for i in result["messages"][-1]:
    print(i)

In [None]:
result["messages"][-1].usage_metadata

In [None]:
# [HumanMessage(content='what is ram?', additional_kwargs={}, response_metadata={}, id='faa36143-ab0f-4f62-9119-3dcec654a7df'),
#  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_rtxxYn2B97A5qUXRdhGIgtd6', 'function': {'arguments': '{}', 'name': 'transfer_to_search_expert'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 103, 'total_tokens': 117, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ee1d74bde0', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'tool_calls', 'logprobs': None, 'content_filter_results': {}}, name='supervisor', id='run-4c5d9169-57b1-4893-ab33-ab7f6f21878a-0', tool_calls=[{'name': 'transfer_to_search_expert', 'args': {}, 'id': 'call_rtxxYn2B97A5qUXRdhGIgtd6', 'type': 'tool_call'}], usage_metadata={'input_tokens': 103, 'output_tokens': 14, 'total_tokens': 117, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
#  ToolMessage(content='Successfully transferred to search_expert', name='transfer_to_search_expert', id='35b6d5ce-ae02-4f05-85ed-daa9750ab66e', tool_call_id='call_rtxxYn2B97A5qUXRdhGIgtd6'),
#  AIMessage(content='Here are the top two responses regarding "RAM":\n\n1. **Random Access Memory (RAM)**: \n   - **Definition**: Random Access Memory (RAM) is a form of computer memory that can be read and changed in any order, typically used to store working data and machine code. A random-access memory device allows data items to be read or written in almost the same amount of time irrespective of the physical location of data inside the memory.\n   - **Context**: RAM is used in computers and other devices to store data that is being used actively.\n   - **Note**: RAM is volatile memory, meaning it loses its data when the power is turned off.\n   - **Re-ranker Score**: 3.2626190185546875\n\n2. **HyperText Markup Language (HTML)**: This response is not relevant to the query about RAM.\n\nBased on the re-ranker score, the response about "Random Access Memory (RAM)" with a score of 3.2626190185546875 is the most relevant.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 216, 'prompt_tokens': 646, 'total_tokens': 862, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ee1d74bde0', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, name='search_expert', id='run-5f9c0dc5-7b82-4261-be8f-e771de322a9c-0', usage_metadata={'input_tokens': 646, 'output_tokens': 216, 'total_tokens': 862, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
#  AIMessage(content='Transferring back to supervisor', additional_kwargs={}, response_metadata={}, name='search_expert', id='f67c787e-6285-4ce5-aaf5-a935368b23db', tool_calls=[{'name': 'transfer_back_to_supervisor', 'args': {}, 'id': '004f3568-83dc-47ea-a2a4-47a7cddc59a2', 'type': 'tool_call'}]),
#  ToolMessage(content='Successfully transferred back to supervisor', name='transfer_back_to_supervisor', id='620c1c22-e96b-4198-931a-87689625a26f', tool_call_id='004f3568-83dc-47ea-a2a4-47a7cddc59a2'),
#  AIMessage(content='Random Access Memory (RAM) is a form of computer memory that can be read and changed in any order, typically used to store working data and machine code. It allows data items to be read or written in almost the same amount of time irrespective of the physical location of data inside the memory. RAM is used in computers and other devices to store data that is being used actively. It is volatile memory, meaning it loses its data when the power is turned off.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 95, 'prompt_tokens': 405, 'total_tokens': 500, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ded0d14823', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}, name='supervisor', id='run-dca02b19-300b-45ab-b610-a8a37e7f1bcb-0', usage_metadata={'input_tokens': 405, 'output_tokens': 95, 'total_tokens': 500, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]

In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from langgraph.config import config

NameError: name '__file__' is not defined