In [None]:
! pip install -r requirements.txt

In [None]:
import os
import sys
import time
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from opentelemetry import trace
from azure.monitor.opentelemetry import configure_azure_monitor

# --- Telemetry Configuration ---
# The project connection string is required; a missing variable raises KeyError.
connection_string = os.environ["AZURE_CONNECTION_STRING"]

# Build the project client, authenticating with DefaultAzureCredential.
project_client = AIProjectClient.from_connection_string(
    conn_str=connection_string,
    credential=DefaultAzureCredential(),
)

# Ask the project for its Application Insights connection string and hand it
# to Azure Monitor so spans are exported there.
app_insights_conn_str = project_client.telemetry.get_connection_string()
configure_azure_monitor(connection_string=app_insights_conn_str)

# (Optional) Also direct telemetry output to stdout for local debugging.
project_client.telemetry.enable(destination=sys.stdout)

# Tracer used to open spans in the rest of this notebook.
tracer = trace.get_tracer(__name__)

# --- Main Operations under a Tracing Span ---
# Everything inside this span (connection lookup, agent/thread creation, the
# run, and reading the reply) is recorded under "simple-agent-sample".
with tracer.start_as_current_span("simple-agent-sample"):
    # Resolve the Azure AI Search connection whose name is given in the environment.
    SEARCH_CONNECTION_NAME = os.environ["AI_SEARCH_CONNECTION_NAME"]
    connections = list(project_client.connections.list())
    # Give next() a default so a missing/misspelled connection name produces a
    # descriptive error instead of a bare StopIteration.
    conn_id = next(
        (c.id for c in connections if c.name == SEARCH_CONNECTION_NAME),
        None,
    )
    if conn_id is None:
        available = ", ".join(sorted(str(c.name) for c in connections)) or "<none>"
        raise ValueError(
            f"No project connection named {SEARCH_CONNECTION_NAME!r}; "
            f"available connections: {available}"
        )
    print("Using connection ID:", conn_id)

    # Create the Azure AI Search tool (index name falls back to "azure-search-docs").
    SEARCH_INDEX_NAME = os.environ.get("AI_SEARCH_INDEX_NAME", "azure-search-docs")
    from azure.ai.projects.models import AzureAISearchTool
    search_tool = AzureAISearchTool(
        index_connection_id=conn_id,
        index_name=SEARCH_INDEX_NAME,
    )

    # Create an agent wired to the search tool.
    agent = project_client.agents.create_agent(
        model="gpt-4o-mini",
        name="my-assistant",
        instructions="You are a helpful assistant.",
        tools=search_tool.definitions,
        tool_resources=search_tool.resources,
    )
    print("Created agent with ID:", agent.id)

    # Create a conversation thread.
    thread = project_client.agents.create_thread()
    print("Created thread with ID:", thread.id)

    # Send a user message.
    project_client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content="From my index, what is binary quantization in Azure Search?",
    )
    print("User message created.")

    # create_and_process_run polls internally and only returns once the run has
    # left the queued / in_progress / requires_action states, so no additional
    # polling loop is needed here.
    run = project_client.agents.create_and_process_run(
        thread_id=thread.id,
        assistant_id=agent.id,
    )
    print("Run completed with status:", run.status)
    if run.status == "failed":
        # Surface the service-side error instead of only the status.
        print("Run failed:", run.last_error)

    # Retrieve and print the assistant's reply.
    messages = project_client.agents.list_messages(thread_id=thread.id)
    assistant_replies = [m for m in messages.data if m.role == "assistant"]
    if assistant_replies:
        # NOTE(review): if the service returns messages newest-first, [-1] is the
        # oldest assistant message; equivalent for this single-turn thread, but
        # confirm the ordering before reusing this on longer conversations.
        reply = assistant_replies[-1].content[0].text.value
        print("Assistant reply:", reply)
    else:
        print("No assistant reply received.")

print("Tracing complete. Check the 'Tracing' tab in your Azure AI Foundry project page.")


Using connection ID: /subscriptions/ee787b9b-a25f-4d20-86e9-45fcea5253dd/resourceGroups/fsunavala-sandbox/providers/Microsoft.MachineLearningServices/workspaces/fsunavala-aml-sandbox/connections/AzureAISearch
Created agent with ID: asst_C5ZWaDxrTx1Frb2Dpi5GHbAt
Created thread with ID: thread_06Eu6mTlO7ucAVQ5pMjlz6qq
User message created.
Run completed with status: RunStatus.COMPLETED
Assistant reply: Binary quantization in Azure Search is a method used for compressing embeddings, particularly when dealing with high-dimensional data (greater than 1024 dimensions). Binary quantization is effective when the embeddings are centered around zero, which is a common trait for popular embedding models like OpenAI's and Cohere's.

Key points about binary quantization in Azure Search include:

1. **Efficiency**: It significantly reduces the storage space required for vector storage and improves query response times.
2. **Combination with MRL**: When used alongside multilevel compression (MRL), bi