# Azure AI Agents - File Search

<img src="https://learn.microsoft.com/en-us/azure/ai-services/agents/media/agent-service-the-glue.png" width=800>

> https://learn.microsoft.com/en-us/azure/ai-services/agents/

In [1]:
import json
import os
import sys
import time

#from azure.ai.projects import AIProjectClient
from datetime import datetime, timezone, timedelta
from azure.ai.agents import AgentsClient 
from azure.ai.agents.models import ListSortOrder
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
from openai import AzureOpenAI

from azure.ai.agents.models import (
    FileSearchTool,
    FilePurpose,
    ListSortOrder, MessageAttachment
)


In [2]:
import azure.ai.agents.models as models
print(dir(models))

['AISearchIndexResource', 'Agent', 'AgentErrorDetail', 'AgentEventHandler', 'AgentRunStream', 'AgentStreamEvent', 'AgentThread', 'AgentThreadCreationOptions', 'AgentV1Error', 'AgentsNamedToolChoice', 'AgentsNamedToolChoiceType', 'AgentsResponseFormat', 'AgentsResponseFormatMode', 'AgentsToolChoiceOptionMode', 'AsyncAgentEventHandler', 'AsyncAgentRunStream', 'AsyncFunctionTool', 'AsyncToolSet', 'AzureAISearchQueryType', 'AzureAISearchTool', 'AzureAISearchToolDefinition', 'AzureAISearchToolResource', 'AzureFunctionBinding', 'AzureFunctionDefinition', 'AzureFunctionStorageQueue', 'AzureFunctionTool', 'AzureFunctionToolDefinition', 'BaseAgentEventHandler', 'BaseAgentEventHandlerT', 'BaseAsyncAgentEventHandler', 'BaseAsyncAgentEventHandlerT', 'BingCustomSearchConfiguration', 'BingCustomSearchTool', 'BingCustomSearchToolDefinition', 'BingCustomSearchToolParameters', 'BingGroundingSearchConfiguration', 'BingGroundingSearchToolParameters', 'BingGroundingTool', 'BingGroundingToolDefinition', 'C

In [3]:
load_dotenv()

True

In [4]:
sys.version

'3.13.4 (main, Jun  3 2025, 15:34:24) [Clang 17.0.0 (clang-1700.0.13.3)]'

## Project

In [5]:

endpoint = os.getenv("PROJECT_ENDPOINT")
print(f"Using endpoint: {endpoint}")
credential = DefaultAzureCredential()

project_client = AgentsClient(endpoint=endpoint, credential=credential)

Using endpoint: https://aq-ai-foundry-sweden-central.services.ai.azure.com/api/projects/firstProject


In [6]:
DATA_DIR = "data"

os.makedirs(DATA_DIR, exist_ok=True)

output_file = os.path.join(DATA_DIR, "document.pdf")

In [7]:
!wget https://arxiv.org/abs/2311.06242 -O $output_file

--2025-06-10 16:48:17--  https://arxiv.org/abs/2311.06242
Resolving arxiv.org (arxiv.org)... 151.101.3.42, 151.101.195.42, 151.101.67.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.3.42|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 48005 (47K) [text/html]
Saving to: ‘data/document.pdf’


2025-06-10 16:48:17 (2.87 MB/s) - ‘data/document.pdf’ saved [48005/48005]



In [8]:
model = "gpt-4o-mini"

In [9]:
file = project_client.files.upload_and_poll(file_path=output_file,
                                                  purpose=FilePurpose.AGENTS)

print(f"Uploaded file, file ID: {file.id}")

# create a vector store with the file you uploaded
vector_store = project_client.vector_stores.create_and_poll(
    file_ids=[file.id], name="document_vector_store")

print(f"Created vector store, vector store ID: {vector_store.id}")

Uploaded file, file ID: assistant-EEBaeeX8d114nFptA4DwMW
Created vector store, vector store ID: vs_t27TDF6BrVhxYNASVMSZNrPl


In [10]:
# create a file search tool
file_search_tool = FileSearchTool(vector_store_ids=[vector_store.id])

# notices that FileSearchTool as tool and tool_resources must be added or the agent will be unable to search the file
model = "gpt-4.1"
agent = project_client.create_agent(
    model=model,
    name="document_agent",
    instructions="You are an AI helpful agent to analyse document",
    tools=file_search_tool.definitions,
    tool_resources=file_search_tool.resources,
)

print(f"Created agent, agent ID: {agent.id}")

Created agent, agent ID: asst_MLR7N2n8xk9NsjOytKZAiabp


In [11]:
# Create a thread
thread = project_client.threads.create()
print(f"Created thread, thread ID: {thread.id}")

# Upload the user provided file as a messsage attachment
message_file = project_client.files.upload_and_poll(
    file_path=output_file, purpose=FilePurpose.AGENTS)

print(f"Uploaded file, file ID: {message_file.id}")

# Create a message with the file search attachment
# Notice that vector store is created temporarily when using attachments with a default expiration policy of seven days.
attachment = MessageAttachment(file_id=message_file.id,
                               tools=FileSearchTool().definitions)

prompt = "Summarize this document in three lines."

message = project_client.messages.create(thread_id=thread.id,
                                               role="user",
                                               content=prompt,
                                               attachments=[attachment])

print(f"Created message, message ID: {message.id}")

Created thread, thread ID: thread_jznGCYUQQvMaLxdYnlDw8Ql9
Uploaded file, file ID: assistant-GeUfrfKWu8js2TgA3VpUP7
Created message, message ID: msg_qnlNM1TIbuB1sbfv4CnmosRu


In [12]:
run = project_client.runs.create_and_process(thread_id=thread.id,
                                                   agent_id=agent.id)
print(f"Created run, run ID: {run.id}")

messages = project_client.messages.list(thread_id=thread.id)
print(f"Messages: {messages}")

Created run, run ID: run_HoAC4YsgMTCauqEWtsp4WoDF
Messages: <iterator object azure.core.paging.ItemPaged at 0x107b05d30>


In [13]:
    # Fetch and log all messages
    messages = project_client.messages.list(thread_id=thread.id, order=ListSortOrder.ASCENDING)

    # Print last messages from the thread
    for msg in messages:
        if msg.text_messages:
            last_text = msg.text_messages[-1]
            print(f"{msg.role}: {last_text.text.value}")

MessageRole.USER: Summarize this document in three lines.
MessageRole.AGENT: Certainly! Here is a concise three-line summary of the document:

1. The document provides an overview of key objectives, strategies, and current progress related to a specific project or initiative.
2. It highlights achievements, ongoing challenges, and areas requiring further attention or resources.
3. Recommendations for future actions and measurable outcomes are outlined to guide continued development and success.

If you need a more specific summary or details, please let me know the exact focus area.


## Another question

In [14]:
prompt = "Summarize this document in three lines."

message = project_client.messages.create(thread_id=thread.id,
                                               role="user",
                                               content=prompt,
                                               attachments=[attachment])

print(f"Created message, message ID: {message.id}")

Created message, message ID: msg_61R7LuUFaaCXQUySO8fc7nGb


In [15]:
run = project_client.runs.create_and_process(thread_id=thread.id,
                                                   agent_id=agent.id)
print(f"Created run, run ID: {run.id}")

messages = project_client.messages.list(thread_id=thread.id)
print(f"Messages: {messages}")

Created run, run ID: run_7VMBBfKRWyJeOKHVLUfMrFc1
Messages: <iterator object azure.core.paging.ItemPaged at 0x1078c2710>


In [16]:
messages = project_client.messages.list(thread_id=thread.id)
for message in messages:
    print(f"Message ID: {message.id}, Role: {message.role}, Content: {message.content}")

Message ID: msg_1F6b2k39mAntbdLGjyzje4Qu, Role: MessageRole.AGENT, Content: [{'type': 'text', 'text': {'value': 'The document outlines the key goals and recent progress of the project, emphasizing major milestones achieved and current challenges faced.  \nIt discusses resource allocation, critical issues impacting timelines, and strategies for risk mitigation.  \nRecommendations are provided for next steps to ensure continued successful project implementation.\n\nIf you need a more detailed or targeted summary, please specify the focus area or purpose.', 'annotations': []}}]
Message ID: msg_61R7LuUFaaCXQUySO8fc7nGb, Role: MessageRole.USER, Content: [{'type': 'text', 'text': {'value': 'Summarize this document in three lines.', 'annotations': []}}]
Message ID: msg_kXPm3Xx6v790N4n68AaSb3N4, Role: MessageRole.AGENT, Content: [{'type': 'text', 'text': {'value': 'Certainly! Here is a concise three-line summary of the document:\n\n1. The document provides an overview of key objectives, strate

In [17]:
messages = list(project_client.messages.list(thread_id=thread.id))
if messages:
    last_message = messages[0]
    print(f"Content: {last_message.content}")
else:
    print("No messages found.")

Content: [{'type': 'text', 'text': {'value': 'The document outlines the key goals and recent progress of the project, emphasizing major milestones achieved and current challenges faced.  \nIt discusses resource allocation, critical issues impacting timelines, and strategies for risk mitigation.  \nRecommendations are provided for next steps to ensure continued successful project implementation.\n\nIf you need a more detailed or targeted summary, please specify the focus area or purpose.', 'annotations': []}}]


## Post processing

In [18]:
# List all agents in the project
print("Listing all agents in the project:")
agents = project_client.list_agents()
for agent in agents:
    print(f"Agent ID: {agent.id}, Name: {agent.name}, Model: {agent.model}, Instructions: {agent.instructions}")

Listing all agents in the project:
Agent ID: asst_MLR7N2n8xk9NsjOytKZAiabp, Name: document_agent, Model: gpt-4.1, Instructions: You are an AI helpful agent to analyse document
Agent ID: asst_mVbBQDGl9mHKzVqdfCMGmTIb, Name: document_agent, Model: gpt-4.1, Instructions: You are an AI helpful agent to analyse document


In [20]:
# recurse through all agents and delete them adding a test to stop if the agent is not found
for agent in project_client.list_agents():
    try:
        print(f"Deleting agent ID: {agent.id}, Name: {agent.name}")
        project_client.delete_agent(agent.id)
    except Exception as e:
        print(f"Error deleting agent ID: {agent.id}, Name: {agent.name}, Error: {e}")
        break

In [21]:
# Delete the original file from the agent to free up space
print("Deleted file")
project_client.files.delete(file.id)
print("Done")

Deleted file
Done


## DO EVERYTHING in one run !!

## use an .md file as source and get insights from its content

In [22]:
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

"""
DESCRIPTION:
    This sample demonstrates how to use agent operations with file searching from
    the Azure Agents service using a synchronous client.

USAGE:
    python sample_agents_file_search.py

    Before running the sample:

    pip install azure-ai-agents azure-identity

    Set these environment variables with your own values:
    1) PROJECT_ENDPOINT - The Azure AI Project endpoint, as found in the Overview
                          page of your Azure AI Foundry portal.
    2) MODEL_DEPLOYMENT_NAME - The deployment name of the AI model, as found under the "Name" column in
       the "Models + endpoints" tab in your Azure AI Foundry project.
"""

import os
from azure.ai.agents import AgentsClient
from azure.ai.agents.models import (
    FileSearchTool,
    FilePurpose,
    ListSortOrder,
)
from azure.identity import DefaultAzureCredential

notebook_dir = os.path.dirname(os.path.abspath(''))
asset_file_path = os.path.abspath(os.path.join(notebook_dir, "./assets/product_info_1.md"))

agents_client = AgentsClient(
    endpoint=os.environ["PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
)

with agents_client:

    # Upload file and create vector store
    # [START upload_file_create_vector_store_and_agent_with_file_search_tool]
    file = agents_client.files.upload_and_poll(file_path=asset_file_path, purpose=FilePurpose.AGENTS)
    print(f"Uploaded file, file ID: {file.id}")

    vector_store = agents_client.vector_stores.create_and_poll(file_ids=[file.id], name="my_vectorstore")
    print(f"Created vector store, vector store ID: {vector_store.id}")

    # Create file search tool with resources followed by creating agent
    file_search = FileSearchTool(vector_store_ids=[vector_store.id])

    agent = agents_client.create_agent(
        model=os.environ["MODEL_DEPLOYMENT_NAME"],
        name="my-agent",
        instructions="Hello, you are helpful agent and can search information from uploaded files",
        tools=file_search.definitions,
        tool_resources=file_search.resources,
    )
    # [END upload_file_create_vector_store_and_agent_with_file_search_tool]

    print(f"Created agent, ID: {agent.id}")

    # Create thread for communication
    thread = agents_client.threads.create()
    print(f"Created thread, ID: {thread.id}")

    # Create message to thread
    message = agents_client.messages.create(
        thread_id=thread.id, role="user", content="Hello, using the uplaoded file as source, what Contoso products do you know about?"
    )
    print(f"Created message, ID: {message.id}")

    # Create and process agent run in thread with tools
    run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
    print(f"Run finished with status: {run.status}")

    if run.status == "failed":
        # Check if you got "Rate limit is exceeded.", then you want to get more quota
        print(f"Run failed: {run.last_error}")

    # [START teardown]
    # Delete the file when done
    agents_client.vector_stores.delete(vector_store.id)
    print("Deleted vector store")

    agents_client.files.delete(file_id=file.id)
    print("Deleted file")

    # Delete the agent when done
    agents_client.delete_agent(agent.id)
    print("Deleted agent")
    # [END teardown]

    # Fetch and log all messages
    messages = agents_client.messages.list(thread_id=thread.id, order=ListSortOrder.ASCENDING)
    print()
    print()

    # Print last messages from the thread
    for msg in messages:
        if msg.text_messages:
            last_text = msg.text_messages[-1]
            print(f"{msg.role}: {last_text.text.value}")


Uploaded file, file ID: assistant-MVHDUTr5THVPGygG3UpUcT
Created vector store, vector store ID: vs_Tri9wx63sPtIkQe8jxvnLORZ
Created agent, ID: asst_kxsSgTNlavN5DSjCNN1vlQLy
Created thread, ID: thread_T9j0vGoVM5ewkzbPbgWAjPWN
Created message, ID: msg_xKUAFARSfXmX75ue3s8nCfTe
Run finished with status: RunStatus.COMPLETED
Deleted vector store
Deleted file
Deleted agent


MessageRole.USER: Hello, using the uplaoded file as source, what Contoso products do you know about?
MessageRole.AGENT: Based on the uploaded file, here are some Contoso products mentioned:

- Contoso Coffee
- Contoso Tea
- Organic Coffee
- Espresso
- Herbal Tea

If you need a detailed list or more information about each product, let me know!
