diff --git a/.openpublishing.redirection.ai.json b/.openpublishing.redirection.ai.json index 2cda8aab7cd31..b1215f1dbffff 100644 --- a/.openpublishing.redirection.ai.json +++ b/.openpublishing.redirection.ai.json @@ -2,15 +2,22 @@ "redirections": [ { "source_path_from_root": "/docs/ai/ai-extensions.md", - "redirect_url": "/dotnet/ai/microsoft-extensions-ai" + "redirect_url": "/dotnet/ai/microsoft-extensions-ai", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/conceptual/agents.md", "redirect_url": "/dotnet/ai" }, + { + "source_path_from_root": "/docs/ai/conceptual/evaluation-libraries.md", + "redirect_url": "/dotnet/ai/evaluation/libraries", + "redirect_document_id": true + }, { "source_path_from_root": "/docs/ai/get-started/dotnet-ai-overview.md", - "redirect_url": "/dotnet/ai/overview" + "redirect_url": "/dotnet/ai/overview", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/how-to/app-service-db-auth.md", @@ -24,6 +31,11 @@ "source_path_from_root": "/docs/ai/how-to/work-with-local-models.md", "redirect_url": "/dotnet/ai" }, + { + "source_path_from_root": "/docs/ai/quickstarts/evaluate-ai-response.md", + "redirect_url": "/dotnet/ai/evaluation/evaluate-ai-response", + "redirect_document_id": true + }, { "source_path_from_root": "/docs/ai/quickstarts/get-started-azure-openai.md", "redirect_url": "/dotnet/ai/quickstarts/build-chat-app" @@ -38,7 +50,8 @@ }, { "source_path_from_root": "/docs/ai/quickstarts/quickstart-assistants.md", - "redirect_url": "/dotnet/ai/quickstarts/create-assistant" + "redirect_url": "/dotnet/ai/quickstarts/create-assistant", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/quickstarts/quickstart-azure-openai-tool.md", @@ -46,19 +59,32 @@ }, { "source_path_from_root": "/docs/ai/quickstarts/quickstart-local-ai.md", - "redirect_url": "/dotnet/ai/quickstarts/chat-local-model" + "redirect_url": "/dotnet/ai/quickstarts/chat-local-model", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/quickstarts/quickstart-openai-generate-images.md", - "redirect_url": "/dotnet/ai/quickstarts/generate-images" + "redirect_url": "/dotnet/ai/quickstarts/generate-images", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/quickstarts/quickstart-openai-summarize-text.md", - "redirect_url": "/dotnet/ai/quickstarts/prompt-model" + "redirect_url": "/dotnet/ai/quickstarts/prompt-model", + "redirect_document_id": true }, { "source_path_from_root": "/docs/ai/tutorials/llm-eval.md", - "redirect_url": "/dotnet/ai/quickstarts/evaluate-ai-response" + "redirect_url": "/dotnet/ai/evaluation/evaluate-ai-response" + }, + { + "source_path_from_root": "/docs/ai/tutorials/evaluate-safety.md", + "redirect_url": "/dotnet/ai/evaluation/evaluate-safety", + "redirect_document_id": true + }, + { + "source_path_from_root": "/docs/ai/tutorials/evaluate-with-reporting.md", + "redirect_url": "/dotnet/ai/evaluation/evaluate-with-reporting", + "redirect_document_id": true } ] } diff --git a/docs/ai/quickstarts/evaluate-ai-response.md b/docs/ai/evaluation/evaluate-ai-response.md similarity index 89% rename from docs/ai/quickstarts/evaluate-ai-response.md rename to docs/ai/evaluation/evaluate-ai-response.md index e81bbea6c0e7f..5814bd26bbc9d 100644 --- a/docs/ai/quickstarts/evaluate-ai-response.md +++ b/docs/ai/evaluation/evaluate-ai-response.md @@ -10,7 +10,7 @@ ms.topic: quickstart In this quickstart, you create an MSTest app to evaluate the quality of a chat response from an OpenAI model. 
The test app uses the [Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) libraries. > [!NOTE] -> This quickstart demonstrates the simplest usage of the evaluation API. Notably, it doesn't demonstrate use of the [response caching](../conceptual/evaluation-libraries.md#cached-responses) and [reporting](../conceptual/evaluation-libraries.md#reporting) functionality, which are important if you're authoring unit tests that run as part of an "offline" evaluation pipeline. The scenario shown in this quickstart is suitable in use cases such as "online" evaluation of AI responses within production code and logging scores to telemetry, where caching and reporting aren't relevant. For a tutorial that demonstrates the caching and reporting functionality, see [Tutorial: Evaluate a model's response with response caching and reporting](../tutorials/evaluate-with-reporting.md) +> This quickstart demonstrates the simplest usage of the evaluation API. Notably, it doesn't demonstrate use of the [response caching](libraries.md#cached-responses) and [reporting](libraries.md#reporting) functionality, which are important if you're authoring unit tests that run as part of an "offline" evaluation pipeline. The scenario shown in this quickstart is suitable in use cases such as "online" evaluation of AI responses within production code and logging scores to telemetry, where caching and reporting aren't relevant. For a tutorial that demonstrates the caching and reporting functionality, see [Tutorial: Evaluate a model's response with response caching and reporting](evaluate-with-reporting.md) ## Prerequisites @@ -103,4 +103,4 @@ If you no longer need them, delete the Azure OpenAI resource and GPT-4 model dep ## Next steps - Evaluate the responses from different OpenAI models. -- Add response caching and reporting to your evaluation code. For more information, see [Tutorial: Evaluate a model's response with response caching and reporting](../tutorials/evaluate-with-reporting.md). +- Add response caching and reporting to your evaluation code. For more information, see [Tutorial: Evaluate a model's response with response caching and reporting](evaluate-with-reporting.md). diff --git a/docs/ai/tutorials/evaluate-safety.md b/docs/ai/evaluation/evaluate-safety.md similarity index 92% rename from docs/ai/tutorials/evaluate-safety.md rename to docs/ai/evaluation/evaluate-safety.md index 6ae7fc152e7d6..7a0c686130802 100644 --- a/docs/ai/tutorials/evaluate-safety.md +++ b/docs/ai/evaluation/evaluate-safety.md @@ -110,7 +110,7 @@ Complete the following steps to create an MSTest project. > [!NOTE] > This code example passes the LLM as `originalChatClient` to . The reason to include the LLM chat client here is to enable getting a chat response from the LLM, and notably, to enable response caching for it. (If you don't want to cache the LLM's response, you can create a separate, local to fetch the response from the LLM.) Instead of passing a , if you already have a for an LLM from another reporting configuration, you can pass that instead, using the overload. > - > Similarly, if you configure both [LLM-based evaluators](../conceptual/evaluation-libraries.md#quality-evaluators) and [Azure AI Foundry Evaluation service–based evaluators](../conceptual/evaluation-libraries.md#safety-evaluators) in the reporting configuration, you also need to pass the LLM to . Then it returns a that can talk to both types of evaluators. 
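As a rough sketch of the pattern that note describes (illustrative only, not one of the article's own snippets; names such as `ContentSafetyServiceConfiguration`, `ToChatConfiguration`, and `DiskBasedReportingConfiguration.Create` are recalled from the Microsoft.Extensions.AI.Evaluation packages and might differ in detail from the shipped API), the LLM chat client is passed through the safety service configuration so that quality evaluators, safety evaluators, and response caching all share one `ChatConfiguration`:

```csharp
using Azure.Identity;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.AI.Evaluation;
using Microsoft.Extensions.AI.Evaluation.Quality;
using Microsoft.Extensions.AI.Evaluation.Reporting;
using Microsoft.Extensions.AI.Evaluation.Reporting.Storage;
using Microsoft.Extensions.AI.Evaluation.Safety;

static ReportingConfiguration CreateReportingConfiguration(IChatClient originalChatClient)
{
    // Safety evaluators call the Azure AI Foundry evaluation service, so they're
    // configured with Azure project details rather than with a model deployment.
    // (Constructor parameter names here are assumptions; verify against the package docs.)
    var contentSafetyConfig = new ContentSafetyServiceConfiguration(
        credential: new DefaultAzureCredential(),
        subscriptionId: "<subscription-id>",
        resourceGroupName: "<resource-group>",
        projectName: "<ai-foundry-project>");

    // Passing the LLM chat client yields a ChatConfiguration that can talk to both
    // the LLM (for quality evaluators) and the safety service (for safety evaluators),
    // and it lets the reporting layer cache the LLM's responses.
    ChatConfiguration chatConfiguration =
        contentSafetyConfig.ToChatConfiguration(originalChatClient);

    // One reporting configuration that mixes an LLM-based quality evaluator with a
    // safety evaluator, enables response caching, and stores results on disk.
    return DiskBasedReportingConfiguration.Create(
        storageRootPath: "C:\\TestReports",
        evaluators: [new CoherenceEvaluator(), new ContentHarmEvaluator()],
        chatConfiguration: chatConfiguration,
        enableResponseCaching: true);
}
```

Individual tests would then typically create scenario runs from this reporting configuration and evaluate the (cached) LLM responses through them, as the tutorial goes on to show.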
+ > Similarly, if you configure both [LLM-based evaluators](libraries.md#quality-evaluators) and [Azure AI Foundry Evaluation service–based evaluators](libraries.md#safety-evaluators) in the reporting configuration, you also need to pass the LLM to . Then it returns a that can talk to both types of evaluators. 1. Add a method to define the [chat options](xref:Microsoft.Extensions.AI.ChatOptions) and ask the model for a response to a given question. @@ -148,6 +148,6 @@ To generate a report to view the evaluation results, see [Generate a report](eva This tutorial covers the basics of evaluating content safety. As you create your test suite, consider the following next steps: -- Configure additional evaluators, such as the [quality evaluators](../conceptual/evaluation-libraries.md#quality-evaluators). For an example, see the AI samples repo [quality and safety evaluation example](https://github.com/dotnet/ai-samples/blob/main/src/microsoft-extensions-ai-evaluation/api/reporting/ReportingExamples.Example10_RunningQualityAndSafetyEvaluatorsTogether.cs). +- Configure additional evaluators, such as the [quality evaluators](libraries.md#quality-evaluators). For an example, see the AI samples repo [quality and safety evaluation example](https://github.com/dotnet/ai-samples/blob/main/src/microsoft-extensions-ai-evaluation/api/reporting/ReportingExamples.Example10_RunningQualityAndSafetyEvaluatorsTogether.cs). - Evaluate the content safety of generated images. For an example, see the AI samples repo [image response example](https://github.com/dotnet/ai-samples/blob/main/src/microsoft-extensions-ai-evaluation/api/reporting/ReportingExamples.Example09_RunningSafetyEvaluatorsAgainstResponsesWithImages.cs). - In real-world evaluations, you might not want to validate individual results, since the LLM responses and evaluation scores can vary over time as your product (and the models used) evolve. You might not want individual evaluation tests to fail and block builds in your CI/CD pipelines when this happens. Instead, in such cases, it might be better to rely on the generated report and track the overall trends for evaluation scores across different scenarios over time (and only fail individual builds in your CI/CD pipelines when there's a significant drop in evaluation scores across multiple different tests). diff --git a/docs/ai/tutorials/evaluate-with-reporting.md b/docs/ai/evaluation/evaluate-with-reporting.md similarity index 100% rename from docs/ai/tutorials/evaluate-with-reporting.md rename to docs/ai/evaluation/evaluate-with-reporting.md diff --git a/docs/ai/conceptual/evaluation-libraries.md b/docs/ai/evaluation/libraries.md similarity index 94% rename from docs/ai/conceptual/evaluation-libraries.md rename to docs/ai/evaluation/libraries.md index 377248b45fa97..9033644d6130a 100644 --- a/docs/ai/conceptual/evaluation-libraries.md +++ b/docs/ai/evaluation/libraries.md @@ -6,14 +6,14 @@ ms.date: 07/24/2025 --- # The Microsoft.Extensions.AI.Evaluation libraries -The Microsoft.Extensions.AI.Evaluation libraries simplify the process of evaluating the quality and accuracy of responses generated by AI models in .NET intelligent apps. Various metrics measure aspects like relevance, truthfulness, coherence, and completeness of the responses. Evaluations are crucial in testing, because they help ensure that the AI model performs as expected and provides reliable and accurate results. 
+The Microsoft.Extensions.AI.Evaluation libraries simplify the process of evaluating the quality and safety of responses generated by AI models in .NET intelligent apps. Various quality metrics measure aspects like relevance, truthfulness, coherence, and completeness of the responses. Safety metrics measure aspects like hate and unfairness, violence, and sexual content. Evaluations are crucial in testing, because they help ensure that the AI model performs as expected and provides reliable and accurate results. The evaluation libraries, which are built on top of the [Microsoft.Extensions.AI abstractions](../microsoft-extensions-ai.md), are composed of the following NuGet packages: - [📦 Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) – Defines the core abstractions and types for supporting evaluation. - [📦 Microsoft.Extensions.AI.Evaluation.NLP](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains [evaluators](#nlp-evaluators) that evaluate the similarity of an LLM's response text to one or more reference responses using natural language processing (NLP) metrics. These evaluators aren't LLM or AI-based; they use traditional NLP techniques such as text tokenization and n-gram analysis to evaluate text similarity. - [📦 Microsoft.Extensions.AI.Evaluation.Quality](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) – Contains [evaluators](#quality-evaluators) that assess the quality of LLM responses in an app according to metrics such as relevance and completeness. These evaluators use the LLM directly to perform evaluations. -- [📦 Microsoft.Extensions.AI.Evaluation.Safety](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) – Contains [evaluators](#safety-evaluators), such as the `ProtectedMaterialEvaluator` and `ContentHarmEvaluator`, that use the [Azure AI Foundry](/azure/ai-foundry/) Evaluation service to perform evaluations. +- [📦 Microsoft.Extensions.AI.Evaluation.Safety](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) – Contains [evaluators](#safety-evaluators), such as the `ProtectedMaterialEvaluator` and `ContentHarmEvaluator`, that use the [Azure AI Foundry](/azure/ai-foundry/) evaluation service to perform evaluations. - [📦 Microsoft.Extensions.AI.Evaluation.Reporting](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) – Contains support for caching LLM responses, storing the results of evaluations, and generating reports from that data. - [📦 Microsoft.Extensions.AI.Evaluation.Reporting.Azure](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the reporting library with an implementation for caching LLM responses and storing the evaluation results in an [Azure Storage](/azure/storage/common/storage-introduction) container. - [📦 Microsoft.Extensions.AI.Evaluation.Console](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) – A command-line tool for generating reports and managing evaluation data. @@ -60,7 +60,7 @@ NLP evaluators evaluate the quality of an LLM response by comparing it to a refe ### Safety evaluators -Safety evaluators check for presence of harmful, inappropriate, or unsafe content in a response. They rely on the Azure AI Foundry Evaluation service, which uses a model that's fine tuned to perform evaluations. +Safety evaluators check for presence of harmful, inappropriate, or unsafe content in a response. 
They rely on the Azure AI Foundry evaluation service, which uses a model that's fine tuned to perform evaluations. | Evaluator type | Metric | Description | |---------------------------------------------------------------------------|--------------------|-------------| @@ -86,7 +86,7 @@ The library contains support for storing evaluation results and generating repor :::image type="content" source="../media/ai-extensions/pipeline-report.jpg" lightbox="../media/ai-extensions/pipeline-report.jpg" alt-text="Screenshot of an AI evaluation report in an Azure DevOps pipeline."::: -The `dotnet aieval` tool, which ships as part of the `Microsoft.Extensions.AI.Evaluation.Console` package, includes functionality for generating reports and managing the stored evaluation data and cached responses. For more information, see [Generate a report](../tutorials/evaluate-with-reporting.md#generate-a-report). +The `dotnet aieval` tool, which ships as part of the `Microsoft.Extensions.AI.Evaluation.Console` package, includes functionality for generating reports and managing the stored evaluation data and cached responses. For more information, see [Generate a report](evaluate-with-reporting.md#generate-a-report). ## Configuration diff --git a/docs/ai/tutorials/media/evaluation-report.png b/docs/ai/evaluation/media/evaluation-report.png similarity index 100% rename from docs/ai/tutorials/media/evaluation-report.png rename to docs/ai/evaluation/media/evaluation-report.png diff --git a/docs/ai/evaluation/responsible-ai.md b/docs/ai/evaluation/responsible-ai.md new file mode 100644 index 0000000000000..d2279d30b61ea --- /dev/null +++ b/docs/ai/evaluation/responsible-ai.md @@ -0,0 +1,37 @@ +--- +title: Responsible AI with .NET +description: Learn what responsible AI is and how you can use .NET to evaluate the safety of your AI apps. +ms.date: 09/08/2025 +ai-usage: ai-assisted +--- + +# Responsible AI with .NET + +*Responsible AI* refers to the practice of designing, developing, and deploying artificial intelligence systems in a way that is ethical, transparent, and aligned with human values. It emphasizes fairness, accountability, privacy, and safety to ensure that AI technologies benefit individuals and society as a whole. As AI becomes increasingly integrated into applications and decision-making processes, prioritizing responsible AI is of utmost importance. + +Microsoft has identified [six principles](https://www.microsoft.com/ai/responsible-ai) for responsible AI: + +- Fairness +- Reliability and safety +- Privacy and security +- Inclusiveness +- Transparency +- Accountability + +If you're building an AI app with .NET, the [📦 Microsoft.Extensions.AI.Evaluation.Safety](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) package provides evaluators to help ensure that the responses your app generates, both text and image, meet the standards for responsible AI. The evaluators can also detect problematic content in user input. These safety evaluators use the [Azure AI Foundry evaluation service](/azure/ai-foundry/concepts/evaluation-evaluators/risk-safety-evaluators) to perform evaluations. They include metrics for hate and unfairness, groundedness, ungrounded inference of human attributes, and the presence of: + +- Protected material +- Self-harm content +- Sexual content +- Violent content +- Vulnerable code (text-based only) +- Indirect attacks (text-based only) + +For more information about the safety evaluators, see [Safety evaluators](libraries.md#safety-evaluators). 
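As a concrete illustration, a single response can be screened for the harm-related metrics listed above roughly as follows. This is a minimal sketch under assumed API shapes: the `ContentSafetyServiceConfiguration` constructor parameters and the `Interpretation.Failed` flag are recalled from the package and may not match the shipped API exactly.

```csharp
using System.Linq;
using Azure.Identity;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.AI.Evaluation;
using Microsoft.Extensions.AI.Evaluation.Safety;

static async Task<bool> IsResponseSafeAsync(IList<ChatMessage> messages, ChatResponse response)
{
    // The safety evaluators are backed by the Azure AI Foundry evaluation service,
    // so the configuration points at an Azure subscription and project, not a model.
    // (Constructor parameter names are assumptions; verify against the package docs.)
    var serviceConfig = new ContentSafetyServiceConfiguration(
        credential: new DefaultAzureCredential(),
        subscriptionId: "<subscription-id>",
        resourceGroupName: "<resource-group>",
        projectName: "<ai-foundry-project>");

    // ContentHarmEvaluator covers the hate and unfairness, self-harm, sexual,
    // and violent content metrics listed above.
    IEvaluator evaluator = new ContentHarmEvaluator();

    EvaluationResult result = await evaluator.EvaluateAsync(
        messages, response, serviceConfig.ToChatConfiguration());

    // Each metric carries an interpretation; treat any failing metric as unsafe.
    return !result.Metrics.Values.Any(metric => metric.Interpretation?.Failed == true);
}
```

A gate like this suits online screening of individual responses; for offline test suites, the same evaluators are normally run through a reporting configuration so that results are stored and reported.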
To get started with the Microsoft.Extensions.AI.Evaluation.Safety evaluators, see [Tutorial: Evaluate response safety with caching and reporting](evaluate-safety.md). + +## See also + +- [Responsible AI at Microsoft](https://www.microsoft.com/ai/responsible-ai) +- [Training: Embrace responsible AI principles and practices](/training/modules/embrace-responsible-ai-principles-practices/) +- [Azure AI Foundry evaluation service](/azure/ai-foundry/concepts/evaluation-evaluators/risk-safety-evaluators) +- [Azure AI Content Safety](/azure/ai-services/content-safety/overview) diff --git a/docs/ai/quickstarts/snippets/evaluate-ai-responses/MyTests.cs b/docs/ai/evaluation/snippets/evaluate-ai-responses/MyTests.cs similarity index 100% rename from docs/ai/quickstarts/snippets/evaluate-ai-responses/MyTests.cs rename to docs/ai/evaluation/snippets/evaluate-ai-responses/MyTests.cs diff --git a/docs/ai/quickstarts/snippets/evaluate-ai-responses/TestAI.csproj b/docs/ai/evaluation/snippets/evaluate-ai-responses/TestAI.csproj similarity index 100% rename from docs/ai/quickstarts/snippets/evaluate-ai-responses/TestAI.csproj rename to docs/ai/evaluation/snippets/evaluate-ai-responses/TestAI.csproj diff --git a/docs/ai/tutorials/snippets/evaluate-safety/EvaluateResponseSafety.csproj b/docs/ai/evaluation/snippets/evaluate-safety/EvaluateResponseSafety.csproj similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-safety/EvaluateResponseSafety.csproj rename to docs/ai/evaluation/snippets/evaluate-safety/EvaluateResponseSafety.csproj diff --git a/docs/ai/tutorials/snippets/evaluate-safety/MSTestSettings.cs b/docs/ai/evaluation/snippets/evaluate-safety/MSTestSettings.cs similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-safety/MSTestSettings.cs rename to docs/ai/evaluation/snippets/evaluate-safety/MSTestSettings.cs diff --git a/docs/ai/tutorials/snippets/evaluate-safety/MyTests.cs b/docs/ai/evaluation/snippets/evaluate-safety/MyTests.cs similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-safety/MyTests.cs rename to docs/ai/evaluation/snippets/evaluate-safety/MyTests.cs diff --git a/docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs b/docs/ai/evaluation/snippets/evaluate-with-reporting/MyTests.cs similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-with-reporting/MyTests.cs rename to docs/ai/evaluation/snippets/evaluate-with-reporting/MyTests.cs diff --git a/docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj b/docs/ai/evaluation/snippets/evaluate-with-reporting/TestAIWithReporting.csproj similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-with-reporting/TestAIWithReporting.csproj rename to docs/ai/evaluation/snippets/evaluate-with-reporting/TestAIWithReporting.csproj diff --git a/docs/ai/tutorials/snippets/evaluate-with-reporting/WordCountEvaluator.cs b/docs/ai/evaluation/snippets/evaluate-with-reporting/WordCountEvaluator.cs similarity index 100% rename from docs/ai/tutorials/snippets/evaluate-with-reporting/WordCountEvaluator.cs rename to docs/ai/evaluation/snippets/evaluate-with-reporting/WordCountEvaluator.cs diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index 6f87e87e4daf3..e867cf33701cb 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -83,16 +83,18 @@ items: href: /azure/ai-services/openai/how-to/risks-safety-monitor?toc=/dotnet/ai/toc.json&bc=/dotnet/ai/toc.json - name: Evaluation items: + - name: Responsible AI with .NET + href: evaluation/responsible-ai.md - name: The 
Microsoft.Extensions.AI.Evaluation libraries - href: conceptual/evaluation-libraries.md + href: evaluation/libraries.md - name: Tutorials items: - name: "Quickstart: Evaluate the quality of a response" - href: quickstarts/evaluate-ai-response.md + href: evaluation/evaluate-ai-response.md - name: "Evaluate response quality with caching and reporting" - href: tutorials/evaluate-with-reporting.md + href: evaluation/evaluate-with-reporting.md - name: "Evaluate response safety with caching and reporting" - href: tutorials/evaluate-safety.md + href: evaluation/evaluate-safety.md - name: Advanced items: - name: Sample interface implementations
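For reference alongside the quickstart and tutorials reorganized above, the "online" evaluation pattern that the quickstart note contrasts with cached and reported runs looks roughly like the following. This is an illustrative sketch, not a snippet from the docs; `ChatConfiguration`, `CoherenceEvaluator`, and `CoherenceEvaluator.CoherenceMetricName` are assumed from the Microsoft.Extensions.AI.Evaluation.Quality package and may differ in detail.

```csharp
using Microsoft.Extensions.AI;
using Microsoft.Extensions.AI.Evaluation;
using Microsoft.Extensions.AI.Evaluation.Quality;

static async Task EvaluateCoherenceAsync(IChatClient chatClient)
{
    // The same IChatClient both produces the response and powers the LLM-based evaluator.
    var chatConfiguration = new ChatConfiguration(chatClient);

    List<ChatMessage> messages =
    [
        new ChatMessage(ChatRole.System, "You are a concise assistant."),
        new ChatMessage(ChatRole.User, "Explain how solar panels generate electricity."),
    ];

    ChatResponse response = await chatClient.GetResponseAsync(messages);

    // CoherenceEvaluator asks the LLM to score how coherent the response is.
    IEvaluator evaluator = new CoherenceEvaluator();
    EvaluationResult result =
        await evaluator.EvaluateAsync(messages, response, chatConfiguration);

    // In an online scenario, log the score to telemetry instead of asserting on it.
    NumericMetric coherence = result.Get<NumericMetric>(CoherenceEvaluator.CoherenceMetricName);
    Console.WriteLine($"Coherence: {coherence.Value:0.0}");
}
```

Because nothing is cached or stored here, this shape fits production code that logs scores to telemetry; the reporting tutorials above layer a `ReportingConfiguration` on top of the same evaluator calls to add caching, storage, and report generation.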