diff --git a/Cargo.lock b/Cargo.lock index 9ec45cd..1587d93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,7 @@ version = 4 name = "ado-aw" version = "0.11.0" dependencies = [ + "ado-aw-derive", "anyhow", "async-trait", "axum", @@ -32,6 +33,15 @@ dependencies = [ "url", ] +[[package]] +name = "ado-aw-derive" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "aho-corasick" version = "1.1.4" diff --git a/Cargo.toml b/Cargo.toml index 8b68991..1b26f43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ version = "0.11.0" edition = "2024" [dependencies] +ado-aw-derive = { path = "ado-aw-derive" } clap = { features = ["derive", "env"], version = "4.5.40" } anyhow = "1.0.100" async-trait = "0.1" diff --git a/ado-aw-derive/Cargo.toml b/ado-aw-derive/Cargo.toml new file mode 100644 index 0000000..865d022 --- /dev/null +++ b/ado-aw-derive/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "ado-aw-derive" +version = "0.1.0" +edition = "2024" + +[lib] +proc-macro = true + +[dependencies] +syn = { version = "2", features = ["full", "extra-traits"] } +quote = "1" +proc-macro2 = "1" diff --git a/ado-aw-derive/src/lib.rs b/ado-aw-derive/src/lib.rs new file mode 100644 index 0000000..d456cca --- /dev/null +++ b/ado-aw-derive/src/lib.rs @@ -0,0 +1,338 @@ +//! Derive macros for sanitization traits. +//! +//! Provides `#[derive(SanitizeConfig)]` and `#[derive(SanitizeContent)]` for automatic +//! implementation of field-level sanitization on structs. +//! +//! # Internal crate coupling +//! +//! The generated code references symbols via `crate::sanitize::` paths (e.g., +//! `crate::sanitize::sanitize_config`, `crate::sanitize::SanitizeConfig`). This +//! hard-codes the consumer crate's module layout. The coupling is intentional — +//! this proc macro is designed exclusively for the `ado-aw` crate and is not +//! intended for external use. If the `sanitize` module is ever renamed or +//! 
restructured, the generated paths in this crate must be updated to match. + +use proc_macro::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Fields, GenericArgument, PathArguments, Type, parse_macro_input}; + +// ── Shared helpers ───────────────────────────────────────────────────────── + +/// Parsed field-level attributes for a sanitization derive. +struct FieldAttrs { + skip: bool, + nested: bool, + light: bool, + sanitize_keys: bool, +} + +/// Parse `#[sanitize_config(...)]` or `#[sanitize_content(...)]` attributes on a field. +fn parse_field_attrs(field: &syn::Field, attr_name: &str) -> FieldAttrs { + let mut attrs = FieldAttrs { + skip: false, + nested: false, + light: false, + sanitize_keys: false, + }; + + for attr in &field.attrs { + if attr.path().is_ident(attr_name) { + let _ = attr.parse_nested_meta(|meta| { + if meta.path.is_ident("skip") { + attrs.skip = true; + } else if meta.path.is_ident("nested") { + attrs.nested = true; + } else if meta.path.is_ident("light") { + attrs.light = true; + } else if meta.path.is_ident("sanitize_keys") { + attrs.sanitize_keys = true; + } + Ok(()) + }); + } + } + + attrs +} + +/// Check whether a `Type` is exactly `String`. +fn is_string(ty: &Type) -> bool { + matches_path_ident(ty, "String") +} + +/// Check whether a `Type` is `Option<String>`. +fn is_option_string(ty: &Type) -> bool { + is_generic_of(ty, "Option", is_string) +} + +/// Check whether a `Type` is `Option<Vec<String>>`. +fn is_option_vec_string(ty: &Type) -> bool { + is_generic_of(ty, "Option", is_vec_string) +} + +/// Check whether a `Type` is `Vec<String>`. +fn is_vec_string(ty: &Type) -> bool { + is_generic_of(ty, "Vec", is_string) +} + +/// Check whether a `Type` is `HashMap<String, String>` (std or std::collections). 
+fn is_hashmap_string_string(ty: &Type) -> bool { + if let Type::Path(type_path) = ty { + let seg = type_path.path.segments.last(); + if let Some(seg) = seg { + if seg.ident == "HashMap" { + if let PathArguments::AngleBracketed(args) = &seg.arguments { + let type_args: Vec<_> = args + .args + .iter() + .filter_map(|a| { + if let GenericArgument::Type(t) = a { + Some(t) + } else { + None + } + }) + .collect(); + return type_args.len() == 2 + && is_string(type_args[0]) + && is_string(type_args[1]); + } + } + } + } + false +} + +/// Check whether a `Type` is a simple path matching `ident` (e.g., `String`). +fn matches_path_ident(ty: &Type, ident: &str) -> bool { + if let Type::Path(type_path) = ty { + type_path.path.is_ident(ident) + } else { + false + } +} + +/// Check whether a `Type` is `Wrapper<Inner>` where `Inner` satisfies `pred`. +fn is_generic_of(ty: &Type, wrapper: &str, pred: fn(&Type) -> bool) -> bool { + if let Type::Path(type_path) = ty { + if let Some(seg) = type_path.path.segments.last() { + if seg.ident == wrapper { + if let PathArguments::AngleBracketed(args) = &seg.arguments { + if let Some(GenericArgument::Type(inner)) = args.args.first() { + return pred(inner); + } + } + } + } + } + false +} + +// ── #[derive(SanitizeConfig)] ────────────────────────────────────────────── + +/// Derive macro for `SanitizeConfig`. +/// +/// Automatically implements `sanitize_config_fields(&mut self)` by calling +/// `crate::sanitize::sanitize_config()` on all recognised string-typed fields. +/// +/// # Field attributes +/// +/// - `#[sanitize_config(skip)]` — do not sanitize this field. +/// - `#[sanitize_config(nested)]` — call `sanitize_config_fields()` on this field +/// (it must implement `SanitizeConfig`). +/// - `#[sanitize_config(sanitize_keys)]` — for `HashMap<String, String>`, also +/// sanitize the keys (default: values only). 
+#[proc_macro_derive(SanitizeConfig, attributes(sanitize_config))] +pub fn derive_sanitize_config(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let name = &input.ident; + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); + + let fields = match &input.data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => &fields.named, + _ => { + return syn::Error::new_spanned( + &input.ident, + "SanitizeConfig can only be derived for structs with named fields", + ) + .to_compile_error() + .into(); + } + }, + _ => { + return syn::Error::new_spanned( + &input.ident, + "SanitizeConfig can only be derived for structs", + ) + .to_compile_error() + .into(); + } + }; + + let mut stmts = Vec::new(); + + for field in fields { + let field_name = field.ident.as_ref().unwrap(); + let attrs = parse_field_attrs(field, "sanitize_config"); + + if attrs.skip { + continue; + } + + if attrs.nested { + stmts.push(quote! { + self.#field_name.sanitize_config_fields(); + }); + continue; + } + + let ty = &field.ty; + + if is_string(ty) { + stmts.push(quote! { + self.#field_name = crate::sanitize::sanitize_config(&self.#field_name); + }); + } else if is_option_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.as_deref().map(crate::sanitize::sanitize_config); + }); + } else if is_option_vec_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.as_ref().map(|v| { + v.iter().map(|s| crate::sanitize::sanitize_config(s)).collect() + }); + }); + } else if is_vec_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.iter().map(|s| crate::sanitize::sanitize_config(s)).collect(); + }); + } else if is_hashmap_string_string(ty) { + if attrs.sanitize_keys { + stmts.push(quote! 
{ + self.#field_name = self.#field_name.iter().map(|(k, v)| { + (crate::sanitize::sanitize_config(k), crate::sanitize::sanitize_config(v)) + }).collect(); + }); + } else { + stmts.push(quote! { + for v in self.#field_name.values_mut() { + *v = crate::sanitize::sanitize_config(v); + } + }); + } + } + // else: skip (numeric, boolean, enum, complex types) + } + + let expanded = quote! { + impl #impl_generics crate::sanitize::SanitizeConfig for #name #ty_generics #where_clause { + fn sanitize_config_fields(&mut self) { + #(#stmts)* + } + } + }; + + expanded.into() +} + +// ── #[derive(SanitizeContent)] ───────────────────────────────────────────── + +/// Derive macro for `SanitizeContent`. +/// +/// Automatically implements `sanitize_content_fields(&mut self)` by calling +/// `crate::sanitize::sanitize()` on all recognised string-typed fields. +/// +/// # Field attributes +/// +/// - `#[sanitize_content(skip)]` — do not sanitize this field. +/// - `#[sanitize_content(nested)]` — call `sanitize_content_fields()` on this field +/// (it must implement `SanitizeContent`). +/// - `#[sanitize_content(light)]` — apply only control character removal (for +/// structural identifiers like wiki page paths that shouldn't be HTML-escaped). 
+#[proc_macro_derive(SanitizeContent, attributes(sanitize_content))] +pub fn derive_sanitize_content(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + let name = &input.ident; + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); + + let fields = match &input.data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => &fields.named, + _ => { + return syn::Error::new_spanned( + &input.ident, + "SanitizeContent can only be derived for structs with named fields", + ) + .to_compile_error() + .into(); + } + }, + _ => { + return syn::Error::new_spanned( + &input.ident, + "SanitizeContent can only be derived for structs", + ) + .to_compile_error() + .into(); + } + }; + + let mut stmts = Vec::new(); + + for field in fields { + let field_name = field.ident.as_ref().unwrap(); + let attrs = parse_field_attrs(field, "sanitize_content"); + + if attrs.skip { + continue; + } + + if attrs.nested { + stmts.push(quote! { + self.#field_name.sanitize_content_fields(); + }); + continue; + } + + let ty = &field.ty; + + // Choose the sanitization function based on the `light` attribute. + let sanitize_fn = if attrs.light { + quote! { crate::sanitize::sanitize_light } + } else { + quote! { crate::sanitize::sanitize } + }; + + if is_string(ty) { + stmts.push(quote! { + self.#field_name = #sanitize_fn(&self.#field_name); + }); + } else if is_option_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.as_deref().map(#sanitize_fn); + }); + } else if is_option_vec_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.as_ref().map(|v| { + v.iter().map(|s| #sanitize_fn(s)).collect() + }); + }); + } else if is_vec_string(ty) { + stmts.push(quote! { + self.#field_name = self.#field_name.iter().map(|s| #sanitize_fn(s)).collect(); + }); + } + // else: skip + } + + let expanded = quote! 
{ + impl #impl_generics crate::sanitize::SanitizeContent for #name #ty_generics #where_clause { + fn sanitize_content_fields(&mut self) { + #(#stmts)* + } + } + }; + + expanded.into() +} diff --git a/src/compile/mod.rs b/src/compile/mod.rs index e545c6e..2dc4891 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -53,7 +53,14 @@ pub async fn compile_pipeline(input_path: &str, output_path: Option<&str>) -> Re .with_context(|| format!("Failed to read input file: {}", input_path.display()))?; debug!("Input file size: {} bytes", content.len()); - let (front_matter, markdown_body) = parse_markdown(&content)?; + let (mut front_matter, markdown_body) = parse_markdown(&content)?; + + // Sanitize all front matter text fields before any further processing. + // This neutralizes pipeline command injection (##vso[), strips control + // characters, and enforces content limits across all config values. + use crate::sanitize::SanitizeConfig; + front_matter.sanitize_config_fields(); + info!("Parsed agent: '{}'", front_matter.name); debug!("Description: {}", front_matter.description); debug!("Target: {:?}", front_matter.target); @@ -243,7 +250,10 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { ) })?; - let (front_matter, markdown_body) = parse_markdown(&content)?; + let (mut front_matter, markdown_body) = parse_markdown(&content)?; + + use crate::sanitize::SanitizeConfig; + front_matter.sanitize_config_fields(); common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; diff --git a/src/compile/types.rs b/src/compile/types.rs index 85b00f9..38e4c69 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -2,8 +2,10 @@ //! //! This module defines the front matter grammar that is shared across all compile targets. 
+use ado_aw_derive::SanitizeConfig; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; /// Target platform for compiled pipeline #[derive(Debug, Deserialize, Clone, Default, PartialEq)] @@ -62,7 +64,16 @@ impl PoolConfig { } } -#[derive(Debug, Deserialize, Clone)] +impl SanitizeConfigTrait for PoolConfig { + fn sanitize_config_fields(&mut self) { + match self { + PoolConfig::Name(name) => *name = crate::sanitize::sanitize_config(name), + PoolConfig::Full(full) => full.sanitize_config_fields(), + } + } +} + +#[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct PoolConfigFull { pub name: String, #[serde(default)] @@ -110,7 +121,16 @@ impl ScheduleConfig { } } -#[derive(Debug, Deserialize, Clone)] +impl SanitizeConfigTrait for ScheduleConfig { + fn sanitize_config_fields(&mut self) { + match self { + ScheduleConfig::Simple(s) => *s = crate::sanitize::sanitize_config(s), + ScheduleConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + +#[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct ScheduleOptions { /// Fuzzy schedule expression (e.g., "daily around 14:00") pub run: String, @@ -172,7 +192,16 @@ impl EngineConfig { } } -#[derive(Debug, Deserialize, Clone)] +impl SanitizeConfigTrait for EngineConfig { + fn sanitize_config_fields(&mut self) { + match self { + EngineConfig::Simple(s) => *s = crate::sanitize::sanitize_config(s), + EngineConfig::Full(opts) => opts.sanitize_config_fields(), + } + } +} + +#[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct EngineOptions { /// AI model to use (defaults to claude-opus-4.5) #[serde(default)] @@ -222,6 +251,20 @@ pub struct ToolsConfig { pub azure_devops: Option, } +impl SanitizeConfigTrait for ToolsConfig { + fn sanitize_config_fields(&mut self) { + self.bash = self.bash.as_ref().map(|v| { + v.iter().map(|s| crate::sanitize::sanitize_config(s)).collect() + }); + if let Some(ref mut cm) = 
self.cache_memory { + cm.sanitize_config_fields(); + } + if let Some(ref mut ado) = self.azure_devops { + ado.sanitize_config_fields(); + } + } +} + /// Cache memory tool configuration — accepts both `true` and object formats /// /// Examples: @@ -260,8 +303,17 @@ impl CacheMemoryToolConfig { } } +impl SanitizeConfigTrait for CacheMemoryToolConfig { + fn sanitize_config_fields(&mut self) { + match self { + CacheMemoryToolConfig::Enabled(_) => {} + CacheMemoryToolConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + /// Cache memory options -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct CacheMemoryOptions { /// Allowed file extensions (e.g., [".md", ".json", ".txt"]). /// Defaults to all extensions if empty or not specified. @@ -325,8 +377,17 @@ impl AzureDevOpsToolConfig { } } +impl SanitizeConfigTrait for AzureDevOpsToolConfig { + fn sanitize_config_fields(&mut self) { + match self { + AzureDevOpsToolConfig::Enabled(_) => {} + AzureDevOpsToolConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + /// Azure DevOps MCP options -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct AzureDevOpsOptions { /// ADO API toolset groups to enable (e.g., repos, wit, core, work-items) /// Passed as `-d` flags to the ADO MCP entrypoint. @@ -358,6 +419,14 @@ pub struct RuntimesConfig { pub lean: Option, } +impl SanitizeConfigTrait for RuntimesConfig { + fn sanitize_config_fields(&mut self) { + if let Some(ref mut lean) = self.lean { + lean.sanitize_config_fields(); + } + } +} + /// Azure DevOps runtime parameter definition. 
/// /// These are emitted as top-level `parameters:` in the generated pipeline YAML, @@ -375,7 +444,7 @@ pub struct RuntimesConfig { /// - debug /// - trace /// ``` -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] +#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, SanitizeConfig)] pub struct PipelineParameter { /// Parameter name (must be a valid ADO identifier) pub name: String, @@ -468,6 +537,53 @@ pub struct FrontMatter { pub parameters: Vec, } +impl SanitizeConfigTrait for FrontMatter { + fn sanitize_config_fields(&mut self) { + self.name = crate::sanitize::sanitize_config(&self.name); + self.description = crate::sanitize::sanitize_config(&self.description); + if let Some(ref mut s) = self.schedule { + s.sanitize_config_fields(); + } + self.workspace = self.workspace.as_deref().map(crate::sanitize::sanitize_config); + if let Some(ref mut p) = self.pool { + p.sanitize_config_fields(); + } + self.engine.sanitize_config_fields(); + if let Some(ref mut t) = self.tools { + t.sanitize_config_fields(); + } + if let Some(ref mut r) = self.runtimes { + r.sanitize_config_fields(); + } + for repo in &mut self.repositories { + repo.sanitize_config_fields(); + } + self.checkout = self.checkout.iter().map(|s| crate::sanitize::sanitize_config(s)).collect(); + for mcp in self.mcp_servers.values_mut() { + mcp.sanitize_config_fields(); + } + // safe_outputs: HashMap — opaque JSON, sanitized at + // Stage 2 execution via get_tool_config() when deserialized into typed configs. + if let Some(ref mut t) = self.triggers { + t.sanitize_config_fields(); + } + if let Some(ref mut n) = self.network { + n.sanitize_config_fields(); + } + // steps, post_steps, setup, teardown: Vec — opaque YAML + // passed through to the pipeline, validated by ADO at parse time. 
+ if let Some(ref mut p) = self.permissions { + p.sanitize_config_fields(); + } + for v in self.env.values_mut() { + *v = crate::sanitize::sanitize_config(v); + } + for p in &mut self.parameters { + p.sanitize_config_fields(); + } + } +} + fn default_model() -> String { "claude-opus-4.5".to_string() } @@ -479,7 +595,7 @@ fn default_model() -> String { /// - Core Azure DevOps/GitHub endpoints (always included) /// - MCP-specific endpoints for each enabled MCP /// - User-specified additional hosts from `allow` field -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct NetworkConfig { /// Additional allowed host patterns (supports wildcards like *.example.com) /// Core Azure DevOps and GitHub hosts are always allowed. @@ -511,7 +627,7 @@ pub struct NetworkConfig { /// permissions: /// write: my-write-arm-connection /// ``` -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct PermissionsConfig { /// ARM service connection for read-only ADO access. /// Token is minted and given to the agent in Stage 1 (inside AWF sandbox). 
@@ -525,7 +641,7 @@ pub struct PermissionsConfig { } /// Repository resource definition -#[derive(Debug, Deserialize, Clone)] +#[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct Repository { pub repository: String, #[serde(rename = "type")] @@ -548,8 +664,17 @@ pub enum McpConfig { WithOptions(McpOptions), } +impl SanitizeConfigTrait for McpConfig { + fn sanitize_config_fields(&mut self) { + match self { + McpConfig::Enabled(_) => {} + McpConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + /// Detailed MCP options -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct McpOptions { /// Whether this MCP is enabled (default: true) #[serde(default)] @@ -594,8 +719,16 @@ pub struct TriggerConfig { pub pipeline: Option, } +impl SanitizeConfigTrait for TriggerConfig { + fn sanitize_config_fields(&mut self) { + if let Some(ref mut p) = self.pipeline { + p.sanitize_config_fields(); + } + } +} + /// Pipeline completion trigger configuration -#[derive(Debug, Deserialize, Clone)] +#[derive(Debug, Deserialize, Clone, SanitizeConfig)] pub struct PipelineTrigger { /// The name of the source pipeline that triggers this one pub name: String, diff --git a/src/execute.rs b/src/execute.rs index 7583199..295aab7 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -10,7 +10,7 @@ use std::collections::HashMap; use std::path::Path; use crate::ndjson::{self, SAFE_OUTPUT_FILENAME}; -use crate::sanitize::Sanitize; +use crate::sanitize::SanitizeContent; use crate::safeoutputs::{ AddBuildTagResult, AddPrCommentResult, CreateBranchResult, CreateGitTagResult, CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, CommentOnWorkItemResult, @@ -395,7 +395,7 @@ pub async fn execute_safe_output( "report-incomplete" => { let mut output: ReportIncompleteResult = serde_json::from_value(entry.clone()) .map_err(|e| anyhow::anyhow!("Failed to parse report-incomplete: {}", e))?; - 
output.sanitize_fields(); + output.sanitize_content_fields(); debug!("report-incomplete: {}", output.reason); ExecutionResult::failure(format!("Agent reported task incomplete: {}", output.reason)) } diff --git a/src/mcp.rs b/src/mcp.rs index 25e1fe5..4af854b 100644 --- a/src/mcp.rs +++ b/src/mcp.rs @@ -9,7 +9,7 @@ use serde_json::Value; use std::path::PathBuf; use crate::ndjson::{self, SAFE_OUTPUT_FILENAME}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::safeoutputs::{ AddBuildTagParams, AddBuildTagResult, AddPrCommentParams, AddPrCommentResult, @@ -635,7 +635,7 @@ fields you want to update." info!("Tool called: update-work-item - id={}", params.0.id); let mut result: UpdateWorkItemResult = params.0.try_into()?; // Sanitize before persisting to NDJSON (defense-in-depth; Stage 2 sanitizes again) - result.sanitize_fields(); + result.sanitize_content_fields(); self.write_safe_output_file(&result).await .map_err(|e| anyhow_to_mcp_error(anyhow::anyhow!("Failed to write safe output: {}", e)))?; info!("Work item update queued for #{}", result.id); diff --git a/src/runtimes/lean.rs b/src/runtimes/lean.rs index 22721a5..e90fe53 100644 --- a/src/runtimes/lean.rs +++ b/src/runtimes/lean.rs @@ -8,8 +8,11 @@ //! Lean is installed via elan (the Lean toolchain manager) into `$HOME/.elan/bin`, //! then symlinked into `/tmp/awf-tools/` for AWF chroot compatibility. 
+use ado_aw_derive::SanitizeConfig; use serde::Deserialize; +use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; + /// Lean 4 runtime configuration — accepts both `true` and object formats /// /// Examples: @@ -50,8 +53,17 @@ impl LeanRuntimeConfig { } } +impl SanitizeConfigTrait for LeanRuntimeConfig { + fn sanitize_config_fields(&mut self) { + match self { + LeanRuntimeConfig::Enabled(_) => {} + LeanRuntimeConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + /// Lean 4 options -#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] pub struct LeanOptions { /// Lean toolchain to install (e.g., "stable", "leanprover/lean4:v4.29.1"). /// Defaults to "stable" if not specified. If a `lean-toolchain` file exists diff --git a/src/safeoutputs/add_build_tag.rs b/src/safeoutputs/add_build_tag.rs index 9eed8b9..4f0ea2a 100644 --- a/src/safeoutputs/add_build_tag.rs +++ b/src/safeoutputs/add_build_tag.rs @@ -1,12 +1,13 @@ //! Add build tag safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -55,8 +56,8 @@ tool_result! 
{ // ── Stage 2: Sanitization ───────────────────────────────────────────────── -impl Sanitize for AddBuildTagResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for AddBuildTagResult { + fn sanitize_content_fields(&mut self) { self.tag = sanitize_text(&self.tag); } } @@ -74,7 +75,7 @@ impl Sanitize for AddBuildTagResult { /// tag-prefix: "agent-" /// allow-any-build: false /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct AddBuildTagConfig { /// Restrict which tags can be applied. Empty means any tag is allowed. /// Supports simple wildcard patterns: entries ending with `*` match by prefix. diff --git a/src/safeoutputs/add_pr_comment.rs b/src/safeoutputs/add_pr_comment.rs index 959156d..52e93e2 100644 --- a/src/safeoutputs/add_pr_comment.rs +++ b/src/safeoutputs/add_pr_comment.rs @@ -6,7 +6,8 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -108,8 +109,8 @@ tool_result! 
{ } } -impl Sanitize for AddPrCommentResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for AddPrCommentResult { + fn sanitize_content_fields(&mut self) { self.content = sanitize_text(&self.content); // Strip control characters from structural fields for defense-in-depth self.repository = self.repository.chars().filter(|c| !c.is_control()).collect(); @@ -134,7 +135,7 @@ impl Sanitize for AddPrCommentResult { /// - Active /// - Closed /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct AddPrCommentConfig { /// Prefix prepended to all comments (e.g., "[Agent Review] ") #[serde(default, rename = "comment-prefix")] diff --git a/src/safeoutputs/comment_on_work_item.rs b/src/safeoutputs/comment_on_work_item.rs index 0032e0b..f53cf0f 100644 --- a/src/safeoutputs/comment_on_work_item.rs +++ b/src/safeoutputs/comment_on_work_item.rs @@ -6,7 +6,8 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -40,8 +41,8 @@ tool_result! { } } -impl Sanitize for CommentOnWorkItemResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CommentOnWorkItemResult { + fn sanitize_content_fields(&mut self) { self.body = sanitize_text(&self.body); } } @@ -94,7 +95,7 @@ impl CommentTarget { /// max: 5 /// target: "*" /// ``` -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Default, Serialize, Deserialize)] pub struct CommentOnWorkItemConfig { /// Target scope — which work items can be commented on. /// `None` means no target was configured; execution must reject this. 
diff --git a/src/safeoutputs/create_branch.rs b/src/safeoutputs/create_branch.rs index b63d3bd..0c710bb 100644 --- a/src/safeoutputs/create_branch.rs +++ b/src/safeoutputs/create_branch.rs @@ -1,12 +1,13 @@ //! Create branch safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::{PATH_SEGMENT, validate_git_ref_name}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -88,8 +89,8 @@ tool_result! { } } -impl Sanitize for CreateBranchResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CreateBranchResult { + fn sanitize_content_fields(&mut self) { self.branch_name = sanitize_text(&self.branch_name); } } @@ -108,7 +109,7 @@ impl Sanitize for CreateBranchResult { /// - main /// - develop /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct CreateBranchConfig { /// Regex pattern that branch names must match #[serde(default, rename = "branch-pattern")] diff --git a/src/safeoutputs/create_git_tag.rs b/src/safeoutputs/create_git_tag.rs index 4bff86b..a163582 100644 --- a/src/safeoutputs/create_git_tag.rs +++ b/src/safeoutputs/create_git_tag.rs @@ -1,12 +1,13 @@ //! 
Create git tag safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::{PATH_SEGMENT, validate_git_ref_name}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -92,8 +93,8 @@ tool_result! { } } -impl Sanitize for CreateGitTagResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CreateGitTagResult { + fn sanitize_content_fields(&mut self) { // tag_name is a structural identifier — only strip control characters self.tag_name = self .tag_name @@ -123,7 +124,7 @@ impl Sanitize for CreateGitTagResult { /// - my-lib /// message-prefix: "[release] " /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct CreateGitTagConfig { /// Regex pattern that tag names must match (if configured) #[serde(default, rename = "tag-pattern")] diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index 60ba590..7729548 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -5,8 +5,9 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use tokio::process::Command; +use ado_aw_derive::SanitizeConfig; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, ToolResult, Validate}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use anyhow::{Context, ensure}; @@ -309,8 +310,8 @@ tool_result! 
{ } } -impl Sanitize for CreatePrResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CreatePrResult { + fn sanitize_content_fields(&mut self) { self.title = sanitize_text(&self.title); self.description = sanitize_text(&self.description); for label in &mut self.agent_labels { @@ -390,7 +391,7 @@ pub enum ProtectedFiles { /// - "automated" /// - "agent-created" /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct CreatePrConfig { /// Target branch to merge into (default: "main") #[serde(default = "default_target_branch", rename = "target-branch")] diff --git a/src/safeoutputs/create_wiki_page.rs b/src/safeoutputs/create_wiki_page.rs index 09889be..6de3851 100644 --- a/src/safeoutputs/create_wiki_page.rs +++ b/src/safeoutputs/create_wiki_page.rs @@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; use super::resolve_wiki_branch; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; @@ -67,8 +68,8 @@ tool_result! { } } -impl Sanitize for CreateWikiPageResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CreateWikiPageResult { + fn sanitize_content_fields(&mut self) { // Path is a structural identifier — sanitize lightly (remove control chars) // but do not escape HTML or neutralize patterns that are valid in wiki paths. self.path = self @@ -96,7 +97,7 @@ impl Sanitize for CreateWikiPageResult { /// title-prefix: "[Agent] " /// comment: "Created by agent" /// ``` -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Default, Serialize, Deserialize)] pub struct CreateWikiPageConfig { /// Wiki identifier (name or ID). Required — execution fails without this. 
/// @@ -628,7 +629,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: CreateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); assert!(!result.path.contains('\x01')); } @@ -640,7 +641,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: CreateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); assert_eq!(result.path, "/Folder/My Page"); } @@ -654,7 +655,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: CreateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); let ctx = crate::safeoutputs::ExecutionContext { ado_org_url: Some("https://dev.azure.com/myorg".to_string()), @@ -683,7 +684,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: CreateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); let ctx = crate::safeoutputs::ExecutionContext { ado_org_url: None, diff --git a/src/safeoutputs/create_work_item.rs b/src/safeoutputs/create_work_item.rs index 7804c4e..02c8e1b 100644 --- a/src/safeoutputs/create_work_item.rs +++ b/src/safeoutputs/create_work_item.rs @@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use anyhow::{Context, ensure}; /// Parameters for creating a work item @@ -40,8 +41,8 @@ tool_result! 
{ } } -impl Sanitize for CreateWorkItemResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for CreateWorkItemResult { + fn sanitize_content_fields(&mut self) { self.title = sanitize_text(&self.title); self.description = sanitize_text(&self.description); } @@ -65,7 +66,7 @@ impl Sanitize for CreateWorkItemResult { /// repository: "my-repo-name" # optional, defaults to current repo /// branch: "main" # optional, defaults to "main" /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct CreateWorkItemConfig { /// Work item type (default: "Task") #[serde(default = "default_work_item_type", rename = "work-item-type")] @@ -90,15 +91,17 @@ pub struct CreateWorkItemConfig { /// Additional custom fields as key-value pairs /// Keys should be the full field reference name (e.g., "Custom.MyField") #[serde(default, rename = "custom-fields")] + #[sanitize_config(sanitize_keys)] pub custom_fields: std::collections::HashMap, /// Artifact link configuration for GitHub Copilot integration #[serde(default, rename = "artifact-link")] + #[sanitize_config(nested)] pub artifact_link: ArtifactLinkConfig, } /// Configuration for artifact links (repository linking for GitHub Copilot) -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct ArtifactLinkConfig { /// Whether to add an artifact link to the work item (default: false) #[serde(default)] diff --git a/src/safeoutputs/link_work_items.rs b/src/safeoutputs/link_work_items.rs index c7452b7..9443113 100644 --- a/src/safeoutputs/link_work_items.rs +++ b/src/safeoutputs/link_work_items.rs @@ -6,7 +6,8 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use 
crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use crate::safeoutputs::comment_on_work_item::CommentTarget; @@ -91,8 +92,8 @@ tool_result! { } } -impl Sanitize for LinkWorkItemsResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for LinkWorkItemsResult { + fn sanitize_content_fields(&mut self) { self.link_type = sanitize_text(&self.link_type); self.comment = self.comment.as_deref().map(sanitize_text); } @@ -110,7 +111,7 @@ impl Sanitize for LinkWorkItemsResult { /// - child /// - related /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct LinkWorkItemsConfig { /// Restrict which link types the agent may use. /// An empty list (the default) means all link types are allowed. diff --git a/src/safeoutputs/queue_build.rs b/src/safeoutputs/queue_build.rs index f6b1412..c08cd0a 100644 --- a/src/safeoutputs/queue_build.rs +++ b/src/safeoutputs/queue_build.rs @@ -6,7 +6,8 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -64,8 +65,8 @@ tool_result! 
{ } } -impl Sanitize for QueueBuildResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for QueueBuildResult { + fn sanitize_content_fields(&mut self) { if let Some(reason) = &self.reason { self.reason = Some(sanitize_text(reason)); } @@ -100,7 +101,7 @@ impl Sanitize for QueueBuildResult { /// - version /// default-branch: main /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct QueueBuildConfig { /// Pipeline definition IDs that are allowed to be triggered (REQUIRED — empty rejects all) #[serde(default, rename = "allowed-pipelines")] diff --git a/src/safeoutputs/reply_to_pr_comment.rs b/src/safeoutputs/reply_to_pr_comment.rs index 99b0db0..a5f6a39 100644 --- a/src/safeoutputs/reply_to_pr_comment.rs +++ b/src/safeoutputs/reply_to_pr_comment.rs @@ -1,12 +1,13 @@ //! Reply to PR review comment safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -58,8 +59,8 @@ tool_result! 
{ } } -impl Sanitize for ReplyToPrCommentResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for ReplyToPrCommentResult { + fn sanitize_content_fields(&mut self) { self.content = sanitize_text(&self.content); } } @@ -75,7 +76,7 @@ impl Sanitize for ReplyToPrCommentResult { /// - self /// - other-repo /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct ReplyToPrCommentConfig { /// Prefix prepended to all replies (e.g., "[Agent] ") #[serde(default, rename = "comment-prefix")] diff --git a/src/safeoutputs/report_incomplete.rs b/src/safeoutputs/report_incomplete.rs index 9dfe1a9..abb952a 100644 --- a/src/safeoutputs/report_incomplete.rs +++ b/src/safeoutputs/report_incomplete.rs @@ -3,7 +3,7 @@ use schemars::JsonSchema; use serde::Deserialize; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::Validate; use anyhow::ensure; @@ -40,8 +40,8 @@ tool_result! { } } -impl Sanitize for ReportIncompleteResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for ReportIncompleteResult { + fn sanitize_content_fields(&mut self) { self.reason = sanitize_text(&self.reason); if let Some(ref ctx) = self.context { self.context = Some(sanitize_text(ctx)); diff --git a/src/safeoutputs/resolve_pr_thread.rs b/src/safeoutputs/resolve_pr_thread.rs index 49a77c6..c6f215c 100644 --- a/src/safeoutputs/resolve_pr_thread.rs +++ b/src/safeoutputs/resolve_pr_thread.rs @@ -1,5 +1,6 @@ //! 
Resolve PR review thread safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; @@ -7,7 +8,7 @@ use serde::{Deserialize, Serialize}; use super::resolve_repo_name; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -86,8 +87,8 @@ tool_result! { } } -impl Sanitize for ResolvePrThreadResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for ResolvePrThreadResult { + fn sanitize_content_fields(&mut self) { self.status = sanitize_text(&self.status); if let Some(ref repo) = self.repository { self.repository = Some(sanitize_text(repo)); @@ -108,7 +109,7 @@ impl Sanitize for ResolvePrThreadResult { /// - fixed /// - wont-fix /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct ResolvePrThreadConfig { /// Restrict which repositories the agent can operate on. /// If empty, all repositories in the checkout list (plus "self") are allowed. diff --git a/src/safeoutputs/result.rs b/src/safeoutputs/result.rs index a7dcfb8..4678f75 100644 --- a/src/safeoutputs/result.rs +++ b/src/safeoutputs/result.rs @@ -5,7 +5,7 @@ use rmcp::model::ErrorCode; use serde::Serialize; use std::collections::HashMap; -use crate::sanitize::Sanitize; +use crate::sanitize::{SanitizeConfig, SanitizeContent}; /// Trait for tool results that include a name field pub trait ToolResult: Serialize { @@ -59,12 +59,19 @@ pub struct ExecutionContext { } impl ExecutionContext { - /// Get typed configuration for a specific tool - pub fn get_tool_config(&self, tool_name: &str) -> T { - self.tool_configs + /// Get typed configuration for a specific tool. 
+ /// + /// Deserializes the tool's JSON config from front matter and applies + /// [`SanitizeConfig`] to all textual fields before returning. The + /// `SanitizeConfig` bound acts as a compile-time forcing function: + /// adding a new config struct without implementing the trait won't compile. + pub fn get_tool_config(&self, tool_name: &str) -> T { + let mut config: T = self.tool_configs .get(tool_name) .and_then(|v| serde_json::from_value(v.clone()).ok()) - .unwrap_or_default() + .unwrap_or_default(); + config.sanitize_config_fields(); + config } } @@ -186,7 +193,7 @@ impl ExecutionResult { /// Stage 2 parses these outputs and calls `execute` on each to perform /// the actual action (e.g., create work items, update files, etc.) #[async_trait::async_trait] -pub trait Executor: Sanitize + Send + Sync { +pub trait Executor: SanitizeContent + Send + Sync { /// Internal execution logic. Implementors define this; callers should /// use `execute_sanitized()` instead to ensure inputs are sanitized. async fn execute_impl(&self, ctx: &ExecutionContext) -> anyhow::Result; @@ -197,7 +204,7 @@ pub trait Executor: Sanitize + Send + Sync { /// `sanitize_fields()` is called before `execute_impl()`, making it impossible /// to accidentally skip sanitization. async fn execute_sanitized(&mut self, ctx: &ExecutionContext) -> anyhow::Result { - self.sanitize_fields(); + self.sanitize_content_fields(); self.execute_impl(ctx).await } } diff --git a/src/safeoutputs/submit_pr_review.rs b/src/safeoutputs/submit_pr_review.rs index af60618..5a35d3a 100644 --- a/src/safeoutputs/submit_pr_review.rs +++ b/src/safeoutputs/submit_pr_review.rs @@ -1,12 +1,13 @@ //! 
Submit PR review safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::{PATH_SEGMENT, resolve_repo_name}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -94,8 +95,8 @@ tool_result! { } } -impl Sanitize for SubmitPrReviewResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for SubmitPrReviewResult { + fn sanitize_content_fields(&mut self) { self.event = sanitize_text(&self.event); self.body = self.body.as_deref().map(sanitize_text); self.repository = self.repository.as_deref().map(sanitize_text); @@ -114,7 +115,7 @@ impl Sanitize for SubmitPrReviewResult { /// allowed-repositories: /// - self /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct SubmitPrReviewConfig { /// Which events are permitted. REQUIRED — empty list rejects all. #[serde(default, rename = "allowed-events")] diff --git a/src/safeoutputs/update_pr.rs b/src/safeoutputs/update_pr.rs index 32d92b0..badb6d2 100644 --- a/src/safeoutputs/update_pr.rs +++ b/src/safeoutputs/update_pr.rs @@ -1,12 +1,13 @@ //! Update pull request safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info, warn}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::{PATH_SEGMENT, resolve_repo_name}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -147,8 +148,8 @@ tool_result! 
{ } } -impl Sanitize for UpdatePrResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for UpdatePrResult { + fn sanitize_content_fields(&mut self) { self.repository = self.repository.as_deref().map(sanitize_text); self.operation = sanitize_text(&self.operation); self.reviewers = self @@ -184,7 +185,7 @@ impl Sanitize for UpdatePrResult { /// - approve /// - reject /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct UpdatePrConfig { /// Which operations are permitted. Empty list means all operations are allowed. #[serde(default, rename = "allowed-operations")] diff --git a/src/safeoutputs/update_wiki_page.rs b/src/safeoutputs/update_wiki_page.rs index 05b901c..ae44093 100644 --- a/src/safeoutputs/update_wiki_page.rs +++ b/src/safeoutputs/update_wiki_page.rs @@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; use super::resolve_wiki_branch; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; @@ -63,8 +64,8 @@ tool_result! { } } -impl Sanitize for UpdateWikiPageResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for UpdateWikiPageResult { + fn sanitize_content_fields(&mut self) { // Path is a structural identifier — sanitize lightly (remove control chars) // but do not escape HTML or neutralize patterns that are valid in wiki paths. self.path = self @@ -92,7 +93,7 @@ impl Sanitize for UpdateWikiPageResult { /// title-prefix: "[Agent] " /// comment: "Updated by agent" /// ``` -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Default, Serialize, Deserialize)] pub struct UpdateWikiPageConfig { /// Wiki identifier (name or ID). Required — execution fails without this. 
/// @@ -598,7 +599,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: UpdateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); assert!(!result.path.contains('\x01')); } @@ -610,7 +611,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: UpdateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); assert_eq!(result.path, "/Folder/My Page"); } @@ -624,7 +625,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: UpdateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); let ctx = crate::safeoutputs::ExecutionContext { ado_org_url: Some("https://dev.azure.com/myorg".to_string()), @@ -653,7 +654,7 @@ wiki-name: "MyProject.wiki" comment: None, }; let mut result: UpdateWikiPageResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); let ctx = crate::safeoutputs::ExecutionContext { ado_org_url: None, diff --git a/src/safeoutputs/update_work_item.rs b/src/safeoutputs/update_work_item.rs index e2bf59c..de0d939 100644 --- a/src/safeoutputs/update_work_item.rs +++ b/src/safeoutputs/update_work_item.rs @@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use ado_aw_derive::SanitizeConfig; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use anyhow::{Context, ensure}; /// Parameters for updating a work item @@ -87,8 +88,8 @@ tool_result! 
{ } } -impl Sanitize for UpdateWorkItemResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for UpdateWorkItemResult { + fn sanitize_content_fields(&mut self) { self.title = self.title.as_deref().map(sanitize_text); self.body = self.body.as_deref().map(sanitize_text); self.state = self.state.as_deref().map(sanitize_text); @@ -131,7 +132,7 @@ pub enum TargetConfig { /// assignee: true # enable assignee updates /// tags: true # enable tag updates /// ``` -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Default, Serialize, Deserialize)] pub struct UpdateWorkItemConfig { /// Enable state/status updates via the `state` agent parameter (default: false). /// The YAML key for this option is `status`. @@ -880,7 +881,7 @@ target: 42 tags: Some(vec!["tag-one".to_string(), "tag @two".to_string()]), }; let mut result: UpdateWorkItemResult = params.try_into().unwrap(); - result.sanitize_fields(); + result.sanitize_content_fields(); // @mentions should be neutralized assert!(result.title.as_deref().unwrap().contains("`@user`")); diff --git a/src/safeoutputs/upload_attachment.rs b/src/safeoutputs/upload_attachment.rs index 53e0883..08e5299 100644 --- a/src/safeoutputs/upload_attachment.rs +++ b/src/safeoutputs/upload_attachment.rs @@ -1,12 +1,13 @@ //! Upload attachment safe output tool +use ado_aw_derive::SanitizeConfig; use log::{debug, info, warn}; use percent_encoding::utf8_percent_encode; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use super::PATH_SEGMENT; -use crate::sanitize::{Sanitize, sanitize as sanitize_text}; +use crate::sanitize::{SanitizeContent, sanitize as sanitize_text}; use crate::tool_result; use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, Validate}; use anyhow::{Context, ensure}; @@ -73,8 +74,8 @@ tool_result! 
{ } } -impl Sanitize for UploadAttachmentResult { - fn sanitize_fields(&mut self) { +impl SanitizeContent for UploadAttachmentResult { + fn sanitize_content_fields(&mut self) { if let Some(comment) = &self.comment { self.comment = Some(sanitize_text(comment)); } @@ -96,7 +97,7 @@ const DEFAULT_MAX_FILE_SIZE: u64 = 5 * 1024 * 1024; // 5 MB /// - .log /// comment-prefix: "[Agent] " /// ``` -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)] pub struct UploadAttachmentConfig { /// Maximum file size in bytes (default: 5 MB) #[serde(default = "default_max_file_size", rename = "max-file-size")] diff --git a/src/sanitize.rs b/src/sanitize.rs index 136ebd8..1fa52c3 100644 --- a/src/sanitize.rs +++ b/src/sanitize.rs @@ -4,16 +4,42 @@ //! against template injection and prompt injection in Azure DevOps contexts. //! This module is shared across Stage 1 (safe output creation), threat analysis //! ingestion, and Stage 2 (safe output execution). +//! +//! Two traits cover different trust boundaries: +//! +//! - [`SanitizeContent`] — for agent-generated content (safe-output results). +//! Applies the full sanitization pipeline including HTML escaping, @mention +//! wrapping, bot trigger neutralization, etc. +//! - [`SanitizeConfig`] — for operator-controlled configuration values (front +//! matter and safe-output configs). Applies a lighter pipeline that protects +//! against pipeline command injection and control characters without corrupting +//! identifiers like area paths, wiki names, or assignee emails. use log::debug; -/// Trait for types that contain untrusted text fields requiring sanitization. +/// Trait for types that contain untrusted agent-generated text fields. /// /// Implement this on safe output result structs so Stage 2 execution can -/// call `sanitize_fields()` before dispatching to Azure DevOps APIs. -pub trait Sanitize { - /// Apply the sanitization pipeline to all untrusted text fields in-place. 
- fn sanitize_fields(&mut self); +/// call `sanitize_content_fields()` before dispatching to Azure DevOps APIs. +/// +/// Use `#[derive(SanitizeContent)]` from the `ado-aw-derive` crate for automatic +/// implementation on structs with named fields. +pub trait SanitizeContent { + /// Apply the full sanitization pipeline to all untrusted content fields in-place. + fn sanitize_content_fields(&mut self); +} + +/// Trait for types that contain operator-controlled configuration text fields. +/// +/// Implement this on front matter structs and safe-output config structs so +/// that all textual values are sanitized before use in template substitution +/// or API calls. +/// +/// Use `#[derive(SanitizeConfig)]` from the `ado-aw-derive` crate for automatic +/// implementation on structs with named fields. +pub trait SanitizeConfig { + /// Apply the config-appropriate sanitization pipeline to all text fields in-place. + fn sanitize_config_fields(&mut self); } /// Maximum content size in bytes (IS-08) @@ -49,6 +75,39 @@ pub fn sanitize(input: &str) -> String { s } +/// Sanitize operator-controlled configuration values. +/// +/// Applies a subset of the full pipeline appropriate for config identifiers: +/// 1. Remove ANSI escape sequences and control characters (IS-09) +/// 2. Neutralize pipeline commands (`##vso[`, `##[`) +/// 3. Apply content size limits (IS-08) +/// +/// Skips HTML escaping, @mention wrapping, bot trigger neutralization, XML +/// comment removal, and URL protocol sanitization — these are content-rendering +/// concerns that would corrupt identifiers like area paths, wiki names, or emails. +pub fn sanitize_config(input: &str) -> String { + let mut s = remove_control_characters(input); + s = neutralize_pipeline_commands(&s); + s = enforce_content_limits(&s); + debug!( + "Sanitized config value: {} -> {} bytes", + input.len(), + s.len() + ); + s +} + +/// Light sanitization: only remove control characters. 
+/// +/// Used for structural identifiers (e.g., wiki page paths) that must not have +/// their content altered beyond stripping unsafe control sequences. +pub fn sanitize_light(input: &str) -> String { + input + .chars() + .filter(|c| !c.is_control() || *c == '\n' || *c == '\t' || *c == '\r') + .collect() +} + // ── IS-09: Control character & ANSI escape removal ───────────────────────── /// Remove ANSI escape sequences and unsafe control characters. @@ -542,4 +601,65 @@ mod tests { assert!(result.contains("## Sub-heading")); assert!(result.contains("#123")); } + + // ── sanitize_config tests ───────────────────────────────────────────── + + #[test] + fn test_sanitize_config_neutralizes_pipeline_commands() { + let input = "##vso[task.setvariable variable=secret]value"; + let result = sanitize_config(input); + assert!(result.contains("`##vso[`")); + assert!(!result.contains("##vso[task.")); + } + + #[test] + fn test_sanitize_config_removes_control_chars() { + let input = "hello\x00world\x07!"; + assert_eq!(sanitize_config(input), "helloworld!"); + } + + #[test] + fn test_sanitize_config_preserves_html_tags() { + let input = "area-path: \\Team"; + let result = sanitize_config(input); + assert!(result.contains(""), "Config sanitize should NOT escape HTML tags"); + } + + #[test] + fn test_sanitize_config_preserves_at_mentions() { + let input = "user@example.com"; + assert_eq!(sanitize_config(input), input, "Config sanitize should NOT wrap @mentions"); + } + + #[test] + fn test_sanitize_config_preserves_bot_triggers() { + let input = "fixes #123"; + assert_eq!(sanitize_config(input), input, "Config sanitize should NOT neutralize bot triggers"); + } + + #[test] + fn test_sanitize_config_preserves_normal_text() { + let input = "MyProject\\Team\\Sprint 1"; + assert_eq!(sanitize_config(input), input); + } + + // ── sanitize_light tests ────────────────────────────────────────────── + + #[test] + fn test_sanitize_light_removes_control_chars() { + let input = 
"/Overview/\x01Page"; + assert_eq!(sanitize_light(input), "/Overview/Page"); + } + + #[test] + fn test_sanitize_light_preserves_whitespace() { + let input = "path/with\ttab\nand\nnewlines"; + assert_eq!(sanitize_light(input), input); + } + + #[test] + fn test_sanitize_light_preserves_everything_else() { + let input = " @user ##vso[cmd] fixes #42"; + assert_eq!(sanitize_light(input), input, "Light sanitize should only remove control chars"); + } }