Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ schedule: daily around 14:00 # Fuzzy schedule syntax - see Schedule Syntax secti
# branches:
# - main
# - release/*
workspace: repo # Optional: "root" or "repo". If not specified, defaults based on checkout configuration (see below).
workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults based on checkout configuration (see below).
pool: AZS-1ES-L-MMS-ubuntu-22.04 # Agent pool name (string format). Defaults to AZS-1ES-L-MMS-ubuntu-22.04.
# pool: # Alternative object format (required for 1ES if specifying os)
# name: AZS-1ES-L-MMS-ubuntu-22.04
Expand Down Expand Up @@ -762,15 +762,27 @@ If `timeout-minutes` is not configured, this is replaced with an empty string.
Should be replaced with the appropriate working directory based on the effective workspace setting.

**Workspace Resolution Logic:**
1. If `workspace` is explicitly set in front matter, that value is used
1. If `workspace` is explicitly set in front matter, that value is used (after validation)
2. If `workspace` is not set and `checkout:` contains additional repositories, defaults to `repo`
3. If `workspace` is not set and only `self` is checked out, defaults to `root`

**Warning:** If `workspace: repo` is explicitly set but no additional repositories are in `checkout:`, a warning is emitted because when only `self` is checked out, `$(Build.SourcesDirectory)` already contains the repository content directly.
**Warning:** If `workspace: repo` (or `self`) is explicitly set but no additional repositories are in `checkout:`, a warning is emitted because when only `self` is checked out, `$(Build.SourcesDirectory)` already contains the repository content directly.

**Values:**
- `root`: `$(Build.SourcesDirectory)` - the checkout root directory
- `repo`: `$(Build.SourcesDirectory)/$(Build.Repository.Name)` - the repository's subfolder
**Accepted values:**
- `root` → `$(Build.SourcesDirectory)` — the checkout root directory
- `repo` (or `self`) → `$(Build.SourcesDirectory)/$(Build.Repository.Name)` — the trigger repository's subfolder
- `<alias>` → `$(Build.SourcesDirectory)/<alias>` — a specific checked-out repository's subfolder. The alias must appear in the `checkout:` list (which itself must be a subset of `repositories:`). This form is only valid when at least one additional repository is checked out; otherwise compilation fails.

**Example — pointing the agent's workspace at a checked-out repository:**
```yaml
repositories:
- repository: exp23-a7-nw
type: git
name: msazuresphere/exp23-a7-nw
checkout:
- exp23-a7-nw
workspace: exp23-a7-nw # Resolves to $(Build.SourcesDirectory)/exp23-a7-nw
```

This is used for the `workingDirectory` property of the copilot task.

Expand Down
236 changes: 217 additions & 19 deletions src/compile/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,13 @@ pub fn generate_checkout_self() -> String {
"- checkout: self".to_string()
}

/// Names that are reserved by the `workspace:` resolver and therefore cannot
/// be used as repository aliases / `checkout:` entries. If a user defines a
/// repository named `repo` and writes `workspace: repo`, the special-cased
/// reserved arm would silently win over the alias resolution, producing the
/// wrong working directory. We reject this at compile time instead.
const RESERVED_WORKSPACE_NAMES: &[&str] = &["root", "repo", "self"];

/// Validate that all entries in checkout list exist in repositories
pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String]) -> Result<()> {
if checkout.is_empty() {
Expand All @@ -321,40 +328,119 @@ pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String])
repo_names
);
}
if RESERVED_WORKSPACE_NAMES.contains(&name.as_str()) {
anyhow::bail!(
"Checkout entry '{}' uses a name reserved by the 'workspace:' resolver \
({:?}). Rename the repository alias to avoid ambiguity with \
'workspace: {}'.",
name,
RESERVED_WORKSPACE_NAMES,
name
);
}
}

Ok(())
}

/// Sentinel prefix used to encode a repository-alias workspace selection
/// in the string returned by [`compute_effective_workspace`]. The prefix is
/// only ever produced internally by `compute_effective_workspace` from a
/// user-supplied alias that has just been checked against the `checkout:`
/// list, so the encoded value never round-trips back through user input.
const WORKSPACE_ALIAS_PREFIX: &str = "alias:";

/// Compute the effective workspace based on explicit setting and checkout configuration.
///
/// Accepted values for `explicit_workspace`:
/// - `"root"` — `$(Build.SourcesDirectory)` (the checkout root)
/// - `"repo"` or `"self"` — the trigger repository's subfolder
/// - any repository alias listed in `checkout` — that repository's subfolder
///
/// Returns an encoded string that [`generate_working_directory`] resolves to
/// the actual ADO path expression.
pub fn compute_effective_workspace(
explicit_workspace: &Option<String>,
checkout: &[String],
agent_name: &str,
) -> String {
) -> Result<String> {
let has_additional_checkouts = !checkout.is_empty();

match explicit_workspace {
Some(ws) if ws == "repo" && !has_additional_checkouts => {
eprintln!(
"Warning: Agent '{}' has workspace: repo but no additional repositories in checkout. \
When only 'self' is checked out, $(Build.SourcesDirectory) already contains the repository content. \
The workspace setting has no effect in this case.",
agent_name
);
"repo".to_string()
Some(ws) => {
let ws = ws.as_str();
match ws {
"root" => Ok("root".to_string()),
"repo" | "self" => {
if !has_additional_checkouts {
eprintln!(
"Warning: Agent '{}' has workspace: {} but no additional repositories in checkout. \
When only 'self' is checked out, $(Build.SourcesDirectory) already contains the repository content. \
The workspace setting has no effect in this case.",
agent_name, ws
);
}
Ok("repo".to_string())
}
alias => {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot do we not have a validators library that will detect path escaping?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — src/validate.rs is the shared home for structural validators but it didn't have a path-segment helper (only is_valid_command_path for full paths, plus an inline .. check buried in cache_memory/execute.rs). Added validate::is_safe_path_segment in 6e9979c with its own unit tests, and switched the workspace alias check to use it.

// Defense in depth: even though aliases are constrained
// by `validate_checkout_list` to match a `repository:`
// name, refuse anything that could escape the workspace
// root once embedded into the working directory path.
if !validate::is_safe_path_segment(alias) {
anyhow::bail!(
"Agent '{}' has workspace: '{}' which is not a safe path \
segment. Repository aliases must not be empty, contain '..', \
'/', '\\\\' or start with '.'.",
agent_name,
alias
);
}
// A single contains() check covers both "alias not in
// checkout" and "checkout is empty" — produce one error
// message that clearly lists what would have been valid.
if !checkout.iter().any(|c| c == alias) {
if checkout.is_empty() {
anyhow::bail!(
"Agent '{}' has workspace: '{}' but no additional repositories are checked out. \
A repository alias for workspace is only valid when at least one repository appears in 'checkout:'. \
Use 'root', 'repo' (or 'self'), or add the repository to the 'checkout:' list.",
agent_name,
alias
);
}
anyhow::bail!(
"Agent '{}' has workspace: '{}' which does not match any checked-out repository. \
Valid values: 'root', 'repo' (or 'self'), or one of {:?}",
agent_name,
alias,
checkout
);
}
Ok(format!("{}{}", WORKSPACE_ALIAS_PREFIX, alias))
}
}
}
Some(ws) => ws.clone(),
None if has_additional_checkouts => "repo".to_string(),
None => "root".to_string(),
None if has_additional_checkouts => Ok("repo".to_string()),
None => Ok("root".to_string()),
}
}

/// Generate working directory based on workspace setting
pub fn generate_working_directory(effective_workspace: &str) -> String {
if let Some(alias) = effective_workspace.strip_prefix(WORKSPACE_ALIAS_PREFIX) {
return format!("$(Build.SourcesDirectory)/{}", alias);
}
match effective_workspace {
"repo" => "$(Build.SourcesDirectory)/$(Build.Repository.Name)".to_string(),
"root" | _ => "$(Build.SourcesDirectory)".to_string(),
"root" => "$(Build.SourcesDirectory)".to_string(),
// compute_effective_workspace only ever returns "root", "repo", or an
// "alias:<name>" sentinel; any other value indicates a programming
// error rather than user input. Fall back to the safest path.
other => {
debug_assert!(false, "unexpected effective workspace value: {other}");
"$(Build.SourcesDirectory)".to_string()
}
}
}

Expand Down Expand Up @@ -1638,7 +1724,7 @@ pub async fn compile_shared(
&front_matter.workspace,
&front_matter.checkout,
&front_matter.name,
);
)?;
let working_directory = generate_working_directory(&effective_workspace);
let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?;
let has_schedule = front_matter.schedule.is_some();
Expand Down Expand Up @@ -1796,37 +1882,132 @@ mod tests {

#[test]
fn test_workspace_explicit_root() {
let ws = compute_effective_workspace(&Some("root".to_string()), &[], "agent");
let ws = compute_effective_workspace(&Some("root".to_string()), &[], "agent").unwrap();
assert_eq!(ws, "root");
assert_eq!(generate_working_directory(&ws), "$(Build.SourcesDirectory)");
}

#[test]
fn test_workspace_explicit_repo_with_checkouts() {
let checkouts = vec!["other-repo".to_string()];
let ws = compute_effective_workspace(&Some("repo".to_string()), &checkouts, "agent");
let ws =
compute_effective_workspace(&Some("repo".to_string()), &checkouts, "agent").unwrap();
assert_eq!(ws, "repo");
assert_eq!(
generate_working_directory(&ws),
"$(Build.SourcesDirectory)/$(Build.Repository.Name)"
);
}

#[test]
fn test_workspace_explicit_self_alias_for_repo() {
let checkouts = vec!["other-repo".to_string()];
let ws =
compute_effective_workspace(&Some("self".to_string()), &checkouts, "agent").unwrap();
// 'self' is a synonym for 'repo' (the trigger repository).
assert_eq!(ws, "repo");
assert_eq!(
generate_working_directory(&ws),
"$(Build.SourcesDirectory)/$(Build.Repository.Name)"
);
}

#[test]
fn test_workspace_implicit_root_no_checkouts() {
let ws = compute_effective_workspace(&None, &[], "agent");
let ws = compute_effective_workspace(&None, &[], "agent").unwrap();
assert_eq!(ws, "root");
}

#[test]
fn test_workspace_implicit_repo_with_checkouts() {
let checkouts = vec!["other-repo".to_string()];
let ws = compute_effective_workspace(&None, &checkouts, "agent");
let ws = compute_effective_workspace(&None, &checkouts, "agent").unwrap();
assert_eq!(ws, "repo");
}

#[test]
fn test_workspace_explicit_repo_no_checkouts_still_returns_repo() {
// Emits a warning but still returns "repo"
let ws = compute_effective_workspace(&Some("repo".to_string()), &[], "agent");
let ws = compute_effective_workspace(&Some("repo".to_string()), &[], "agent").unwrap();
assert_eq!(ws, "repo");
}

#[test]
fn test_workspace_explicit_self_no_checkouts_still_returns_repo() {
// 'self' takes the same code path as 'repo'; it should also warn
// and still resolve to the repo subfolder.
let ws = compute_effective_workspace(&Some("self".to_string()), &[], "agent").unwrap();
assert_eq!(ws, "repo");
}

#[test]
fn test_workspace_explicit_alias_with_traversal_fails() {
let checkouts = vec!["../sibling".to_string()];
let err = compute_effective_workspace(
&Some("../sibling".to_string()),
&checkouts,
"agent",
)
.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("not a safe path"), "msg: {msg}");
}

#[test]
fn test_workspace_explicit_alias_with_slash_fails() {
let checkouts = vec!["foo/bar".to_string()];
let err = compute_effective_workspace(
&Some("foo/bar".to_string()),
&checkouts,
"agent",
)
.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("not a safe path"), "msg: {msg}");
}

#[test]
fn test_workspace_explicit_alias_resolves_to_repo_subdir() {
let checkouts = vec!["exp23-a7-nw".to_string(), "another-repo".to_string()];
let ws = compute_effective_workspace(
&Some("exp23-a7-nw".to_string()),
&checkouts,
"agent",
)
.unwrap();
assert_eq!(
generate_working_directory(&ws),
"$(Build.SourcesDirectory)/exp23-a7-nw"
);
}

#[test]
fn test_workspace_explicit_alias_not_in_checkout_fails() {
let checkouts = vec!["other-repo".to_string()];
let err = compute_effective_workspace(
&Some("exp23-a7-nw".to_string()),
&checkouts,
"agent",
)
.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("exp23-a7-nw"), "msg: {msg}");
assert!(msg.contains("does not match"), "msg: {msg}");
}

#[test]
fn test_workspace_explicit_alias_no_checkouts_fails() {
let err =
compute_effective_workspace(&Some("exp23-a7-nw".to_string()), &[], "agent")
.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("exp23-a7-nw"), "msg: {msg}");
assert!(
msg.contains("no additional repositories are checked out"),
"msg: {msg}"
);
}

// ─── validate_checkout_list ───────────────────────────────────────────────

#[test]
Expand Down Expand Up @@ -1874,6 +2055,23 @@ mod tests {
assert!(result.is_ok());
}

#[test]
fn test_validate_checkout_list_reserved_name_fails() {
// A repo aliased "repo" would silently shadow `workspace: repo`, so
// reject it at compile time.
let repos = vec![Repository {
repository: "repo".to_string(),
repo_type: "git".to_string(),
name: "org/repo".to_string(),
repo_ref: "refs/heads/main".to_string(),
}];
let checkout = vec!["repo".to_string()];
let err = validate_checkout_list(&repos, &checkout).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("reserved"), "msg: {msg}");
assert!(msg.contains("'repo'"), "msg: {msg}");
}

// ─── Engine::args (copilot params) ──────────────────────────────────────

#[test]
Expand Down
26 changes: 26 additions & 0 deletions src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ use std::collections::HashMap;

// ── Character allowlist validators ──────────────────────────────────────────

/// Validate that a string is safe to embed as a single path segment (e.g. a
/// repository alias appended to `$(Build.SourcesDirectory)`). Rejects empty
/// strings, anything containing `..`, path separators (`/`, `\`), or leading
/// `.` to prevent path traversal / hidden-directory escapes.
pub fn is_safe_path_segment(s: &str) -> bool {
!s.is_empty()
&& !s.contains("..")
&& !s.contains('/')
&& !s.contains('\\')
&& !s.starts_with('.')
}

/// Characters allowed in engine.command paths (absolute path chars only).
/// Prevents shell injection when the path is embedded in AWF single-quoted commands.
pub fn is_valid_command_path(s: &str) -> bool {
Expand Down Expand Up @@ -388,6 +400,20 @@ mod tests {

// ── Character allowlist validators ──────────────────────────────────

#[test]
fn test_is_safe_path_segment() {
assert!(is_safe_path_segment("my-repo"));
assert!(is_safe_path_segment("exp23-a7-nw"));
assert!(is_safe_path_segment("repo_v2"));
assert!(!is_safe_path_segment(""));
assert!(!is_safe_path_segment(".."));
assert!(!is_safe_path_segment("../sibling"));
assert!(!is_safe_path_segment("foo/bar"));
assert!(!is_safe_path_segment("foo\\bar"));
assert!(!is_safe_path_segment(".hidden"));
assert!(!is_safe_path_segment("foo..bar"));
}

#[test]
fn test_is_valid_command_path() {
assert!(is_valid_command_path("/tmp/awf-tools/copilot"));
Expand Down