Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(codeowners): Find codeowner path matches with rust #1746

Merged
merged 4 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions py/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Unreleased

- Add a utility function for matching CODEOWNERS path patterns against a stack trace file path. ([#1746](https://github.com/getsentry/relay/pull/1746))

## 0.8.16

- The minimum required Python version is now 3.8. This release does not contain known breaking changes for Python 3.7, but we no longer guarantee compatibility.
Expand Down
9 changes: 9 additions & 0 deletions py/sentry_relay/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"StoreNormalizer",
"GeoIpLookup",
"is_glob_match",
"is_codeowners_path_match",
"parse_release",
"validate_pii_config",
"convert_datascrubbing_config",
Expand Down Expand Up @@ -159,6 +160,14 @@ def is_glob_match(
return rustcall(lib.relay_is_glob_match, make_buf(value), encode_str(pat), flags)


def is_codeowners_path_match(value, pattern):
    """
    Check whether ``value`` matches the CODEOWNERS path ``pattern``.

    Text values are encoded as UTF-8 before being handed to the native
    library; any other value is passed to ``make_buf`` unchanged.

    Returns the result of the native ``relay_is_codeowners_path_match`` call.
    """
    native_fn = lib.relay_is_codeowners_path_match
    raw_value = value.encode("utf-8") if isinstance(value, text_type) else value
    value_buf = make_buf(raw_value)
    pattern_str = encode_str(pattern)
    return rustcall(native_fn, value_buf, pattern_str)


def validate_pii_config(config):
"""
Validate a PII config against the schema. Used in project options UI.
Expand Down
8 changes: 8 additions & 0 deletions relay-cabi/include/relay.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,14 @@ bool relay_is_glob_match(const struct RelayBuf *value,
const struct RelayStr *pat,
GlobFlags flags);

/**
 * Checks whether `value` matches the codeowners path pattern `pat`.
 *
 * The codeowners path is converted into a regex, which is then searched for
 * in the provided value.
 *
 * Returns `true` if the regex matches, `false` otherwise.
 */
bool relay_is_codeowners_path_match(const struct RelayBuf *value,
const struct RelayStr *pat);

/**
* Parse a sentry release structure from a string.
*/
Expand Down
12 changes: 11 additions & 1 deletion relay-cabi/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::slice;

use once_cell::sync::OnceCell;

use relay_common::{glob_match_bytes, GlobOptions};
use relay_common::{codeowners_match_bytes, glob_match_bytes, GlobOptions};
use relay_general::pii::{
selector_suggestions_from_value, DataScrubbingConfig, PiiConfig, PiiProcessor,
};
Expand Down Expand Up @@ -235,6 +235,16 @@ pub unsafe extern "C" fn relay_is_glob_match(
glob_match_bytes((*value).as_bytes(), (*pat).as_str(), options)
}

/// Tests a value against a codeowners path pattern.
///
/// Returns `true` if the codeowners path matches the value, `false` otherwise.
///
/// # Safety
///
/// Both `value` and `pattern` are dereferenced without a null check, so the caller must pass
/// pointers to valid, initialized `RelayBuf` and `RelayStr` instances.
#[no_mangle]
#[relay_ffi::catch_unwind]
pub unsafe extern "C" fn relay_is_codeowners_path_match(
    value: *const RelayBuf,
    pattern: *const RelayStr,
) -> bool {
    let haystack = (*value).as_bytes();
    let codeowners_pattern = (*pattern).as_str();
    codeowners_match_bytes(haystack, codeowners_pattern)
}

/// Parse a sentry release structure from a string.
#[no_mangle]
#[relay_ffi::catch_unwind]
Expand Down
128 changes: 128 additions & 0 deletions relay-common/src/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use regex::bytes::{Regex, RegexBuilder};
/// Lazily initialized, mutex-guarded LRU cache holding up to 500 regexes, keyed by the pair of
/// glob options and pattern string.
// NOTE(review): presumably populated with the output of `translate_pattern`; the code using this
// cache is outside this view.
static GLOB_CACHE: Lazy<Mutex<LruCache<(GlobOptions, String), Regex>>> =
Lazy::new(|| Mutex::new(LruCache::new(500)));

/// Lazily initialized, mutex-guarded LRU cache holding up to 500 regexes, keyed by the codeowners
/// pattern string they were translated from.
static CODEOWNERS_CACHE: Lazy<Mutex<LruCache<String, Regex>>> =
Lazy::new(|| Mutex::new(LruCache::new(500)));

/// Controls the options of the globber.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub struct GlobOptions {
Expand All @@ -33,6 +36,97 @@ fn translate_pattern(pat: &str, options: GlobOptions) -> Option<Regex> {
.ok()
}

/// Translates a codeowners path pattern into a regular expression over bytes.
///
/// The translation follows these rules:
///
///  - A pattern starting with a backslash is special-cased: the result matches a backslash that
///    is followed by either the end of the value or a slash.
///  - A pattern containing a slash anywhere except in the last position is anchored to the start
///    of the value, with an optional leading slash if the pattern itself starts with one. All
///    other patterns may match at the start of the value or after any slash.
///  - `*` matches any run of characters except a slash, `?` matches a single character except a
///    slash. A `**` that spans a whole path segment (delimited by the pattern boundaries or
///    slashes) matches any run of characters, including slashes.
///  - A pattern ending with a slash requires a slash after the matched directory name. A pattern
///    ending with `/*` only matches entries directly inside the directory. Every other pattern
///    must be followed by the end of the value or a slash.
///
/// Returns `None` if the generated expression cannot be compiled.
fn translate_codeowners_pattern(pattern: &str) -> Option<Regex> {
    // Special case backslash can match a backslash file or directory
    if pattern.starts_with('\\') {
        return Regex::new(r"\\(?:\z|/)").ok();
    }

    // Both sides are byte offsets, so this comparison is sound for non-ASCII patterns too.
    let anchored = pattern
        .find('/')
        .map_or(false, |pos| pos != pattern.len() - 1);

    let mut regex = String::from(if anchored { r"\A" } else { r"(?:\A|/)" });

    let matches_dir = pattern.ends_with('/');
    let pattern = pattern.trim_end_matches('/');

    // patterns ending with "/*" are special. They only match items directly in the directory
    // not deeper
    let trailing_slash_star = pattern.ends_with("/*");

    // Index by character rather than by byte so that lookahead and lookbehind stay consistent
    // for multi-byte characters, and so that each lookup is O(1).
    let chars: Vec<char> = pattern.chars().collect();
    let mut index = 0;

    // Anchored paths may or may not start with a slash
    if anchored && chars.first() == Some(&'/') {
        regex += r"/?";
        index = 1;
    }

    while let Some(&ch) = chars.get(index) {
        match ch {
            '*' => {
                // A double star only gets its special meaning when it occupies a whole path
                // segment, i.e. it is delimited by the pattern boundaries or by slashes.
                let double_star = chars.get(index + 1) == Some(&'*');
                let starts_segment = index == 0 || chars[index - 1] == '/';
                let ends_segment = chars.get(index + 2).map_or(true, |&next| next == '/');

                if double_star && starts_segment && ends_segment {
                    regex += ".*";
                    // Consume exactly the second star and the slash following it (if any), then
                    // resume regular translation with the rest of the pattern.
                    index += 3;
                    continue;
                }

                regex += r"[^/]*";
            }
            '?' => regex += r"[^/]",
            _ => {
                let mut utf8 = [0u8; 4];
                regex += &regex::escape(ch.encode_utf8(&mut utf8));
            }
        }

        index += 1;
    }

    if matches_dir {
        regex += "/";
    } else if trailing_slash_star {
        regex += r"\z";
    } else {
        regex += r"(?:\z|/)";
    }

    Regex::new(&regex).ok()
}

/// Matches a value against a codeowners path pattern.
///
/// The pattern is translated into a regex once and kept in an LRU cache for subsequent calls.
///
/// Returns `true` if the codeowners regex matches, `false` otherwise. A pattern that cannot be
/// translated into a regex never matches.
pub fn codeowners_match_bytes(value: &[u8], pat: &str) -> bool {
    let cache_key = pat.to_string();
    let mut regex_cache = CODEOWNERS_CACHE.lock();

    if let Some(compiled) = regex_cache.get(&cache_key) {
        return compiled.is_match(value);
    }

    match translate_codeowners_pattern(&cache_key) {
        Some(compiled) => {
            let matched = compiled.is_match(value);
            regex_cache.put(cache_key, compiled);
            matched
        }
        None => false,
    }
}
/// Performs a glob operation on bytes.
///
/// Returns `true` if the glob matches, `false` otherwise.
Expand Down Expand Up @@ -113,4 +207,38 @@ mod tests {
test_glob!(&long_string, "*************************.py", true, {double_star: true, case_insensitive: true, path_normalize: true});
test_glob!(&long_string, "*************************.js", false, {double_star: true, case_insensitive: true, path_normalize: true});
}

#[test]
fn test_translate_codeowners_pattern() {
    // Each entry is (codeowners pattern, path to test, whether the path is expected to match).
    let cases: &[(&str, &str, bool)] = &[
        // Unanchored file globs match at any depth.
        ("*.txt", "file.txt", true),
        ("*.txt", "file.txt/", true),
        ("*.txt", "dir/file.txt", true),
        // Anchored globs match with or without the leading slash, but not in subdirectories.
        ("/dir/*.txt", "/dir/file.txt", true),
        ("/dir/*.txt", "dir/file.txt", true),
        ("/dir/*.txt", "/dir/subdir/file.txt", false),
        // Unanchored directories match at any depth.
        ("apps/", "apps/file.txt", true),
        ("apps/", "/apps/file.txt", true),
        ("apps/", "/dir/apps/file.txt", true),
        ("apps/", "/dir/subdir/apps/file.txt", true),
        // A trailing "/*" should not match on nested files.
        ("docs/*", "docs/getting-started.md", true),
        ("docs/*", "docs/build-app/troubleshooting.md", false),
        // Anchored directories only match from the root.
        ("/docs/", "/docs/file.txt", true),
        ("/docs/", "/docs/subdir/file.txt", true),
        ("/docs/", "app/docs/file.txt", false),
    ];

    for &(pattern, path, expected) in cases {
        let regex = translate_codeowners_pattern(pattern).unwrap();
        assert_eq!(
            regex.is_match(path.as_bytes()),
            expected,
            "pattern {:?} against path {:?}",
            pattern,
            path
        );
    }
}
}