From 541bb0087eb95b8d67c98547240c8104c5b2a69f Mon Sep 17 00:00:00 2001 From: Will Wang Date: Wed, 24 May 2023 17:03:53 -0400 Subject: [PATCH] chore(enterprise): Extend library functionality for secret scanning (#17483) Ref OPB-710 This PR - Exposes patterns and the `interpolate` function for use in OPW. This will help deduplicate some logic and reduce maintenance burden. We use the patterns themselves in secret scanning logic, and will use the `interpolate` function in bootstrap-related logic. --- src/config/loading/secret.rs | 2 +- src/config/mod.rs | 4 +++- src/config/vars.rs | 35 +++++++++++++++++++---------------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/config/loading/secret.rs b/src/config/loading/secret.rs index 54c83823e2218..09a32260708e1 100644 --- a/src/config/loading/secret.rs +++ b/src/config/loading/secret.rs @@ -26,7 +26,7 @@ use crate::{ // - "SECRET[backend..secret.name]" will match and capture "backend" and ".secret.name" // - "SECRET[secret_name]" will not match // - "SECRET[.secret.name]" will not match -static COLLECTOR: Lazy<Regex> = +pub static COLLECTOR: Lazy<Regex> = Lazy::new(|| Regex::new(r"SECRET\[([[:word:]]+)\.([[:word:].]+)\]").unwrap()); /// Helper type for specifically deserializing secrets backends. 
diff --git a/src/config/mod.rs b/src/config/mod.rs index 473961f2eaf7c..c1f949d3506fa 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -48,7 +48,8 @@ pub use format::{Format, FormatHint}; pub use id::{ComponentKey, Inputs}; pub use loading::{ load, load_builder_from_paths, load_from_paths, load_from_paths_with_provider_and_secrets, - load_from_str, load_source_from_paths, merge_path_lists, process_paths, CONFIG_PATHS, + load_from_str, load_source_from_paths, merge_path_lists, process_paths, COLLECTOR, + CONFIG_PATHS, }; pub use provider::ProviderConfig; pub use secret::SecretBackend; @@ -59,6 +60,7 @@ pub use transform::{ }; pub use unit_test::{build_unit_tests, build_unit_tests_main, UnitTestResult}; pub use validation::warnings; +pub use vars::{interpolate, ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX}; pub use vector_core::config::{ init_log_schema, log_schema, proxy::ProxyConfig, LogSchema, OutputId, }; diff --git a/src/config/vars.rs b/src/config/vars.rs index 3b923a9687bfe..73316884cebf3 100644 --- a/src/config/vars.rs +++ b/src/config/vars.rs @@ -1,7 +1,25 @@ use std::collections::HashMap; +use once_cell::sync::Lazy; use regex::{Captures, Regex}; +// Environment variable names can have any characters from the Portable Character Set other +// than NUL. However, for Vector's interpolation, we are closer to what a shell supports which +// is solely of uppercase letters, digits, and the '_' (that is, the `[:word:]` regex class). +// In addition to these characters, we allow `.` as this commonly appears in environment +// variable names when they come from a Java properties file. 
+// +// https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html +pub static ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX: Lazy<Regex> = Lazy::new(|| { + Regex::new( + r"(?x) + \$\$| + \$([[:word:].]+)| + \$\{([[:word:].]+)(?:(:?-|:?\?)([^}]*))?\}", + ) + .unwrap() +}); + /// (result, warnings) pub fn interpolate( input: &str, @@ -10,22 +28,7 @@ pub fn interpolate( let mut errors = Vec::new(); let mut warnings = Vec::new(); - // Environment variable names can have any characters from the Portable Character Set other - // than NUL. However, for Vector's interpolation, we are closer to what a shell supports which - // is solely of uppercase letters, digits, and the '_' (that is, the `[:word:]` regex class). - // In addition to these characters, we allow `.` as this commonly appears in environment - // variable names when they come from a Java properties file. - // - // https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html - let re = Regex::new( - r"(?x) - \$\$| - \$([[:word:].]+)| - \$\{([[:word:].]+)(?:(:?-|:?\?)([^}]*))?\}", - ) - .unwrap(); - - let interpolated = re + let interpolated = ENVIRONMENT_VARIABLE_INTERPOLATION_REGEX .replace_all(input, |caps: &Captures<'_>| { let flags = caps.get(3).map(|m| m.as_str()).unwrap_or_default(); let def_or_err = caps.get(4).map(|m| m.as_str()).unwrap_or_default();