Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,25 @@
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.jboss.logging.Logger;

/**
* Utility to detect and sanitize sensitive data in POM content before processing.
* <p>
* Scans for common credential patterns (passwords, tokens, API keys, secrets) in XML element values and masks them.
* Property placeholders (e.g., {@code ${db.password}}) are preserved since they reference external values and do not
* contain actual secrets.
* Also detects credentials embedded in URLs ({@code ://user:password@host}). Property placeholders (e.g.,
* {@code ${db.password}}, {@code {{vault:password}}}) are preserved since they reference external values and do not
* contain actual secrets. CDATA-wrapped values are inspected and masked when appropriate.
* <p>
* <b>Limitations:</b> Detection is tag-name-based using keyword matching. This means:
* <ul>
* <li><b>False positives</b> — non-secret values in elements whose names happen to contain a keyword (e.g.,
* {@code <password-policy>strict</password-policy>},
* {@code <token-refresh-interval>300</token-refresh-interval>}).</li>
* <li><b>False negatives</b> — actual secrets in elements with non-obvious names (e.g., credentials embedded in JDBC
* URLs, or elements named {@code <my.credential>} where the singular form is not in the keyword list).</li>
* <li><b>False negatives</b> — actual secrets in elements with non-obvious names (e.g., elements named
* {@code <my.credential>} where the singular form is not in the keyword list). URL credential detection is limited to
* the {@code ://user:password@host} pattern.</li>
* </ul>
* This heuristic is a best-effort safety net, not a guarantee. Users should still avoid passing sensitive data.
*/
Expand All @@ -49,17 +50,25 @@ final class PomSanitizer {

// Regex alternation of name fragments that mark an XML element as sensitive. It is concatenated into
// SENSITIVE_ELEMENT_PATTERN between two tag-name character classes, so a fragment may sit anywhere in the name.
// NOTE(review): this span looks like it was copied from a rendered diff without +/- markers: the first
// "|accesskey...credentials" line ends the expression with ';' and the next line repeats it without one, so the
// two are presumably the removed and added sides of a single change. Confirm against the real file.
private static final String SENSITIVE_KEYWORDS
= "password|passwd|token|apikey|api-key|api_key|secret|secretkey|secret-key|secret_key"
+ "|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials";
+ "|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials"
+ "|connection-string|connectionstring|connection_string";

/**
* Pattern matching XML elements whose tag names contain sensitive keywords. Captures: group(1) = element name,
* group(2) = element value.
* group(2) = full content between tags (including whitespace and optional CDATA wrapper).
* <p>
* The closing tag is matched through a back-reference to group(1), so it has to repeat the opening tag name.
* <p>
* NOTE(review): two alternative value fragments (a plain-text one and a CDATA-aware one) and two flag arguments
* appear back to back below; they look like the removed and added sides of a diff rendered without markers.
* Confirm which lines belong to the real file.
*/
private static final Pattern SENSITIVE_ELEMENT_PATTERN = Pattern.compile(
"<([a-zA-Z0-9_.:-]*(?:" + SENSITIVE_KEYWORDS + ")[a-zA-Z0-9_.:-]*)>"
+ "\\s*([^<]+?)\\s*"
+ "(\\s*(?:<!\\[CDATA\\[.*?\\]\\]>|[^<]+?)\\s*)"
+ "</\\1>",
Pattern.CASE_INSENSITIVE);
Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Pattern matching URL-embedded credentials ({@code ://user:password@host}). Captures: group(1) = scheme separator
 * through username ({@code ://user}), group(2) = password.
 * <p>
 * The username may be empty, so password-only forms such as {@code redis://:secret@host} are matched as well; in
 * that case group(1) is just {@code ://}. Neither part may contain {@code /}, {@code @} or whitespace, and the
 * username may not contain {@code :}. A plain {@code ://host:port/path} therefore does not match, because no
 * {@code @} follows the port before the first {@code /}.
 */
private static final Pattern URL_CREDENTIAL_PATTERN = Pattern.compile(
        "(://[^/@\\s:]*):([^/@\\s]+)@");

// Private so the class cannot be instantiated from outside; every member visible here is static.
private PomSanitizer() {
}
Expand All @@ -70,48 +79,41 @@ private PomSanitizer() {
* @return list of element names that contain sensitive values
*/
static List<String> detectSensitiveContent(String pomContent) {
// NOTE(review): this body seems to hold both sides of a diff hunk rendered without +/- markers: a standalone
// scan that ends in its own return, followed by a one-line delegation to sanitize(). Only one of the two can
// be the real body - confirm against the actual file.

// Insertion-ordered set: each element name is recorded once, in order of first match.
Set<String> findings = new LinkedHashSet<>();

Matcher matcher = SENSITIVE_ELEMENT_PATTERN.matcher(pomContent);
while (matcher.find()) {
String value = matcher.group(2).trim();
// Property placeholders like ${my.password} are not actual secrets
if (!value.startsWith("${")) {
findings.add(matcher.group(1));
}
}

return new ArrayList<>(findings);
// Delegating variant: runs the full sanitization pass and returns only the names it recorded.
return sanitize(pomContent).detectedPatterns();
}

/**
* Sanitize POM content by masking sensitive element values and URL-embedded credentials.
* <p>
* Property placeholders (e.g., {@code ${db.password}}, {@code {{vault:password}}}) are preserved since they do not
* contain actual secret values. CDATA-wrapped values are inspected and masked when they contain plain-text secrets.
* <p>
* Masked element values are replaced by {@code ***MASKED***}; for URL credentials only the password part is
* replaced. A warning listing the detected names is logged when anything was found.
* <p>
* NOTE(review): the body below contains duplicate declarations of {@code detected} and {@code sanitized}, two
* lambda openings and two return statements. They look like the removed and added sides of a diff rendered
* without markers; confirm which lines belong to the real file.
*
* @param pomContent the POM text to scan
* @return sanitization result with the processed POM content and detected patterns
*/
static SanitizationResult sanitize(String pomContent) {
List<String> detected = detectSensitiveContent(pomContent);

String sanitized = pomContent;
// Insertion-ordered set: each finding is recorded once, in order of first occurrence.
Set<String> detected = new LinkedHashSet<>();

// Mask sensitive element values (preserve property placeholders)
sanitized = SENSITIVE_ELEMENT_PATTERN.matcher(sanitized).replaceAll(mr -> {
String value = mr.group(2).trim();
if (value.startsWith("${")) {
String sanitized = SENSITIVE_ELEMENT_PATTERN.matcher(pomContent).replaceAll(mr -> {
String elementName = mr.group(1);
// Strip surrounding whitespace and any CDATA wrapper before deciding whether the value is a placeholder.
String value = extractValue(mr.group(2));
if (isPlaceholder(value)) {
// Re-emit the match unchanged; quoteReplacement keeps '$' and '\' from being read as group references.
return Matcher.quoteReplacement(mr.group());
}
detected.add(elementName);
return Matcher.quoteReplacement(
"<" + mr.group(1) + ">***MASKED***</" + mr.group(1) + ">");
"<" + elementName + ">***MASKED***</" + elementName + ">");
});

// Second pass, run on the output of the element pass: hide the password part of ://user:password@ URLs.
sanitized = URL_CREDENTIAL_PATTERN.matcher(sanitized).replaceAll(mr -> {
detected.add("(URL credential)");
return Matcher.quoteReplacement(mr.group(1) + ":***MASKED***@");
});

if (!detected.isEmpty()) {
LOG.warnf("Sensitive data detected in pomContent: %s. Content was sanitized before processing.", detected);
}

return new SanitizationResult(sanitized, detected);
return new SanitizationResult(sanitized, new ArrayList<>(detected));
}

/**
Expand All @@ -129,11 +131,24 @@ static ProcessedPom process(String pomContent, Boolean sanitize) {
List<String> warnings = new ArrayList<>();
if (!sr.detectedPatterns().isEmpty()) {
warnings.add("Sensitive data detected and masked: "
+ sr.detectedPatterns().stream().collect(Collectors.joining(", ")));
+ String.join(", ", sr.detectedPatterns()));
}
return new ProcessedPom(sr.pomContent(), warnings);
}

/**
 * Reduces the raw text found between an element's tags to the value that should be inspected.
 * <p>
 * Surrounding whitespace is removed first. If what remains is wrapped in a CDATA section, the wrapper is dropped
 * and the inner text is trimmed again; otherwise the trimmed text is returned as is.
 *
 * @param content raw text between the opening and closing tag
 * @return the trimmed value without a CDATA wrapper
 */
private static String extractValue(String content) {
    final String cdataOpen = "<![CDATA[";
    final String cdataClose = "]]>";

    String text = content.trim();
    boolean wrapped = text.startsWith(cdataOpen) && text.endsWith(cdataClose);
    if (!wrapped) {
        return text;
    }
    String inner = text.substring(cdataOpen.length(), text.length() - cdataClose.length());
    return inner.trim();
}

/**
 * Tells whether a value is a placeholder reference rather than a literal.
 * <p>
 * Two forms are accepted: {@code ${...}} and {@code {{...}}}. In both cases the value must start with the opening
 * delimiter and end with the matching closing delimiter. An unterminated {@code ${...} prefix followed by literal
 * text is therefore not a placeholder, the same rule already applied to the double-brace form.
 *
 * @param value the trimmed element value
 * @return {@code true} if the value is delimited as a placeholder on both ends
 */
private static boolean isPlaceholder(String value) {
    boolean dollarBrace = value.startsWith("${") && value.endsWith("}");
    boolean doubleBrace = value.startsWith("{{") && value.endsWith("}}");
    return dollarBrace || doubleBrace;
}

record SanitizationResult(
String pomContent,
List<String> detectedPatterns) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,99 @@ void detectsPassphraseElement() {
assertThat(findings).anyMatch(f -> f.contains("passphrase"));
}

/** A {@code connectionString} element with a literal value must be reported by its element name. */
@Test
void detectsConnectionStringElement() {
    String xml
            = "<project><properties><connectionString>Server=myserver;Password=secret123</connectionString></properties></project>";

    List<String> reported = PomSanitizer.detectSensitiveContent(xml);

    assertThat(reported).anyMatch(name -> name.contains("connectionString"));
}

// ---- CDATA tests ----

/** A literal secret wrapped in a CDATA section must still be reported. */
@Test
void detectsCdataWrappedSecrets() {
    String xml = "<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";

    List<String> reported = PomSanitizer.detectSensitiveContent(xml);

    assertThat(reported).anyMatch(name -> name.contains("password"));
}

/** A CDATA-wrapped secret must be replaced by the mask, with the CDATA wrapper removed from the output. */
@Test
void masksCdataWrappedSecrets() {
    String xml = "<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";

    String masked = PomSanitizer.sanitize(xml).pomContent();

    assertThat(masked)
            .doesNotContain("superSecret123")
            .contains("<db.password>***MASKED***</db.password>");
}

/** A {@code ${...}} placeholder inside CDATA must be kept and must not count as a finding. */
@Test
void preservesPlaceholderInsideCdata() {
    String xml = "<project><properties><db.password><![CDATA[${env.DB_PASSWORD}]]></db.password></properties></project>";

    PomSanitizer.SanitizationResult outcome = PomSanitizer.sanitize(xml);

    assertThat(outcome.pomContent()).contains("${env.DB_PASSWORD}");
    assertThat(outcome.detectedPatterns()).isEmpty();
}

// ---- Camel property placeholder tests ----

/** Values written entirely as {@code {{...}}} placeholders must not be reported. */
@Test
void ignoresCamelPropertyPlaceholders() {
    String xml = "<project><properties>"
            + "<db.password>{{vault:db/password}}</db.password>"
            + "<api.token>{{my.token}}</api.token>"
            + "</properties></project>";

    assertThat(PomSanitizer.detectSensitiveContent(xml)).isEmpty();
}

/** A {@code {{...}}} placeholder must survive sanitization unchanged and produce no finding. */
@Test
void preservesCamelPropertyPlaceholders() {
    String xml = "<project><properties><db.password>{{vault:db/password}}</db.password></properties></project>";

    PomSanitizer.SanitizationResult outcome = PomSanitizer.sanitize(xml);

    assertThat(outcome.pomContent()).contains("{{vault:db/password}}");
    assertThat(outcome.detectedPatterns()).isEmpty();
}

/** A value that opens with {@code {{} but is never closed is treated as a literal and masked. */
@Test
void masksPartialCamelPlaceholder() {
    String xml = "<project><properties><db.password>{{notClosed</db.password></properties></project>";

    String masked = PomSanitizer.sanitize(xml).pomContent();

    assertThat(masked).contains("***MASKED***");
}

// ---- URL credential tests ----

/** The password in a {@code ://user:password@host} URL must be masked and reported as a URL credential. */
@Test
void detectsUrlEmbeddedCredentials() {
    String xml = "<project><properties>"
            + "<db.url>jdbc:mysql://admin:s3cret@db.example.com:3306/mydb</db.url>"
            + "</properties></project>";

    PomSanitizer.SanitizationResult outcome = PomSanitizer.sanitize(xml);

    assertThat(outcome.pomContent())
            .doesNotContain("s3cret")
            .contains("://admin:***MASKED***@");
    assertThat(outcome.detectedPatterns()).anyMatch(label -> label.contains("URL credential"));
}

/** When a sensitive element holds a credential URL, the whole element value is masked, not just the password. */
@Test
void elementPatternTakesPrecedenceOverUrlPattern() {
    String xml = "<project><properties>"
            + "<db.password>jdbc:mysql://admin:s3cret@host/db</db.password>"
            + "</properties></project>";

    String masked = PomSanitizer.sanitize(xml).pomContent();

    assertThat(masked)
            .contains("<db.password>***MASKED***</db.password>")
            .doesNotContain("s3cret");
}

/** A URL with only a host and port, and no userinfo, must pass through untouched with no finding. */
@Test
void noFalsePositiveOnPortNumbers() {
    String xml = "<project><properties>"
            + "<db.url>http://localhost:8080/api</db.url>"
            + "</properties></project>";

    PomSanitizer.SanitizationResult outcome = PomSanitizer.sanitize(xml);

    assertThat(outcome.pomContent()).contains("http://localhost:8080/api");
    assertThat(outcome.detectedPatterns()).isEmpty();
}

// ---- Process helper tests ----

@Test
Expand Down