Skip to content

Ruby: Use additional sensitive data heuristics for CleartextSources #16503

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ruby/ql/lib/change-notes/2024-05-15-cleartext-sources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The `CleartextSources.qll` library, used by `rb/clear-text-logging-sensitive-data` and `rb/clear-text-logging-sensitive-data`, has been updated to consider heuristics for additional categories of sensitive data.
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ private module Cached {
newtype TOptionalContentSet =
TSingletonContent(Content c) or
TAnyElementContent() or
TAnyContent() or
TKnownOrUnknownElementContent(Content::KnownElementContent c) or
TElementLowerBoundContent(int lower, boolean includeUnknown) {
FlowSummaryImpl::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown, lower)
Expand All @@ -736,7 +737,7 @@ private module Cached {

cached
class TContentSet =
TSingletonContent or TAnyElementContent or TKnownOrUnknownElementContent or
TSingletonContent or TAnyElementContent or TAnyContent or TKnownOrUnknownElementContent or
TElementLowerBoundContent or TElementContentOfTypeContent;

private predicate trackKnownValue(ConstantValue cv) {
Expand Down
28 changes: 21 additions & 7 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,9 @@ class ContentSet extends TContentSet {
/** Holds if this content set represents all `ElementContent`s. */
predicate isAnyElement() { this = TAnyElementContent() }

/** Holds if this content set represents all contents. */
predicate isAny() { this = TAnyContent() }

/**
* Holds if this content set represents a specific known element index, or an
* unknown element index.
Expand Down Expand Up @@ -737,6 +740,9 @@ class ContentSet extends TContentSet {
this.isAnyElement() and
result = "any element"
or
this.isAny() and
result = "any"
or
exists(Content::KnownElementContent c |
this.isKnownOrUnknownElement(c) and
result = c + " or unknown"
Expand Down Expand Up @@ -790,13 +796,8 @@ class ContentSet extends TContentSet {
result = TUnknownElementContent()
}

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() {
this.isSingleton(result)
or
this.isAnyElement() and
result instanceof Content::ElementContent
or
pragma[nomagic]
private Content getAnElementReadContent() {
exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) |
result = c or
result = TSplatContent(c.getIndex().getInt(), _) or
Expand Down Expand Up @@ -832,6 +833,19 @@ class ContentSet extends TContentSet {
result = TUnknownElementContent()
)
}

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() {
this.isSingleton(result)
or
this.isAnyElement() and
result instanceof Content::ElementContent
or
this.isAny() and
exists(result)
or
result = this.getAnElementReadContent()
}
}

/**
Expand Down
5 changes: 5 additions & 0 deletions ruby/ql/lib/codeql/ruby/security/CleartextLoggingQuery.qll
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ private module Config implements DataFlow::ConfigSig {
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
CL::isAdditionalTaintStep(nodeFrom, nodeTo)
}

predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
cs.isAny() and
isSink(node)
}
}

/**
Expand Down
5 changes: 5 additions & 0 deletions ruby/ql/lib/codeql/ruby/security/CleartextStorageQuery.qll
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ private module Config implements DataFlow::ConfigSig {
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
CS::isAdditionalTaintStep(nodeFrom, nodeTo)
}

predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
cs.isAny() and
isSink(node)
}
}

/**
Expand Down
100 changes: 67 additions & 33 deletions ruby/ql/lib/codeql/ruby/security/internal/CleartextSources.qll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking::TaintTracking
private import codeql.ruby.dataflow.RemoteFlowSources
private import SensitiveDataHeuristics::HeuristicNames
private import SensitiveDataHeuristics
private import codeql.ruby.CFG
private import codeql.ruby.dataflow.SSA

Expand Down Expand Up @@ -39,6 +40,34 @@ module CleartextSources {
re.getConstantValue().getStringlikeValue() = [".*", ".+"]
}

/** Holds if `c` is a sensitive data classification that is relevant to consider for Cleartext Storage queries. */
private predicate isRelevantClassification(SensitiveDataClassification c) {
c =
[
SensitiveDataClassification::password(), SensitiveDataClassification::certificate(),
SensitiveDataClassification::secret(), SensitiveDataClassification::private()
]
}

pragma[noinline]
private string getCombinedRelevantSensitiveRegexp() {
// Combine all the maybe-sensitive regexps into one using non-capturing groups and |.
result =
"(?:" +
strictconcat(string r, SensitiveDataClassification c |
r = maybeSensitiveRegexp(c) and isRelevantClassification(c)
|
r, ")|(?:"
) + ")"
}

/** Holds if the given name indicates the presence of sensitive data that is relevant to consider for Cleartext Storage queries. */
bindingset[name]
private predicate nameIndicatesRelevantSensitiveData(string name) {
name.regexpMatch(getCombinedRelevantSensitiveRegexp()) and
not name.regexpMatch(notSensitiveRegexp())
}

/**
* Holds if `re` may be a regular expression that can be used to sanitize
* sensitive data with a call to `gsub`.
Expand Down Expand Up @@ -92,17 +121,17 @@ module CleartextSources {
}

/**
* A call that might obfuscate a password, for example through hashing.
* A call that might obfuscate sensitive data, for example through hashing.
*/
private class ObfuscatorCall extends Sanitizer, DataFlow::CallNode {
ObfuscatorCall() { nameIsNotSensitive(this.getMethodName()) }
}

/**
* A data flow node that does not contain a clear-text password, according to its syntactic name.
* A data flow node that does not contain clear-text sensitive data, according to its syntactic name.
*/
private class NameGuidedNonCleartextPassword extends NonCleartextPassword {
NameGuidedNonCleartextPassword() {
private class NameGuidedNonCleartextSensitive extends NonCleartextSensitive {
NameGuidedNonCleartextSensitive() {
exists(string name | nameIsNotSensitive(name) |
// accessing a non-sensitive variable
this.asExpr().getExpr().(VariableReadAccess).getVariable().getName() = name
Expand All @@ -129,18 +158,23 @@ module CleartextSources {
}

/**
* A data flow node that receives flow that is not a clear-text password.
* A data flow node that receives flow that is not clear-text sensitive data.
*/
class NonCleartextPasswordFlow extends NonCleartextPassword {
NonCleartextPasswordFlow() {
any(NonCleartextPassword other).(DataFlow::LocalSourceNode).flowsTo(this)
class NonCleartextSensitiveFlow extends NonCleartextSensitive {
NonCleartextSensitiveFlow() {
any(NonCleartextSensitive other).(DataFlow::LocalSourceNode).flowsTo(this)
}
}

/**
* A data flow node that does not contain a clear-text password.
* DEPRECATED: Use NonCleartextSensitiveFlow instead.
*/
deprecated class NonCleartextPasswordFlow = NonCleartextSensitiveFlow;

/**
* A data flow node that does not contain clear-text sensitive data.
*/
abstract private class NonCleartextPassword extends DataFlow::Node { }
abstract private class NonCleartextSensitive extends DataFlow::Node { }

// `writeNode` assigns pair with key `name` to `val`
private predicate hashKeyWrite(DataFlow::CallNode writeNode, string name, DataFlow::Node val) {
Expand All @@ -153,18 +187,18 @@ module CleartextSources {
}

/**
* A value written to a hash entry with a key that may contain password information.
* A value written to a hash entry with a key that may contain sensitive information.
*/
private class HashKeyWritePasswordSource extends Source {
private class HashKeyWriteSensitiveSource extends Source {
private string name;
private DataFlow::ExprNode recv;

HashKeyWritePasswordSource() {
HashKeyWriteSensitiveSource() {
exists(DataFlow::CallNode writeNode |
name.regexpMatch(maybePassword()) and
nameIndicatesRelevantSensitiveData(name) and
not nameIsNotSensitive(name) and
// avoid safe values assigned to presumably unsafe names
not this instanceof NonCleartextPassword and
not this instanceof NonCleartextSensitive and
// hash[name] = val
hashKeyWrite(writeNode, name, this) and
recv = writeNode.getReceiver()
Expand All @@ -177,7 +211,7 @@ module CleartextSources {
string getName() { result = name }

/**
* Gets the name of the hash variable that this password source is assigned
* Gets the name of the hash variable that this sensitive source is assigned
* to, if applicable.
*/
LocalVariable getVariable() {
Expand All @@ -186,17 +220,17 @@ module CleartextSources {
}

/**
* An entry into a hash literal that may contain a password
* An entry into a hash literal that may contain sensitive data
*/
private class HashLiteralPasswordSource extends Source {
private class HashLiteralSensitiveSource extends Source {
private string name;

HashLiteralPasswordSource() {
HashLiteralSensitiveSource() {
exists(CfgNodes::ExprNodes::HashLiteralCfgNode lit |
name.regexpMatch(maybePassword()) and
nameIndicatesRelevantSensitiveData(name) and
not nameIsNotSensitive(name) and
// avoid safe values assigned to presumably unsafe names
not this instanceof NonCleartextPassword and
not this instanceof NonCleartextSensitive and
// hash = { name: val }
exists(CfgNodes::ExprNodes::PairCfgNode p | p = lit.getAKeyValuePair() |
p.getKey().getConstantValue().getStringlikeValue() = name and
Expand All @@ -208,14 +242,14 @@ module CleartextSources {
override string describe() { result = "a write to " + name }
}

/** An assignment that may assign a password to a variable */
private class AssignPasswordVariableSource extends Source {
/** An assignment that may assign sensitive data to a variable */
private class AssignSensitiveVariableSource extends Source {
string name;

AssignPasswordVariableSource() {
AssignSensitiveVariableSource() {
// avoid safe values assigned to presumably unsafe names
not this instanceof NonCleartextPassword and
name.regexpMatch(maybePassword()) and
not this instanceof NonCleartextSensitive and
nameIndicatesRelevantSensitiveData(name) and
not nameIsNotSensitive(name) and
exists(Assignment a |
this.asExpr().getExpr() = a.getRightOperand() and
Expand All @@ -226,14 +260,14 @@ module CleartextSources {
override string describe() { result = "an assignment to " + name }
}

/** A parameter that may contain a password. */
private class ParameterPasswordSource extends Source {
/** A parameter that may contain sensitive data. */
private class ParameterSensitiveSource extends Source {
private string name;

ParameterPasswordSource() {
name.regexpMatch(maybePassword()) and
ParameterSensitiveSource() {
nameIndicatesRelevantSensitiveData(name) and
not nameIsNotSensitive(name) and
not this instanceof NonCleartextPassword and
not this instanceof NonCleartextSensitive and
exists(Parameter p, LocalVariable v |
v = p.getAVariable() and
v.getName() = name and
Expand All @@ -260,10 +294,10 @@ module CleartextSources {
deprecated predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(string name, ElementReference ref, LocalVariable hashVar |
// from `hsh[password] = "changeme"` to a `hsh[password]` read
nodeFrom.(HashKeyWritePasswordSource).getName() = name and
nodeFrom.(HashKeyWriteSensitiveSource).getName() = name and
nodeTo.asExpr().getExpr() = ref and
ref.getArgument(0).getConstantValue().getStringlikeValue() = name and
nodeFrom.(HashKeyWritePasswordSource).getVariable() = hashVar and
nodeFrom.(HashKeyWriteSensitiveSource).getVariable() = hashVar and
ref.getReceiver().(VariableReadAccess).getVariable() = hashVar and
nodeFrom.asExpr().getASuccessor*() = nodeTo.asExpr()
)
Expand Down
Loading