Skip to content

Ruby: Fix spurious flow through reverse stores #10574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 28 additions & 21 deletions docs/ql-libraries/dataflow/dataflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -419,20 +419,31 @@ class Content extends TContent {

newtype TContentSet =
TSingletonContent(Content c) or
TKnownOrUnknownArrayElementContent(TKnownArrayElementContent c) or
TAnyArrayElementContent()

class ContentSet extends TContentSet {
Content getAStoreContent() {
this = TSingletonContent(result)
or
// for reverse stores
this = TKnownOrUnknownArrayElementContent(result)
or
// for reverse stores
this = TAnyArrayElementContent() and
result = TUnknownArrayElementContent()
}

Content getAReadContent() {
this = TSingletonContent(result)
or
exists(TKnownArrayElementContent c |
this = TKnownOrUnknownArrayElementContent(c) |
result = c
or
result = TUnknownArrayElementContent()
)
or
this = TAnyArrayElementContent() and
(result = TUnknownArrayElementContent() or result = TKnownArrayElementContent(_))
}
Expand All @@ -447,12 +458,10 @@ a[0] = tainted
# storeStep(not_tainted, TSingletonContent(TKnownArrayElementContent(1)), [post update] a)
a[1] = not_tainted

# readStep(a, TSingletonContent(TKnownArrayElementContent(0)), a[0])
# readStep(a, TSingletonContent(TUnknownArrayElementContent()), a[0])
# readStep(a, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), a[0])
sink(a[0]) # bad

# readStep(a, TSingletonContent(TKnownArrayElementContent(1)), a[1])
# readStep(a, TSingletonContent(TUnknownArrayElementContent()), a[1])
# readStep(a, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), a[1])
sink(a[1]) # good

# readStep(a, TAnyArrayElementContent(), a[unknown])
Expand All @@ -461,26 +470,24 @@ sink(a[unknown]) # bad; unknown may be 0
# storeStep(tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] b)
b[unknown] = tainted

# readStep(b, TSingletonContent(TKnownArrayElementContent(0)), b[0])
# readStep(b, TSingletonContent(TUnknownArrayElementContent()), b[0])
# readStep(b, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), b[0])
sink(b[0]) # bad; unknown may be 0

# storeStep(tainted, TSingletonContent(TKnownArrayElementContent(0)), [post update] c[unknown])
# storeStep(not_tainted, TSingletonContent(TKnownArrayElementContent(1)), [post update] c[unknown])
# readStep(c, TAnyArrayElementContent(), c[unknown])
# storeStep([post update] c[unknown], TAnyArrayElementContent(), [post update] c) # auto-generated reverse store (see Example 2)
c[unknown][0] = tainted
c[unknown][1] = not_tainted


# readStep(c[0], TSingletonContent(TKnownArrayElementContent(0)), c[0][0])
# readStep(c[0], TSingletonContent(TUnknownArrayElementContent()), c[0][0])
# readStep(c[0], TSingletonContent(TKnownArrayElementContent(1)), c[0][1])
# readStep(c[0], TSingletonContent(TUnknownArrayElementContent()), c[0][1])
# readStep(c, TSingletonContent(TKnownArrayElementContent(0)), c[0])
# readStep(c, TSingletonContent(TUnknownArrayElementContent()), c[0])
# storeStep(tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] c[0])
# storeStep(not_tainted, TSingletonContent(TUnknownArrayElementContent()), [post update] c[1])
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0])
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), c[1])
# storeStep([post update] c[0], TSingletonContent(TKnownArrayElementContent(0)), [post update] c) # auto-generated reverse store (see Example 2)
# storeStep([post update] c[1], TSingletonContent(TKnownArrayElementContent(1)), [post update] c) # auto-generated reverse store (see Example 2)
c[0][unknown] = tainted
c[1][unknown] = not_tainted

# readStep(c[0], TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0][0])
# readStep(c[1], TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[1][0])
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(0)), c[0])
# readStep(c, TKnownOrUnknownArrayElementContent(TKnownArrayElementContent(1)), c[1])
sink(c[0][0]) # bad; unknown may be 0
sink(c[0][1]) # good
sink(c[1][0]) # good
```

### Field flow barriers
Expand Down
21 changes: 20 additions & 1 deletion ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,17 @@ module SummaryComponent {
result = SC::content(TSingletonContent(DataFlow::Content::getElementContent(cv)))
}

/**
* Gets a summary component that represents an element in a collection at a specific
* known index `cv`, or an uknown index.
*/
SummaryComponent elementKnownOrUnknown(ConstantValue cv) {
result = SC::content(TKnownOrUnknownElementContent(TKnownElementContent(cv)))
or
not exists(TKnownElementContent(cv)) and
result = elementUnknown()
}

/**
* Gets a summary component that represents an element in a collection at either an unknown
* index or known index. This has the same semantics as
Expand All @@ -62,7 +73,15 @@ module SummaryComponent {
* integer index `lower` or above.
*/
SummaryComponent elementLowerBound(int lower) {
result = SC::content(TElementLowerBoundContent(lower))
result = SC::content(TElementLowerBoundContent(lower, false))
}

/**
* Gets a summary component that represents an element in a collection at known
* integer index `lower` or above, or possibly at an unknown index.
*/
SummaryComponent elementLowerBoundOrUnknown(int lower) {
result = SC::content(TElementLowerBoundContent(lower, true))
}

/** Gets a summary component that represents a value in a pair at an unknown key. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,10 @@ private module Cached {
newtype TContentSet =
TSingletonContent(Content c) or
TAnyElementContent() or
TElementLowerBoundContent(int lower) {
FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, lower)
TKnownOrUnknownElementContent(Content::KnownElementContent c) or
TElementLowerBoundContent(int lower, boolean includeUnknown) {
FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown,
lower)
}

cached
Expand Down
61 changes: 54 additions & 7 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,28 @@ class ContentSet extends TContentSet {
/** Holds if this content set represents all `ElementContent`s. */
predicate isAnyElement() { this = TAnyElementContent() }

/**
* Holds if this content set represents a specific known element index, or an
* unknown element index.
*/
predicate isKnownOrUnknownElement(Content::KnownElementContent c) {
this = TKnownOrUnknownElementContent(c)
}

/**
* Holds if this content set represents all `KnownElementContent`s where
* the index is an integer greater than or equal to `lower`.
*/
predicate isElementLowerBound(int lower) { this = TElementLowerBoundContent(lower) }
predicate isElementLowerBound(int lower) { this = TElementLowerBoundContent(lower, false) }

/**
* Holds if this content set represents `UnknownElementContent` unioned with
* all `KnownElementContent`s where the index is an integer greater than or
* equal to `lower`.
*/
predicate isElementLowerBoundOrUnknown(int lower) {
this = TElementLowerBoundContent(lower, true)
}

/** Gets a textual representation of this content set. */
string toString() {
Expand All @@ -345,8 +362,18 @@ class ContentSet extends TContentSet {
this.isAnyElement() and
result = "any element"
or
exists(int lower |
this.isElementLowerBound(lower) and
exists(Content::KnownElementContent c |
this.isKnownOrUnknownElement(c) and
result = c + " or unknown"
)
or
exists(int lower, boolean includeUnknown |
this = TElementLowerBoundContent(lower, includeUnknown)
|
includeUnknown = false and
result = lower + "..!"
or
includeUnknown = true and
result = lower + ".."
)
}
Expand All @@ -355,9 +382,18 @@ class ContentSet extends TContentSet {
Content getAStoreContent() {
this.isSingleton(result)
or
// For reverse stores, `a[unknown][0] = x`, it is important that the read-step
// from `a` to `a[unknown]` (which can read any element), gets translated into
// a reverse store step that store only into `?`
this.isAnyElement() and
result = TUnknownElementContent()
or
// For reverse stores, `a[1][0] = x`, it is important that the read-step
// from `a` to `a[1]` (which can read both elements stored at exactly index `1`
// and elements stored at unknown index), gets translated into a reverse store
// step that store only into `1`
this.isKnownOrUnknownElement(result)
or
this.isElementLowerBound(_) and
result = TUnknownElementContent()
}
Expand All @@ -369,10 +405,21 @@ class ContentSet extends TContentSet {
this.isAnyElement() and
result instanceof Content::ElementContent
or
exists(int lower, int i |
this.isElementLowerBound(lower) and
result.(Content::KnownElementContent).getIndex().isInt(i) and
i >= lower
exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) |
result = c or
result = TUnknownElementContent()
)
or
exists(int lower, boolean includeUnknown |
this = TElementLowerBoundContent(lower, includeUnknown)
|
exists(int i |
result.(Content::KnownElementContent).getIndex().isInt(i) and
i >= lower
)
or
includeUnknown = true and
result = TUnknownElementContent()
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,30 @@ private SummaryComponent interpretElementArg(string arg) {
arg = "any" and
result = FlowSummary::SummaryComponent::elementAny()
or
exists(int lower |
ParsePositions::isParsedElementLowerBoundPosition(arg, lower) and
exists(int lower, boolean includeUnknown |
ParsePositions::isParsedElementLowerBoundPosition(arg, includeUnknown, lower)
|
includeUnknown = false and
result = FlowSummary::SummaryComponent::elementLowerBound(lower)
or
includeUnknown = true and
result = FlowSummary::SummaryComponent::elementLowerBoundOrUnknown(lower)
)
or
exists(ConstantValue cv | result = FlowSummary::SummaryComponent::elementKnown(cv) |
cv.isInt(AccessPath::parseInt(arg))
exists(ConstantValue cv, string argAdjusted, boolean includeUnknown |
argAdjusted = ParsePositions::adjustElementArgument(arg, includeUnknown) and
(
includeUnknown = false and
result = FlowSummary::SummaryComponent::elementKnown(cv)
or
includeUnknown = true and
result = FlowSummary::SummaryComponent::elementKnownOrUnknown(cv)
)
|
cv.isInt(AccessPath::parseInt(argAdjusted))
or
not exists(AccessPath::parseInt(arg)) and
cv.serialize() = arg
not exists(AccessPath::parseInt(argAdjusted)) and
cv.serialize() = argAdjusted
)
}

Expand Down Expand Up @@ -277,9 +291,19 @@ module ParsePositions {
c = paramName + ":"
}

predicate isParsedElementLowerBoundPosition(string c, int lower) {
bindingset[arg]
string adjustElementArgument(string arg, boolean includeUnknown) {
result = arg.regexpCapture("(.*)!", 1) and
includeUnknown = false
or
result = arg and
not arg.matches("%!") and
includeUnknown = true
}

predicate isParsedElementLowerBoundPosition(string c, boolean includeUnknown, int lower) {
isElementBody(c) and
lower = AccessPath::parseLowerBound(c)
lower = AccessPath::parseLowerBound(adjustElementArgument(c, includeUnknown))
}
}

Expand Down
Loading