diff --git a/cpp/ql/lib/change-notes/2025-11-19-content.md b/cpp/ql/lib/change-notes/2025-11-19-content.md new file mode 100644 index 000000000000..e16bfc903bf9 --- /dev/null +++ b/cpp/ql/lib/change-notes/2025-11-19-content.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The class `DataFlow::FieldContent` now covers both `union` and `struct`/`class` types. A new predicate `FieldContent.getAField` has been added to access the union members associated with the `FieldContent`. The old `FieldContent` has been renamed to `NonUnionFieldContent`. \ No newline at end of file diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 582391e81cc9..b5f4f88f4bd5 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -861,6 +861,10 @@ predicate jumpStep(Node n1, Node n2) { n2.(FlowSummaryNode).getSummaryNode()) } +bindingset[c] +pragma[inline_late] +private int getIndirectionIndexLate(Content c) { result = c.getIndirectionIndex() } + /** * Holds if data can flow from `node1` to `node2` via an assignment to `f`. * Thus, `node2` references an object with a field `f` that contains the @@ -873,23 +877,17 @@ predicate jumpStep(Node n1, Node n2) { predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) { exists( PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads, - StoreInstruction store + StoreInstruction store, FieldContent fc | postFieldUpdate = node2 and - nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and + fc = c and + nodeHasInstruction(node1, pragma[only_bind_into](store), + pragma[only_bind_into](indirectionIndex1)) and postFieldUpdate.getIndirectionIndex() = 1 and numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(), - store.getDestinationAddressOperand(), numberOfLoads, certain) - | - exists(FieldContent fc | fc = c | - fc.getField() = postFieldUpdate.getUpdatedField() and - fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads - ) - or - exists(UnionContent uc | uc = c | - uc.getAField() = postFieldUpdate.getUpdatedField() and - uc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads - ) + store.getDestinationAddressOperand(), numberOfLoads, certain) and + fc.getAField() = postFieldUpdate.getUpdatedField() and + getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads ) or // models-as-data summarized flow @@ -965,22 +963,17 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex) * `node2`. */ predicate readStep(Node node1, ContentSet c, Node node2) { - exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 | + exists( + FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc + | + fc = c and nodeHasOperand(node2, operand, indirectionIndex2) and // The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct // in `storeStep`. nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and - numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) - | - exists(FieldContent fc | fc = c | - fc.getField() = fa1.getField() and - fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads - ) - or - exists(UnionContent uc | uc = c | - uc.getAField() = fa1.getField() and - uc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads - ) + numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and + fc.getAField() = fa1.getField() and + getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads ) or // models-as-data summarized flow @@ -1574,7 +1567,7 @@ pragma[inline] ContentApprox getContentApprox(Content c) { exists(string prefix, Field f | prefix = result.(FieldApproxContent).getPrefix() and - f = c.(FieldContent).getField() and + f = c.(NonUnionFieldContent).getField() and fieldHasApproxName(f, prefix) ) or diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 82dcd43e1364..7c2b15b18127 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2093,8 +2093,8 @@ private Field getAFieldWithSize(Union u, int bytes) { cached private newtype TContent = - TFieldContent(Field f, int indirectionIndex) { - // the indirection index for field content starts at 1 (because `TFieldContent` is thought of as + TNonUnionContent(Field f, int indirectionIndex) { + // the indirection index for field content starts at 1 (because `TNonUnionContent` is thought of as // the address of the field, `FieldAddress` in the IR). indirectionIndex = [1 .. SsaImpl::getMaxIndirectionsForType(f.getUnspecifiedType())] and // Reads and writes of union fields are tracked using `UnionContent`. @@ -2124,14 +2124,14 @@ private newtype TContent = */ class Content extends TContent { /** Gets a textual representation of this element. */ - abstract string toString(); + string toString() { none() } // overridden in subclasses predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0 } /** Gets the indirection index of this `Content`. */ - abstract int getIndirectionIndex(); + int getIndirectionIndex() { none() } // overridden in subclasses /** * INTERNAL: Do not use. @@ -2142,7 +2142,7 @@ class Content extends TContent { * For example, a write to a field `f` implies that any content of * the form `*f` is also cleared. */ - abstract predicate impliesClearOf(Content c); + predicate impliesClearOf(Content c) { none() } // overridden in subclasses } /** @@ -2162,22 +2162,42 @@ private module ContentStars { private import ContentStars -/** A reference through a non-union instance field. */ +private class TFieldContent = TNonUnionContent or TUnionContent; + +/** + * A `Content` that references a `Field`. This may be a field of a `struct`, + * `class`, or `union`. In the case of a `union` there may be multiple fields + * associated with the same `Content`. + */ class FieldContent extends Content, TFieldContent { + /** Gets a `Field` of this `Content`. */ + Field getAField() { none() } + + /** + * Gets the field associated with this `Content`, if a unique one exists. + */ + final Field getField() { result = unique( | | this.getAField()) } + + override int getIndirectionIndex() { none() } // overridden in subclasses + + override string toString() { none() } // overridden in subclasses + + override predicate impliesClearOf(Content c) { none() } // overridden in subclasses +} + +/** A reference through a non-union instance field. */ +class NonUnionFieldContent extends FieldContent, TNonUnionContent { private Field f; private int indirectionIndex; - FieldContent() { this = TFieldContent(f, indirectionIndex) } + NonUnionFieldContent() { this = TNonUnionContent(f, indirectionIndex) } override string toString() { result = contentStars(this) + f.toString() } - Field getField() { result = f } + override Field getAField() { result = f } /** Gets the indirection index of this `FieldContent`. */ - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { exists(FieldContent fc | @@ -2191,7 +2211,7 @@ class FieldContent extends Content, TFieldContent { } /** A reference through an instance field of a union. */ -class UnionContent extends Content, TUnionContent { +class UnionContent extends FieldContent, TUnionContent { private Union u; private int indirectionIndex; private int bytes; @@ -2201,16 +2221,13 @@ class UnionContent extends Content, TUnionContent { override string toString() { result = contentStars(this) + u.toString() } /** Gets a field of the underlying union of this `UnionContent`, if any. */ - Field getAField() { result = u.getAField() and getFieldSize(result) = bytes } + override Field getAField() { result = u.getAField() and getFieldSize(result) = bytes } /** Gets the underlying union of this `UnionContent`. */ Union getUnion() { result = u } /** Gets the indirection index of this `UnionContent`. */ - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { exists(UnionContent uc | @@ -2234,10 +2251,7 @@ class ElementContent extends Content, TElementContent { ElementContent() { this = TElementContent(indirectionIndex) } - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { none() } diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ea6ab058134f..28892c5b8207 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -190,7 +190,7 @@ module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig