From 2af6db63209e66a60a025b7228ea4527af387207 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 18 Nov 2025 18:51:33 +0000 Subject: [PATCH 1/5] C++: Rename 'FieldContent' to 'NonUnionContent'. --- .../code/cpp/ir/dataflow/internal/DataFlowPrivate.qll | 2 +- .../semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 8 ++++---- .../src/utils/modelgenerator/internal/CaptureModels.qll | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 582391e81cc9..f9ea35e6bd11 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -1574,7 +1574,7 @@ pragma[inline] ContentApprox getContentApprox(Content c) { exists(string prefix, Field f | prefix = result.(FieldApproxContent).getPrefix() and - f = c.(FieldContent).getField() and + f = c.(NonUnionFieldContent).getField() and fieldHasApproxName(f, prefix) ) or diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 82dcd43e1364..e55934274866 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2093,8 +2093,8 @@ private Field getAFieldWithSize(Union u, int bytes) { cached private newtype TContent = - TFieldContent(Field f, int indirectionIndex) { - // the indirection index for field content starts at 1 (because `TFieldContent` is thought of as + TNonUnionContent(Field f, int indirectionIndex) { + // the indirection index for field content starts at 1 (because `TNonUnionContent` is thought of as // the address of the field, `FieldAddress` in the IR). indirectionIndex = [1 .. SsaImpl::getMaxIndirectionsForType(f.getUnspecifiedType())] and // Reads and writes of union fields are tracked using `UnionContent`. @@ -2163,11 +2163,11 @@ private module ContentStars { private import ContentStars /** A reference through a non-union instance field. */ -class FieldContent extends Content, TFieldContent { +class NonUnionFieldContent extends Content, TNonUnionContent { private Field f; private int indirectionIndex; - FieldContent() { this = TFieldContent(f, indirectionIndex) } + NonUnionFieldContent() { this = TNonUnionContent(f, indirectionIndex) } override string toString() { result = contentStars(this) + f.toString() } diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ea6ab058134f..63d2eee17c94 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -190,7 +190,7 @@ module ModelGeneratorCommonInput implements ModelGeneratorCommonInputSig Date: Tue, 18 Nov 2025 18:52:20 +0000 Subject: [PATCH 2/5] C++: Get rid of abstract'ness from these public predicates. We are not supposed to have abstract public stuff. Oops ... --- .../semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index e55934274866..6c93fabfd704 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2124,14 +2124,14 @@ private newtype TContent = */ class Content extends TContent { /** Gets a textual representation of this element. */ - abstract string toString(); + string toString() { none() } // overridden in subclasses predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0 } /** Gets the indirection index of this `Content`. */ - abstract int getIndirectionIndex(); + int getIndirectionIndex() { none() } // overridden in subclasses /** * INTERNAL: Do not use. @@ -2142,7 +2142,7 @@ class Content extends TContent { * For example, a write to a field `f` implies that any content of * the form `*f` is also cleared. */ - abstract predicate impliesClearOf(Content c); + predicate impliesClearOf(Content c) { none() } // overridden in subclasses } /** From 7f0fcb0c468a5121bf442b35cfd0d3ea5c0d5645 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 18 Nov 2025 18:53:37 +0000 Subject: [PATCH 3/5] C++: Create a common base class for 'NonUnionContent' and 'UnionContent' called 'FieldContent'. --- .../ir/dataflow/internal/DataFlowPrivate.qll | 36 +++++++------------ .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 31 +++++++++++++--- .../modelgenerator/internal/CaptureModels.qll | 7 +--- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index f9ea35e6bd11..03289a42e40f 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -873,23 +873,16 @@ predicate jumpStep(Node n1, Node n2) { predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) { exists( PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads, - StoreInstruction store + StoreInstruction store, FieldContent fc | postFieldUpdate = node2 and + fc = c and nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and postFieldUpdate.getIndirectionIndex() = 1 and numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(), - store.getDestinationAddressOperand(), numberOfLoads, certain) - | - exists(FieldContent fc | fc = c | - fc.getField() = postFieldUpdate.getUpdatedField() and - fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads - ) - or - exists(UnionContent uc | uc = c | - uc.getAField() = postFieldUpdate.getUpdatedField() and - uc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads - ) + store.getDestinationAddressOperand(), numberOfLoads, certain) and + fc.getAField() = postFieldUpdate.getUpdatedField() and + fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads ) or // models-as-data summarized flow @@ -965,22 +958,17 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex) * `node2`. */ predicate readStep(Node node1, ContentSet c, Node node2) { - exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 | + exists( + FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc + | + fc = c and nodeHasOperand(node2, operand, indirectionIndex2) and // The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct // in `storeStep`. nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and - numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) - | - exists(FieldContent fc | fc = c | - fc.getField() = fa1.getField() and - fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads - ) - or - exists(UnionContent uc | uc = c | - uc.getAField() = fa1.getField() and - uc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads - ) + numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and + fc.getAField() = fa1.getField() and + fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads ) or // models-as-data summarized flow diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 6c93fabfd704..7a9ab6ea2f76 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2162,8 +2162,31 @@ private module ContentStars { private import ContentStars +private class TFieldContent = TNonUnionContent or TUnionContent; + +/** + * A `Content` that references a `Field`. This may be a field of a `struct`, + * `class`, or `union`. In the case of a `union` there may be multiple fields + * associated with the same `Content`. + */ +class FieldContent extends Content, TFieldContent { + /** Gets a `Field` of this `Content`. */ + Field getAField() { none() } + + /** + * Gets the field associated with this `Content`, if a unique one exists. + */ + final Field getField() { result = unique( | | this.getAField()) } + + override int getIndirectionIndex() { none() } // overridden in subclasses + + override string toString() { none() } // overridden in subclasses + + override predicate impliesClearOf(Content c) { none() } // overridden in subclasses +} + /** A reference through a non-union instance field. */ -class NonUnionFieldContent extends Content, TNonUnionContent { +class NonUnionFieldContent extends FieldContent, TNonUnionContent { private Field f; private int indirectionIndex; @@ -2171,7 +2194,7 @@ class NonUnionFieldContent extends Content, TNonUnionContent { override string toString() { result = contentStars(this) + f.toString() } - Field getField() { result = f } + override Field getAField() { result = f } /** Gets the indirection index of this `FieldContent`. */ pragma[inline] @@ -2191,7 +2214,7 @@ class NonUnionFieldContent extends Content, TNonUnionContent { } /** A reference through an instance field of a union. */ -class UnionContent extends Content, TUnionContent { +class UnionContent extends FieldContent, TUnionContent { private Union u; private int indirectionIndex; private int bytes; @@ -2201,7 +2224,7 @@ class UnionContent extends Content, TUnionContent { override string toString() { result = contentStars(this) + u.toString() } /** Gets a field of the underlying union of this `UnionContent`, if any. */ - Field getAField() { result = u.getAField() and getFieldSize(result) = bytes } + override Field getAField() { result = u.getAField() and getFieldSize(result) = bytes } /** Gets the underlying union of this `UnionContent`. */ Union getUnion() { result = u } diff --git a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 63d2eee17c94..28892c5b8207 100644 --- a/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/cpp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -340,12 +340,7 @@ private module SummaryModelGeneratorInput implements SummaryModelGeneratorInputS ) } - predicate isField(DataFlow::ContentSet cs) { - exists(DataFlow::Content c | cs.isSingleton(c) | - c instanceof DataFlow::FieldContent or - c instanceof DataFlow::UnionContent - ) - } + predicate isField(DataFlow::ContentSet cs) { cs.isSingleton(any(DataFlow::FieldContent fc)) } predicate isCallback(DataFlow::ContentSet c) { none() } From 9bfe847fda437ae5dd73100108edf18d953c5a36 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 19 Nov 2025 10:08:09 +0000 Subject: [PATCH 4/5] C++: Fix awful joins on bochs: ``` Evaluated relational algebra for predicate DataFlowPrivate::storeStepImpl/4#b2c79f9a@13be12rc with tuple counts: 9 ~0% {3} r1 = JOIN `FlowSummaryImpl::Private::Steps::summaryStoreStep/3#5c2d4899` WITH DataFlowUtil::TFlowSummaryNode#40da8361 ON FIRST 1 OUTPUT Lhs.2, Lhs.1, Rhs.1 9 ~0% {4} | JOIN WITH DataFlowUtil::TFlowSummaryNode#40da8361 ON FIRST 1 OUTPUT Lhs.2, Lhs.1, Rhs.1, _ 9 ~12% {4} | REWRITE WITH Out.3 := true 1853420 ~0% {3} r2 = SCAN `DataFlowPrivate::nodeHasInstruction/3#f469bb06` OUTPUT In.1, In.0, In.2 100282 ~0% {3} | JOIN WITH `Instruction::StoreInstruction.getDestinationAddressOperand/0#dispred#596a4aba` ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.2 127910 ~0% {6} | JOIN WITH `DataFlowPrivate::numberOfLoadsFromOperand/4#7e555666_1023#join_rhs` ON FIRST 1 OUTPUT _, Lhs.1, Rhs.1, Rhs.3, Lhs.2, Rhs.2 127910 ~0% {4} | REWRITE WITH Tmp.0 := 1, Out.0 := (Tmp.0 + In.4 + In.5) KEEPING 4 4178182721 ~1% {4} | JOIN WITH `DataFlowUtil::FieldContent.getIndirectionIndex/0#dispred#cc69866f_10#join_rhs` ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.2, Lhs.3 4290552803 ~0% {5} | JOIN WITH `DataFlowUtil::FieldContent.getAField/0#dispred#ba1c91e5` ON FIRST 1 OUTPUT Lhs.2, Lhs.1, Lhs.3, Lhs.0, Rhs.1 3033745816 ~5% {7} | JOIN WITH DataFlowUtil::PostFieldUpdateNode#b86f3a84_1023#join_rhs ON FIRST 1 OUTPUT Rhs.1, Lhs.1, Lhs.2, Lhs.3, Lhs.4, Rhs.2, Rhs.3 3033745816 ~3% {9} | JOIN WITH DataFlowUtil::TPostUpdateNodeImpl#f5e76b7a_21#join_rhs ON FIRST 1 OUTPUT Lhs.1, Lhs.2, Lhs.3, Lhs.4, Lhs.0, Lhs.5, Lhs.6, Rhs.1, _ {8} | REWRITE WITH Tmp.8 := 1, TEST InOut.7 = Tmp.8 KEEPING 8 1516872908 ~0% {7} | SCAN OUTPUT In.4, In.5, In.6, In.0, In.1, In.2, In.3 2409090286 ~1% {6} | JOIN WITH DataFlowUtil::PostFieldUpdateNode#b86f3a84_0231#join_rhs ON FIRST 3 OUTPUT Rhs.3, Lhs.6, Lhs.3, Lhs.4, Lhs.5, Lhs.0 66016 ~45% {4} | JOIN WITH `DataFlowUtil::FieldAddress.getField/0#dispred#bdd01c1a` ON FIRST 2 OUTPUT Lhs.2, Lhs.4, Lhs.5, Lhs.3 66025 ~45% {4} r3 = r1 UNION r2 return r3 ``` --- .../cpp/ir/dataflow/internal/DataFlowPrivate.qll | 11 ++++++++--- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 15 +++------------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 03289a42e40f..b5f4f88f4bd5 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -861,6 +861,10 @@ predicate jumpStep(Node n1, Node n2) { n2.(FlowSummaryNode).getSummaryNode()) } +bindingset[c] +pragma[inline_late] +private int getIndirectionIndexLate(Content c) { result = c.getIndirectionIndex() } + /** * Holds if data can flow from `node1` to `node2` via an assignment to `f`. * Thus, `node2` references an object with a field `f` that contains the @@ -877,12 +881,13 @@ predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) { | postFieldUpdate = node2 and fc = c and - nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and + nodeHasInstruction(node1, pragma[only_bind_into](store), + pragma[only_bind_into](indirectionIndex1)) and postFieldUpdate.getIndirectionIndex() = 1 and numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(), store.getDestinationAddressOperand(), numberOfLoads, certain) and fc.getAField() = postFieldUpdate.getUpdatedField() and - fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads + getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads ) or // models-as-data summarized flow @@ -968,7 +973,7 @@ predicate readStep(Node node1, ContentSet c, Node node2) { nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and fc.getAField() = fa1.getField() and - fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads + getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads ) or // models-as-data summarized flow diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 7a9ab6ea2f76..7c2b15b18127 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2197,10 +2197,7 @@ class NonUnionFieldContent extends FieldContent, TNonUnionContent { override Field getAField() { result = f } /** Gets the indirection index of this `FieldContent`. */ - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { exists(FieldContent fc | @@ -2230,10 +2227,7 @@ class UnionContent extends FieldContent, TUnionContent { Union getUnion() { result = u } /** Gets the indirection index of this `UnionContent`. */ - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { exists(UnionContent uc | @@ -2257,10 +2251,7 @@ class ElementContent extends Content, TElementContent { ElementContent() { this = TElementContent(indirectionIndex) } - pragma[inline] - override int getIndirectionIndex() { - pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) - } + override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { none() } From 6c4def13b43aba1709727254eeedcd9344fca3ca Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 19 Nov 2025 17:23:39 +0000 Subject: [PATCH 5/5] C++: Add change note. --- cpp/ql/lib/change-notes/2025-11-19-content.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 cpp/ql/lib/change-notes/2025-11-19-content.md diff --git a/cpp/ql/lib/change-notes/2025-11-19-content.md b/cpp/ql/lib/change-notes/2025-11-19-content.md new file mode 100644 index 000000000000..e16bfc903bf9 --- /dev/null +++ b/cpp/ql/lib/change-notes/2025-11-19-content.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The class `DataFlow::FieldContent` now covers both `union` and `struct`/`class` types. A new predicate `FieldContent.getAField` has been added to access the union members associated with the `FieldContent`. The old `FieldContent` has been renamed to `NonUnionFieldContent`. \ No newline at end of file