diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 7c2b15b18127..1185b6a0c9c3 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2078,38 +2078,151 @@ predicate localExprFlow(Expr e1, Expr e2) { localExprFlowPlus(e1, e2) } +/** + * A canonical representation of a field. + * + * For performance reasons we want a unique `Content` that represents + * a given field across any template instantiation of a class. + * + * This is possible in _almost_ all cases, but there are cases where it is + * not possible to map between a field in the uninstantiated template to a + * field in the instantiated template. This happens in the case of local class + * definitions (because the local class is not the template that constructs + * the instantiation - it is the enclosing function). So this abstract class + * has two implementations: a non-local case (where we can represent a + * canonical field as the field declaration from an uninstantiated class + * template or a non-templated class), and a local case (where we simply use + * the field from the instantiated class). + */ +abstract private class CanonicalField extends Field { + /** Gets a field represented by this canonical field. */ + abstract Field getAField(); + + /** + * Gets a class that declares a field represented by this canonical field. + */ + abstract Class getADeclaringType(); + + /** + * Gets a type that this canonical field may have. Note that this may + * not be a unique type. For example, consider this case: + * ``` + * template + * struct S { T x; }; + * + * S s1; + * S s2; + * ``` + * In this case the canonical field corresponding to `S::x` has two types: + * `int` and `char`. + */ + Type getAType() { result = this.getAField().getType() } + + Type getAnUnspecifiedType() { result = this.getAType().getUnspecifiedType() } +} + +private class NonLocalCanonicalField extends CanonicalField { + Class declaringType; + + NonLocalCanonicalField() { + declaringType = this.getDeclaringType() and + not declaringType.isFromTemplateInstantiation(_) and + not declaringType.isLocal() // handled in LocalCanonicalField + } + + override Field getAField() { + exists(Class c | result.getDeclaringType() = c | + // Either the declaring class of the field is a template instantiation + // that has been constructed from this canonical declaration + c.isConstructedFrom(declaringType) and + pragma[only_bind_out](result.getName()) = pragma[only_bind_out](this.getName()) + or + // or this canonical declaration is not a template. + not c.isConstructedFrom(_) and + result = this + ) + } + + override Class getADeclaringType() { + result = this.getDeclaringType() + or + result.isConstructedFrom(this.getDeclaringType()) + } +} + +private class LocalCanonicalField extends CanonicalField { + Class declaringType; + + LocalCanonicalField() { + declaringType = this.getDeclaringType() and + declaringType.isLocal() + } + + override Field getAField() { result = this } + + override Class getADeclaringType() { result = declaringType } +} + +/** + * A canonical representation of a `Union`. See `CanonicalField` for the explanation for + * why we need a canonical representation. + */ +abstract private class CanonicalUnion extends Union { + /** Gets a union represented by this canonical union. */ + abstract Union getAUnion(); + + /** Gets a canonical field of this canonical union. */ + CanonicalField getACanonicalField() { result.getDeclaringType() = this } +} + +private class NonLocalCanonicalUnion extends CanonicalUnion { + NonLocalCanonicalUnion() { not this.isFromTemplateInstantiation(_) and not this.isLocal() } + + override Union getAUnion() { + result = this + or + result.isConstructedFrom(this) + } +} + +private class LocalCanonicalUnion extends CanonicalUnion { + LocalCanonicalUnion() { this.isLocal() } + + override Union getAUnion() { result = this } +} + bindingset[f] pragma[inline_late] -private int getFieldSize(Field f) { result = f.getType().getSize() } +private int getFieldSize(CanonicalField f) { result = max(f.getAType().getSize()) } /** * Gets a field in the union `u` whose size * is `bytes` number of bytes. */ -private Field getAFieldWithSize(Union u, int bytes) { - result = u.getAField() and +private CanonicalField getAFieldWithSize(CanonicalUnion u, int bytes) { + result = u.getACanonicalField() and bytes = getFieldSize(result) } cached private newtype TContent = - TNonUnionContent(Field f, int indirectionIndex) { + TNonUnionContent(CanonicalField f, int indirectionIndex) { // the indirection index for field content starts at 1 (because `TNonUnionContent` is thought of as // the address of the field, `FieldAddress` in the IR). - indirectionIndex = [1 .. SsaImpl::getMaxIndirectionsForType(f.getUnspecifiedType())] and + indirectionIndex = [1 .. max(SsaImpl::getMaxIndirectionsForType(f.getAnUnspecifiedType()))] and // Reads and writes of union fields are tracked using `UnionContent`. not f.getDeclaringType() instanceof Union } or - TUnionContent(Union u, int bytes, int indirectionIndex) { - exists(Field f | - f = u.getAField() and + TUnionContent(CanonicalUnion u, int bytes, int indirectionIndex) { + exists(CanonicalField f | + f = u.getACanonicalField() and bytes = getFieldSize(f) and // We key `UnionContent` by the union instead of its fields since a write to one // field can be read by any read of the union's fields. Again, the indirection index // is 1-based (because 0 is considered the address). indirectionIndex = [1 .. max(SsaImpl::getMaxIndirectionsForType(getAFieldWithSize(u, bytes) - .getUnspecifiedType()) + .getAnUnspecifiedType()) )] ) } or @@ -2175,8 +2288,12 @@ class FieldContent extends Content, TFieldContent { /** * Gets the field associated with this `Content`, if a unique one exists. + * + * For fields from template instantiations this predicate may still return + * more than one field, but all the fields will be constructed from the same + * template. */ - final Field getField() { result = unique( | | this.getAField()) } + Field getField() { none() } // overridden in subclasses override int getIndirectionIndex() { none() } // overridden in subclasses @@ -2187,32 +2304,33 @@ class FieldContent extends Content, TFieldContent { /** A reference through a non-union instance field. */ class NonUnionFieldContent extends FieldContent, TNonUnionContent { - private Field f; + private CanonicalField f; private int indirectionIndex; NonUnionFieldContent() { this = TNonUnionContent(f, indirectionIndex) } override string toString() { result = contentStars(this) + f.toString() } - override Field getAField() { result = f } + final override Field getField() { result = f.getAField() } + + override Field getAField() { result = this.getField() } /** Gets the indirection index of this `FieldContent`. */ override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { - exists(FieldContent fc | - fc = c and - fc.getField() = f and + exists(int i | + c = TNonUnionContent(f, i) and // If `this` is `f` then `c` is cleared if it's of the // form `*f`, `**f`, etc. - fc.getIndirectionIndex() >= indirectionIndex + i >= indirectionIndex ) } } /** A reference through an instance field of a union. */ class UnionContent extends FieldContent, TUnionContent { - private Union u; + private CanonicalUnion u; private int indirectionIndex; private int bytes; @@ -2220,24 +2338,31 @@ class UnionContent extends FieldContent, TUnionContent { override string toString() { result = contentStars(this) + u.toString() } + final override Field getField() { result = unique( | | u.getACanonicalField()).getAField() } + /** Gets a field of the underlying union of this `UnionContent`, if any. */ - override Field getAField() { result = u.getAField() and getFieldSize(result) = bytes } + override Field getAField() { + exists(CanonicalField cf | + cf = u.getACanonicalField() and + result = cf.getAField() and + getFieldSize(cf) = bytes + ) + } /** Gets the underlying union of this `UnionContent`. */ - Union getUnion() { result = u } + Union getUnion() { result = u.getAUnion() } /** Gets the indirection index of this `UnionContent`. */ override int getIndirectionIndex() { result = indirectionIndex } override predicate impliesClearOf(Content c) { - exists(UnionContent uc | - uc = c and - uc.getUnion() = u and + exists(int i | + c = TUnionContent(u, _, i) and // If `this` is `u` then `c` is cleared if it's of the // form `*u`, `**u`, etc. (and we ignore `bytes` because // we know the entire union is overwritten because it's a // union). - uc.getIndirectionIndex() >= indirectionIndex + i >= indirectionIndex ) } } diff --git a/cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected b/cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected index 4021dbc492a9..1b0b906af52a 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected @@ -142,6 +142,7 @@ postWithInFlow | simple.cpp:92:7:92:7 | i [post update] | PostUpdateNode should not be the target of local flow. | | simple.cpp:118:7:118:7 | i [post update] | PostUpdateNode should not be the target of local flow. | | simple.cpp:124:5:124:6 | * ... [post update] | PostUpdateNode should not be the target of local flow. | +| simple.cpp:167:9:167:9 | x [post update] | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition uniqueParameterNodePosition diff --git a/cpp/ql/test/library-tests/dataflow/fields/partial-definition-diff.expected b/cpp/ql/test/library-tests/dataflow/fields/partial-definition-diff.expected index 8137e350d853..6cce6ac4f893 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/partial-definition-diff.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/partial-definition-diff.expected @@ -308,3 +308,5 @@ WARNING: module 'DataFlow' has been deprecated and may be removed in future (par | simple.cpp:124:5:124:6 | * ... | AST only | | simple.cpp:131:14:131:14 | a | IR only | | simple.cpp:136:10:136:10 | a | IR only | +| simple.cpp:167:9:167:9 | x | AST only | +| simple.cpp:168:8:168:12 | u_int | IR only | diff --git a/cpp/ql/test/library-tests/dataflow/fields/partial-definition-ir.expected b/cpp/ql/test/library-tests/dataflow/fields/partial-definition-ir.expected index 8df575d8e167..b09f949271db 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/partial-definition-ir.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/partial-definition-ir.expected @@ -670,6 +670,8 @@ | simple.cpp:131:14:131:14 | a | | simple.cpp:135:20:135:20 | q | | simple.cpp:136:10:136:10 | a | +| simple.cpp:167:3:167:7 | u_int | +| simple.cpp:168:8:168:12 | u_int | | struct_init.c:15:8:15:9 | ab | | struct_init.c:15:12:15:12 | a | | struct_init.c:16:8:16:9 | ab | diff --git a/cpp/ql/test/library-tests/dataflow/fields/partial-definition.expected b/cpp/ql/test/library-tests/dataflow/fields/partial-definition.expected index 397e069c1669..127cbadd9719 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/partial-definition.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/partial-definition.expected @@ -597,6 +597,8 @@ WARNING: module 'DataFlow' has been deprecated and may be removed in future (par | simple.cpp:118:7:118:7 | i | | simple.cpp:124:5:124:6 | * ... | | simple.cpp:135:20:135:20 | q | +| simple.cpp:167:3:167:7 | u_int | +| simple.cpp:167:9:167:9 | x | | struct_init.c:15:8:15:9 | ab | | struct_init.c:15:12:15:12 | a | | struct_init.c:16:8:16:9 | ab | diff --git a/cpp/ql/test/library-tests/dataflow/fields/simple.cpp b/cpp/ql/test/library-tests/dataflow/fields/simple.cpp index 3b8d882bc2be..d220b416e1a9 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/simple.cpp +++ b/cpp/ql/test/library-tests/dataflow/fields/simple.cpp @@ -136,4 +136,36 @@ void alias_with_fields(bool b) { sink(a.i); // $ MISSING: ast,ir } +template +union U_with_two_instantiations_of_different_size { + int x; + T y; +}; + +struct LargeStruct { + int data[64]; +}; + +void test_union_with_two_instantiations_of_different_sizes() { + // A union's fields is partitioned into "chunks" for field-flow in order to + // improve performance (so that a write to a field of a union does not flow + // to too many reads that don't happen at runtime). The partitioning is based + // the size of the types in the union. So a write to a field of size k only + // flows to a read of size k. + // Since field-flow is based on uninstantiated types a field can have + // multiple sizes if the union is instantiated with types of + // different sizes. So to compute the partition we pick the maximum size. + // Because of this there are `Content`s corresponding to the union + // `U_with_two_instantiations_of_different_size`: The one for size + // `sizeof(int)`, and the one for size `sizeof(LargeStruct)` (because + // `LargeStruct` is larger than `int`). So the write to `x` writes to the + // `Content` for size `sizeof(int)`, and the read of `y` reads from the + // `Content` for size `sizeof(LargeStruct)`. + U_with_two_instantiations_of_different_size u_int; + U_with_two_instantiations_of_different_size u_very_large; + + u_int.x = user_input(); + sink(u_int.y); // $ MISSING: ir +} + } // namespace Simple \ No newline at end of file diff --git a/cpp/ql/test/library-tests/variables/variables/variable.expected b/cpp/ql/test/library-tests/variables/variables/variable.expected index 1ef5902a9a88..c6ada1f36157 100644 --- a/cpp/ql/test/library-tests/variables/variables/variable.expected +++ b/cpp/ql/test/library-tests/variables/variables/variable.expected @@ -2,10 +2,10 @@ | file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | address && | SemanticStackVariable | | | | file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | const __va_list_tag & | SemanticStackVariable | | | | file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | const address & | SemanticStackVariable | | | -| file://:0:0:0:0 | fp_offset | file://:0:0:0:0 | unsigned int | Field | | | -| file://:0:0:0:0 | gp_offset | file://:0:0:0:0 | unsigned int | Field | | | -| file://:0:0:0:0 | overflow_arg_area | file://:0:0:0:0 | void * | Field | | | -| file://:0:0:0:0 | reg_save_area | file://:0:0:0:0 | void * | Field | | | +| file://:0:0:0:0 | fp_offset | file://:0:0:0:0 | unsigned int | NonLocalCanonicalField | | | +| file://:0:0:0:0 | gp_offset | file://:0:0:0:0 | unsigned int | NonLocalCanonicalField | | | +| file://:0:0:0:0 | overflow_arg_area | file://:0:0:0:0 | void * | NonLocalCanonicalField | | | +| file://:0:0:0:0 | reg_save_area | file://:0:0:0:0 | void * | NonLocalCanonicalField | | | | variables.cpp:1:12:1:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | | | variables.cpp:2:12:2:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | | | variables.cpp:3:12:3:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | | @@ -33,10 +33,10 @@ | variables.cpp:37:6:37:8 | ap3 | file://:0:0:0:0 | int * | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | | | variables.cpp:41:7:41:11 | local | file://:0:0:0:0 | char[] | LocalVariable, SemanticStackVariable | | | | variables.cpp:43:14:43:18 | local | file://:0:0:0:0 | int | GlobalLikeVariable, StaticLocalVariable | | static | -| variables.cpp:48:9:48:12 | name | file://:0:0:0:0 | char * | Field | | | -| variables.cpp:49:12:49:17 | number | file://:0:0:0:0 | long | Field | | | -| variables.cpp:50:9:50:14 | street | file://:0:0:0:0 | char * | Field | | | -| variables.cpp:51:9:51:12 | town | file://:0:0:0:0 | char * | Field | | | +| variables.cpp:48:9:48:12 | name | file://:0:0:0:0 | char * | NonLocalCanonicalField | | | +| variables.cpp:49:12:49:17 | number | file://:0:0:0:0 | long | NonLocalCanonicalField | | | +| variables.cpp:50:9:50:14 | street | file://:0:0:0:0 | char * | NonLocalCanonicalField | | | +| variables.cpp:51:9:51:12 | town | file://:0:0:0:0 | char * | NonLocalCanonicalField | | | | variables.cpp:52:16:52:22 | country | file://:0:0:0:0 | char * | MemberVariable, StaticStorageDurationVariable | | static | | variables.cpp:56:14:56:29 | externInFunction | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | | | variables.cpp:60:10:60:17 | __func__ | file://:0:0:0:0 | const char[9] | GlobalLikeVariable, StaticInitializedStaticLocalVariable | | static |