From 3fb0408836dc0fddc541b98f831ab71de8461073 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:43:46 +0000 Subject: [PATCH 01/10] Reinstate symex-deref's `write` parameter field_sensitivity::apply needs to avoid expanding struct-typed symbols into fields when they're being used as lvalues. --- src/goto-symex/goto_symex.h | 4 ++-- src/goto-symex/symex_clean_expr.cpp | 2 +- src/goto-symex/symex_dereference.cpp | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 3b75c5c8e98..c48145e94d5 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -279,9 +279,9 @@ class goto_symext void initialize_auto_object(const exprt &, statet &); void process_array_expr(statet &, exprt &); exprt make_auto_object(const typet &, statet &); - virtual void dereference(exprt &, statet &); + virtual void dereference(exprt &, statet &, bool write); - void dereference_rec(exprt &, statet &); + void dereference_rec(exprt &, statet &, bool write); exprt address_arithmetic( const exprt &, statet &, diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index ee764424c66..328e227ce9a 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -175,7 +175,7 @@ void goto_symext::clean_expr( const bool write) { replace_nondet(expr, path_storage.build_symex_nondet); - dereference(expr, state); + dereference(expr, state, write); // make sure all remaining byte extract operations use the root // object to avoid nesting of with/update and byte_update when on diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 531daa4838b..df03ade1dfb 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -70,7 +70,7 @@ exprt goto_symext::address_arithmetic( // there could be further dereferencing in the offset exprt offset=be.offset(); - dereference_rec(offset, state); + dereference_rec(offset, state, false); result=plus_exprt(result, offset); @@ -106,14 +106,14 @@ exprt goto_symext::address_arithmetic( // just grab the pointer, but be wary of further dereferencing // in the pointer itself result=to_dereference_expr(expr).pointer(); - dereference_rec(result, state); + dereference_rec(result, state, false); } else if(expr.id()==ID_if) { if_exprt if_expr=to_if_expr(expr); // the condition is not an address - dereference_rec(if_expr.cond(), state); + dereference_rec(if_expr.cond(), state, false); // recursive call if_expr.true_case() = @@ -130,7 +130,7 @@ exprt goto_symext::address_arithmetic( { // give up, just dereference result=expr; - dereference_rec(result, state); + dereference_rec(result, state, false); // turn &array into &array[0] if(result.type().id() == ID_array && !keep_array) @@ -198,7 +198,7 @@ exprt goto_symext::address_arithmetic( /// such as `&struct.flexible_array[0]` (see inline comments in code). /// For full details of this method's pointer replacement and potential side- /// effects see \ref goto_symext::dereference -void goto_symext::dereference_rec(exprt &expr, statet &state) +void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) { if(expr.id()==ID_dereference) { @@ -221,7 +221,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) tmp1.swap(to_dereference_expr(expr).pointer()); // first make sure there are no dereferences in there - dereference_rec(tmp1, state); + dereference_rec(tmp1, state, false); // we need to set up some elaborate call-backs symex_dereference_statet symex_dereference_state(state, ns); @@ -259,7 +259,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) tmp.add_source_location()=expr.source_location(); // recursive call - dereference_rec(tmp, state); + dereference_rec(tmp, state, write); expr.swap(tmp); } @@ -297,17 +297,17 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) to_address_of_expr(tc_op).object(), from_integer(0, index_type()))); - dereference_rec(expr, state); + dereference_rec(expr, state, write); } else { - dereference_rec(tc_op, state); + dereference_rec(tc_op, state, write); } } else { Forall_operands(it, expr) - dereference_rec(*it, state); + dereference_rec(*it, state, write); } } @@ -348,7 +348,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state) /// dereferenced. If new objects are created by this mechanism then /// state will be altered (by `symex_assign`) to initialise them. /// See \ref auto_objects.cpp for details. -void goto_symext::dereference(exprt &expr, statet &state) +void goto_symext::dereference(exprt &expr, statet &state, bool write) { // The expression needs to be renamed to level 1 // in order to distinguish addresses of local variables @@ -358,7 +358,7 @@ void goto_symext::dereference(exprt &expr, statet &state) exprt l1_expr = state.rename(expr, ns).get(); // start the recursion! - dereference_rec(l1_expr, state); + dereference_rec(l1_expr, state, write); // dereferencing may introduce new symbol_exprt // (like __CPROVER_memory) expr = state.rename(std::move(l1_expr), ns).get(); From 6e514deeebdbd47493d218c37ef5692371bd7591 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 4 Apr 2019 17:09:40 +0000 Subject: [PATCH 02/10] Field-sensitive level-2 SSA renaming Struct member and array index expressions are replaced by new SSA symbols such that updates to individual fields of structs or arrays do not affect the SSA index of the remaining object. This enables more fine-grained constant propagation, which should be particularly beneficial to Java (where structs/classes prevail). --- .../constant_propagation/test.desc | 6 +- .../jbmc-strings/ClassName/test.desc | 2 - .../cbmc-concurrency/norace_struct1/test.desc | 2 +- .../struct_and_array1/test.desc | 2 +- regression/cbmc/Array_Propagation1/main.c | 21 ++ regression/cbmc/Array_Propagation1/test.desc | 9 + regression/cbmc/Struct_Propagation1/main.c | 34 +++ regression/cbmc/Struct_Propagation1/test.desc | 9 + .../test.desc | 7 +- .../nondet_elements_longer_lists/test.desc | 2 - .../test.desc | 2 - src/goto-symex/Makefile | 1 + src/goto-symex/field_sensitivity.cpp | 255 ++++++++++++++++++ src/goto-symex/field_sensitivity.h | 81 ++++++ src/goto-symex/goto_symex_state.cpp | 12 + src/goto-symex/goto_symex_state.h | 3 + src/goto-symex/symex_assign.cpp | 182 ++++++++++++- src/goto-symex/symex_dead.cpp | 33 +-- src/goto-symex/symex_dereference.cpp | 3 + src/goto-symex/symex_goto.cpp | 4 + src/util/ssa_expr.h | 10 + 21 files changed, 650 insertions(+), 30 deletions(-) create mode 100644 regression/cbmc/Array_Propagation1/main.c create mode 100644 regression/cbmc/Array_Propagation1/test.desc create mode 100644 regression/cbmc/Struct_Propagation1/main.c create mode 100644 regression/cbmc/Struct_Propagation1/test.desc create mode 100644 src/goto-symex/field_sensitivity.cpp create mode 100644 src/goto-symex/field_sensitivity.h diff --git a/jbmc/regression/jbmc-generics/constant_propagation/test.desc b/jbmc/regression/jbmc-generics/constant_propagation/test.desc index 31756c1b722..fa691b93648 100644 --- a/jbmc/regression/jbmc-generics/constant_propagation/test.desc +++ b/jbmc/regression/jbmc-generics/constant_propagation/test.desc @@ -3,8 +3,10 @@ Test.class --function Test.main --show-vcc ^EXIT=0$ ^SIGNAL=0$ -^\{-\d+\} symex_dynamic::dynamic_object1#3 = \{ \{ \{ "java::GenericSub" \}, NULL, 0 \} \}$ -^\{-\d+\} symex_dynamic::dynamic_object1#4 = \{ \{ \{ "java::GenericSub" \}, NULL, 5 \} \}$ +^\{-\d+\} symex_dynamic::dynamic_object1#2 = \{ \{ \{ "java::GenericSub" \}, NULL, 0 \} \}$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.@Generic\.@java.lang.Object\.@class_identifier = "java::GenericSub"$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.@Generic\.key = NULL$ +^\{-\d+\} symex_dynamic::dynamic_object1#3\.@Generic\.x = 5$ -- byte_extract_(big|little)_endian -- diff --git a/jbmc/regression/jbmc-strings/ClassName/test.desc b/jbmc/regression/jbmc-strings/ClassName/test.desc index 6a9bd1782a0..7e7dd4c1be3 100644 --- a/jbmc/regression/jbmc-strings/ClassName/test.desc +++ b/jbmc/regression/jbmc-strings/ClassName/test.desc @@ -4,5 +4,3 @@ test.class ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ -\[java::test\.main:\(\)V\.assertion\.1\] line 5 assertion at file test\.java line 5 function java::test\.main:\(\)V bytecode-index 8: SUCCESS$ -\[java::test\.main:\(\)V\.assertion\.2\] line 6 assertion at file test\.java line 6 function java::test\.main:\(\)V bytecode-index 19: SUCCESS$ diff --git a/regression/cbmc-concurrency/norace_struct1/test.desc b/regression/cbmc-concurrency/norace_struct1/test.desc index c239dca4b31..8ecf05b6d5c 100644 --- a/regression/cbmc-concurrency/norace_struct1/test.desc +++ b/regression/cbmc-concurrency/norace_struct1/test.desc @@ -1,4 +1,4 @@ -KNOWNBUG pthread +CORE pthread main.c ^EXIT=0$ diff --git a/regression/cbmc-concurrency/struct_and_array1/test.desc b/regression/cbmc-concurrency/struct_and_array1/test.desc index 52168c7eba4..9efefbc7362 100644 --- a/regression/cbmc-concurrency/struct_and_array1/test.desc +++ b/regression/cbmc-concurrency/struct_and_array1/test.desc @@ -1,4 +1,4 @@ -KNOWNBUG +CORE main.c ^EXIT=0$ diff --git a/regression/cbmc/Array_Propagation1/main.c b/regression/cbmc/Array_Propagation1/main.c new file mode 100644 index 00000000000..48d3a83933e --- /dev/null +++ b/regression/cbmc/Array_Propagation1/main.c @@ -0,0 +1,21 @@ +#include + +struct S +{ + int a; +}; + +int main() +{ + struct S s; + s.a = 1; + + assert(s.a == 1); + + int A[1]; + A[0] = 1; + + assert(A[0] == 1); + + return 0; +} diff --git a/regression/cbmc/Array_Propagation1/test.desc b/regression/cbmc/Array_Propagation1/test.desc new file mode 100644 index 00000000000..65535fe6331 --- /dev/null +++ b/regression/cbmc/Array_Propagation1/test.desc @@ -0,0 +1,9 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +(Starting CEGAR Loop|VCC\(s\), 0 remaining after simplification$) +-- +^warning: ignoring diff --git a/regression/cbmc/Struct_Propagation1/main.c b/regression/cbmc/Struct_Propagation1/main.c new file mode 100644 index 00000000000..56445e241b6 --- /dev/null +++ b/regression/cbmc/Struct_Propagation1/main.c @@ -0,0 +1,34 @@ +struct S +{ + int v; + struct Inner + { + int x; + } inner; +}; + +struct S works; + +int main() +{ + struct S fails; + + works.v = 2; + fails.v = 2; + + while(works.v > 0) + --(works.v); + + while(fails.v > 0) + --(fails.v); + + __CPROVER_assert(works.inner.x == 0, ""); + + _Bool b; + if(b) + { + struct S s = {42, {42}}; + } + + return 0; +} diff --git a/regression/cbmc/Struct_Propagation1/test.desc b/regression/cbmc/Struct_Propagation1/test.desc new file mode 100644 index 00000000000..42e6505e2d3 --- /dev/null +++ b/regression/cbmc/Struct_Propagation1/test.desc @@ -0,0 +1,9 @@ +CORE +main.c +--unwind 5 +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +(Starting CEGAR Loop|VCC\(s\), 0 remaining after simplification$) +-- +^warning: ignoring diff --git a/regression/cbmc/variable-access-to-constant-array/test.desc b/regression/cbmc/variable-access-to-constant-array/test.desc index ea0a082706e..d3fce1d598f 100644 --- a/regression/cbmc/variable-access-to-constant-array/test.desc +++ b/regression/cbmc/variable-access-to-constant-array/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +THOROUGH broken-smt-backend main.c ^EXIT=10$ @@ -6,5 +6,10 @@ main.c ^VERIFICATION FAILED$ -- ^warning: ignoring +-- This test actually passes when using the SMT2 back-end, but takes 68 seconds to do so. + +Field-sensitive encoding of array accesses for an array of 2^16 elements is very +expensive. We might consider some bounds up to which we actually do field +expansion. diff --git a/regression/goto-harness/nondet_elements_longer_lists/test.desc b/regression/goto-harness/nondet_elements_longer_lists/test.desc index 88dc31c264c..cb71825070a 100644 --- a/regression/goto-harness/nondet_elements_longer_lists/test.desc +++ b/regression/goto-harness/nondet_elements_longer_lists/test.desc @@ -1,8 +1,6 @@ CORE main.c --harness-type call-function --max-nondet-tree-depth 4 --min-null-tree-depth 1 --function test_function -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 0: SUCCESS -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 1: SUCCESS \[test_function.assertion.\d+\] line \d+ assertion list_walker->datum == \+\+i: SUCCESS ^EXIT=0$ ^SIGNAL=0$ diff --git a/regression/goto-harness/nondet_elements_longer_lists_global/test.desc b/regression/goto-harness/nondet_elements_longer_lists_global/test.desc index 22bfdb8bfd7..52218e7877d 100644 --- a/regression/goto-harness/nondet_elements_longer_lists_global/test.desc +++ b/regression/goto-harness/nondet_elements_longer_lists_global/test.desc @@ -1,8 +1,6 @@ CORE main.c --harness-type call-function --max-nondet-tree-depth 4 --min-null-tree-depth 1 --function test_function --nondet-globals -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 0: SUCCESS -\[test_function.unwind.\d+\] line \d+ unwinding assertion loop 1: SUCCESS \[test_function.assertion.\d+\] line \d+ assertion list_walker->datum == \+\+i: SUCCESS ^EXIT=0$ ^SIGNAL=0$ diff --git a/src/goto-symex/Makefile b/src/goto-symex/Makefile index c2b8d7b8871..7876b206a28 100644 --- a/src/goto-symex/Makefile +++ b/src/goto-symex/Makefile @@ -1,5 +1,6 @@ SRC = auto_objects.cpp \ build_goto_trace.cpp \ + field_sensitivity.cpp \ goto_state.cpp \ goto_symex.cpp \ goto_symex_state.cpp \ diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp new file mode 100644 index 00000000000..caf2cd1f5a0 --- /dev/null +++ b/src/goto-symex/field_sensitivity.cpp @@ -0,0 +1,255 @@ +/*******************************************************************\ + +Module: Field-sensitive SSA + +Author: Michael Tautschnig + +\*******************************************************************/ + +#include "field_sensitivity.h" + +#include +#include +#include +#include + +#include "goto_symex_state.h" +#include "symex_target.h" + +void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) + const +{ + if(!run_apply) + return; + + if(expr.id() != ID_address_of) + { + Forall_operands(it, expr) + apply(ns, *it, write); + } + + if(expr.id() == ID_symbol && expr.get_bool(ID_C_SSA_symbol) && !write) + { + expr = get_fields(ns, to_ssa_expr(expr)); + } + else if( + !write && expr.id() == ID_member && + to_member_expr(expr).struct_op().id() == ID_struct) + { + simplify(expr, ns); + } + else if( + !write && expr.id() == ID_index && + to_index_expr(expr).array().id() == ID_array) + { + simplify(expr, ns); + } + else if(expr.id() == ID_member) + { + // turn a member-of-an-SSA-expression into a single SSA expression, thus + // encoding the member as an individual symbol rather than only the full + // struct + member_exprt &member = to_member_expr(expr); + + if( + member.struct_op().id() == ID_symbol && + member.struct_op().get_bool(ID_C_SSA_symbol) && + (member.struct_op().type().id() == ID_struct || + member.struct_op().type().id() == ID_struct_tag)) + { + // place the entire member expression, not just the struct operand, in an + // SSA expression + ssa_exprt tmp = to_ssa_expr(member.struct_op()); + member.struct_op() = tmp.get_original_expr(); + tmp.set_expression(member); + + expr.swap(tmp); + } + } + else if(expr.id() == ID_index) + { + // turn a index-of-an-SSA-expression into a single SSA expression, thus + // encoding the index access into an array as an individual symbol rather + // than only the full array + index_exprt &index = to_index_expr(expr); + simplify(index.index(), ns); + + if( + index.array().id() == ID_symbol && + index.array().get_bool(ID_C_SSA_symbol) && + index.array().type().id() == ID_array && + index.index().id() == ID_constant) + { + // place the entire index expression, not just the array operand, in an + // SSA expression + ssa_exprt tmp = to_ssa_expr(index.array()); + index.array() = tmp.get_original_expr(); + tmp.set_expression(index); + + expr.swap(tmp); + } + } +} + +exprt field_sensitivityt::get_fields( + const namespacet &ns, + const ssa_exprt &ssa_expr) +{ + if(ssa_expr.type().id() == ID_struct || ssa_expr.type().id() == ID_struct_tag) + { + const struct_typet &type = to_struct_type(ns.follow(ssa_expr.type())); + const struct_union_typet::componentst &components = type.components(); + + struct_exprt result(ssa_expr.type()); + result.reserve_operands(components.size()); + + const exprt &struct_op = ssa_expr.get_original_expr(); + + for(const auto &comp : components) + { + const member_exprt member(struct_op, comp.get_name(), comp.type()); + ssa_exprt tmp = ssa_expr; + tmp.set_expression(member); + result.copy_to_operands(get_fields(ns, tmp)); + } + + return std::move(result); + } + else if( + ssa_expr.type().id() == ID_array && + to_array_type(ssa_expr.type()).size().id() == ID_constant) + { + const array_typet &type = to_array_type(ssa_expr.type()); + const std::size_t array_size = + numeric_cast_v(to_constant_expr(type.size())); + + array_exprt result(type); + result.reserve_operands(array_size); + + const exprt &array = ssa_expr.get_original_expr(); + + for(std::size_t i = 0; i < array_size; ++i) + { + const index_exprt index(array, from_integer(i, index_type())); + ssa_exprt tmp = ssa_expr; + tmp.set_expression(index); + result.copy_to_operands(get_fields(ns, tmp)); + } + + return std::move(result); + } + else + return ssa_expr; +} + +void field_sensitivityt::field_assignments( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs, + symex_targett &target) +{ + exprt lhs_fs = lhs; + apply(ns, lhs_fs, false); + + bool run_apply_bak = run_apply; + run_apply = false; + field_assignments_rec(ns, state, lhs_fs, lhs, target); + run_apply = run_apply_bak; +} + +/// Assign to the individual fields \p lhs_fs of a non-expanded symbol \p lhs. +/// This is required whenever prior steps have updated the full object rather +/// than individual fields, e.g., in case of assignments to an array at an +/// unknown index. +/// \param ns: a namespace to resolve type symbols/tag types +/// \param state: symbolic execution state +/// \param lhs_fs: expanded symbol +/// \param lhs: non-expanded symbol +/// \param target: symbolic execution equation store +void field_sensitivityt::field_assignments_rec( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs_fs, + const exprt &lhs, + symex_targett &target) +{ + if(lhs == lhs_fs) + return; + else if(lhs_fs.id() == ID_symbol && lhs_fs.get_bool(ID_C_SSA_symbol)) + { + exprt ssa_rhs = state.rename(lhs, ns).get(); + simplify(ssa_rhs, ns); + + ssa_exprt ssa_lhs = to_ssa_expr(lhs_fs); + state.assignment(ssa_lhs, ssa_rhs, ns, true, true); + + // do the assignment + target.assignment( + state.guard.as_expr(), + ssa_lhs, + ssa_lhs, + ssa_lhs.get_original_expr(), + ssa_rhs, + state.source, + symex_targett::assignment_typet::STATE); + } + else if(lhs.type().id() == ID_struct || lhs.type().id() == ID_struct_tag) + { + const struct_typet &type = to_struct_type(ns.follow(lhs.type())); + const struct_union_typet::componentst &components = type.components(); + + PRECONDITION( + components.empty() || + (lhs_fs.has_operands() && lhs_fs.operands().size() == components.size())); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(const auto &comp : components) + { + const member_exprt member_rhs(lhs, comp.get_name(), comp.type()); + const exprt &member_lhs = *fs_it; + + field_assignments_rec(ns, state, member_lhs, member_rhs, target); + ++fs_it; + } + } + else if(const auto &type = type_try_dynamic_cast(lhs.type())) + { + const std::size_t array_size = + numeric_cast_v(to_constant_expr(type->size())); + PRECONDITION( + lhs_fs.has_operands() && lhs_fs.operands().size() == array_size); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(std::size_t i = 0; i < array_size; ++i) + { + const index_exprt index_rhs(lhs, from_integer(i, index_type())); + const exprt &index_lhs = *fs_it; + + field_assignments_rec(ns, state, index_lhs, index_rhs, target); + ++fs_it; + } + } + else if(lhs_fs.has_operands()) + { + PRECONDITION( + lhs.has_operands() && lhs_fs.operands().size() == lhs.operands().size()); + + exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); + for(const exprt &op : lhs.operands()) + { + field_assignments_rec(ns, state, *fs_it, op, target); + ++fs_it; + } + } + else + { + UNREACHABLE; + } +} + +bool field_sensitivityt::is_divisible( + const namespacet &ns, + const ssa_exprt &expr) +{ + return expr != get_fields(ns, expr); +} diff --git a/src/goto-symex/field_sensitivity.h b/src/goto-symex/field_sensitivity.h new file mode 100644 index 00000000000..753e19ed9f0 --- /dev/null +++ b/src/goto-symex/field_sensitivity.h @@ -0,0 +1,81 @@ +/*******************************************************************\ + +Module: Field-sensitive SSA + +Author: Michael Tautschnig + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H +#define CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H + +class exprt; +class ssa_exprt; +class namespacet; +class goto_symex_statet; +class symex_targett; + +/// \brief Control granularity of object accesses +class field_sensitivityt +{ +public: + /// Assign to the individual fields of a non-expanded symbol \p lhs. This is + /// required whenever prior steps have updated the full object rather than + /// individual fields, e.g., in case of assignments to an array at an unknown + /// index. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param state: symbolic execution state + /// \param lhs: non-expanded symbol + /// \param target: symbolic execution equation store + void field_assignments( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs, + symex_targett &target); + + /// Turn an expression \p expr into a field-sensitive SSA expression. + /// Field-sensitive SSA expressions have individual symbols for each + /// component. While the exact granularity is an implementation detail, + /// possible implementations translate a struct-typed symbol into a struct of + /// member expressions, or an array-typed symbol into an array of index + /// expressions. Such expansions are not possible when the symbol is to be + /// written to (i.e., \p write is true); in such a case only translations from + /// existing member or index expressions into symbols are performed. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param [in,out] expr: an expression to be (recursively) transformed - this + /// parameter is both input and output. + /// \param write: set to true if the expression is to be used as an lvalue. + void apply(const namespacet &ns, exprt &expr, bool write) const; + + /// Compute an expression representing the individual components of a + /// field-sensitive SSA representation of \p ssa_expr. + /// \param ns: a namespace to resolve type symbols/tag types + /// \param ssa_expr: the expression to evaluate + /// \return Expanded expression; for example, for a \p ssa_expr of some struct + /// type, a `struct_exprt` with each component now being an SSA expression + /// is built. + static exprt get_fields(const namespacet &ns, const ssa_exprt &ssa_expr); + + /// Determine whether \p expr would translate to an atomic SSA expression + /// (returns false) or a composite object made of several SSA expressions as + /// components (such as a struct with each member becoming an individual SSA + /// expression, return true in this case). + /// \param ns: a namespace to resolve type symbols/tag types + /// \param expr: the expression to evaluate + /// \return False, if and only if, \p expr would be a single field-sensitive + /// SSA expression. + static bool is_divisible(const namespacet &ns, const ssa_exprt &expr); + +private: + /// whether or not to invoke \ref field_sensitivityt::apply + bool run_apply = true; + + void field_assignments_rec( + const namespacet &ns, + goto_symex_statet &state, + const exprt &lhs_fs, + const exprt &lhs, + symex_targett &target); +}; + +#endif // CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index a3ba83bc5f4..dd7839ef5eb 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -334,6 +334,10 @@ goto_symex_statet::rename(exprt expr, const namespacet &ns) c_expr.type() == to_if_expr(c_expr).false_case().type())), "Type of renamed expr should be the same as operands for with_exprt and " "if_exprt"); + + if(level == L2) + field_sensitivity.apply(ns, expr, false); + return renamedt{std::move(expr)}; } } @@ -360,6 +364,10 @@ bool goto_symex_statet::l2_thread_read_encoding( return false; } + // only continue if an indivisible object is being accessed + if(field_sensitivityt::is_divisible(ns, expr)) + return false; + ssa_exprt ssa_l1=expr; ssa_l1.remove_level_2(); const irep_idt &l1_identifier=ssa_l1.get_identifier(); @@ -485,6 +493,10 @@ goto_symex_statet::write_is_shared_resultt goto_symex_statet::write_is_shared( return write_is_shared_resultt::NOT_SHARED; } + // only continue if an indivisible object is being accessed + if(field_sensitivityt::is_divisible(ns, expr)) + return write_is_shared_resultt::NOT_SHARED; + if(atomic_section_id != 0) return write_is_shared_resultt::IN_ATOMIC_SECTION; diff --git a/src/goto-symex/goto_symex_state.h b/src/goto-symex/goto_symex_state.h index 47785b08a72..0b00bad3fd1 100644 --- a/src/goto-symex/goto_symex_state.h +++ b/src/goto-symex/goto_symex_state.h @@ -25,6 +25,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "call_stack.h" +#include "field_sensitivity.h" #include "frame.h" #include "goto_state.h" #include "renaming_level.h" @@ -115,6 +116,8 @@ class goto_symex_statet final : public goto_statet bool record_value, bool allow_pointer_unsoundness=false); + field_sensitivityt field_sensitivity; + protected: template void rename_address(exprt &expr, const namespacet &ns); diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 963831e5bc1..34df887f1a9 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -16,9 +16,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include "goto_symex_state.h" +// We can either use with_exprt or update_exprt when building expressions that +// modify components of an array or a struct. Set USE_UPDATE to use +// update_exprt. // #define USE_UPDATE void goto_symext::symex_assign(statet &state, const code_assignt &code) @@ -198,6 +202,167 @@ void goto_symext::symex_assign_rec( "assignment to `" + lhs.id_string() + "' not handled"); } +/// Replace "with" (or "update") expressions in \p ssa_rhs by their update +/// values and move the index or member to the left-hand side \p lhs_mod. This +/// effectively undoes the work that \ref goto_symext::symex_assign_array and +/// \ref goto_symext::symex_assign_struct_member have done, but now making use +/// of the index/member that may only be known after renaming to L2 has taken +/// place. +/// \param [in,out] ssa_rhs: right-hand side +/// \param [in,out] lhs_mod: left-hand side +/// \param ns: namespace +/// \param field_sensitivity: field sensitivity object to turn left-hand side +/// into individual fields +static void rewrite_with_to_field_symbols( + exprt &ssa_rhs, + ssa_exprt &lhs_mod, + const namespacet &ns, + const field_sensitivityt &field_sensitivity) +{ +#ifdef USE_UPDATE + while(ssa_rhs.id() == ID_update && + to_update_expr(ssa_rhs).designator().size() == 1 && + (lhs_mod.type().id() == ID_array || lhs_mod.type().id() == ID_struct || + lhs_mod.type().id() == ID_struct_tag)) + { + exprt field_sensitive_lhs; + const update_exprt &update = to_update_expr(ssa_rhs); + PRECONDITION(update.designator().size() == 1); + const exprt &designator = update.designator().front(); + + if(lhs_mod.type().id() == ID_array) + { + field_sensitive_lhs = + index_exprt(lhs_mod, to_index_designator(designator).index()); + } + else + { + field_sensitive_lhs = member_exprt( + lhs_mod, + to_member_designator(designator).get_component_name(), + update.new_value().type()); + } + + field_sensitivity.apply(field_sensitive_lhs, true); + + if(field_sensitive_lhs.id() != ID_symbol) + break; + + ssa_rhs = update.new_value(); + lhs_mod = to_ssa_expr(field_sensitive_lhs); + } +#else + while(ssa_rhs.id() == ID_with && + to_with_expr(ssa_rhs).operands().size() == 3 && + (lhs_mod.type().id() == ID_array || lhs_mod.type().id() == ID_struct || + lhs_mod.type().id() == ID_struct_tag)) + { + exprt field_sensitive_lhs; + const with_exprt &with_expr = to_with_expr(ssa_rhs); + + if(lhs_mod.type().id() == ID_array) + { + field_sensitive_lhs = index_exprt(lhs_mod, with_expr.where()); + } + else + { + field_sensitive_lhs = member_exprt( + lhs_mod, + with_expr.where().get(ID_component_name), + with_expr.new_value().type()); + } + + field_sensitivity.apply(ns, field_sensitive_lhs, true); + + if(field_sensitive_lhs.id() != ID_symbol) + break; + + ssa_rhs = with_expr.new_value(); + lhs_mod = to_ssa_expr(field_sensitive_lhs); + } +#endif +} + +/// Replace byte-update operations that only affect individual fields of an +/// expression by assignments to just those fields. May generate "with" (or +/// "update") expressions, which \ref rewrite_with_to_field_symbols will then +/// take care of. +/// \param [in,out] ssa_rhs: right-hand side +/// \param [in,out] lhs_mod: left-hand side +/// \param ns: namespace +/// \param do_simplify: set to true if, and only if, simplification is enabled +static void shift_indexed_access_to_lhs( + exprt &ssa_rhs, + ssa_exprt &lhs_mod, + const namespacet &ns, + const field_sensitivityt &field_sensitivity, + bool do_simplify) +{ + if( + ssa_rhs.id() == ID_byte_update_little_endian || + ssa_rhs.id() == ID_byte_update_big_endian) + { + byte_update_exprt &byte_update = to_byte_update_expr(ssa_rhs); + exprt byte_extract = byte_extract_exprt( + byte_update.id() == ID_byte_update_big_endian + ? ID_byte_extract_big_endian + : ID_byte_extract_little_endian, + lhs_mod, + byte_update.offset(), + byte_update.value().type()); + if(do_simplify) + simplify(byte_extract, ns); + + if(byte_extract.id() == ID_symbol) + { + ssa_rhs = byte_update.value(); + lhs_mod = to_ssa_expr(byte_extract); + } + else if(byte_extract.id() == ID_index || byte_extract.id() == ID_member) + { + ssa_rhs = byte_update.value(); + + while(byte_extract.id() == ID_index || byte_extract.id() == ID_member) + { + if(byte_extract.id() == ID_index) + { + index_exprt &idx = to_index_expr(byte_extract); + +#ifdef USE_UPDATE + update_exprt new_rhs{idx.array(), exprt{}, ssa_rhs}; + new_rhs.designator().push_back(index_designatort{idx.index()}); +#else + with_exprt new_rhs{idx.array(), idx.index(), ssa_rhs}; +#endif + + ssa_rhs = new_rhs; + byte_extract = idx.array(); + } + else + { + member_exprt &member = to_member_expr(byte_extract); + const irep_idt &component_name = member.get_component_name(); + +#ifdef USE_UPDATE + update_exprt new_rhs{member.compound(), exprt{}, ssa_rhs}; + new_rhs.designator().push_back(member_designatort{component_name}); +#else + with_exprt new_rhs(member.compound(), exprt(ID_member_name), ssa_rhs); + new_rhs.where().set(ID_component_name, component_name); +#endif + + ssa_rhs = new_rhs; + byte_extract = member.compound(); + } + } + + lhs_mod = to_ssa_expr(byte_extract); + } + } + + rewrite_with_to_field_symbols(ssa_rhs, lhs_mod, ns, field_sensitivity); +} + void goto_symext::symex_assign_symbol( statet &state, const ssa_exprt &lhs, // L1 @@ -216,9 +381,15 @@ void goto_symext::symex_assign_symbol( } exprt l2_rhs = state.rename(std::move(ssa_rhs), ns).get(); + + ssa_exprt lhs_mod = lhs; + + shift_indexed_access_to_lhs( + l2_rhs, lhs_mod, ns, state.field_sensitivity, symex_config.simplify_opt); + do_simplify(l2_rhs); - ssa_exprt ssa_lhs=lhs; + ssa_exprt ssa_lhs = lhs_mod; state.assignment( ssa_lhs, l2_rhs, @@ -235,8 +406,7 @@ void goto_symext::symex_assign_symbol( state.record_events=record_events; // do the assignment - const symbolt &symbol = - ns.lookup(to_symbol_expr(ssa_lhs.get_original_expr())); + const symbolt &symbol = ns.lookup(ssa_lhs.get_object_name()); if(symbol.is_auxiliary) assignment_type=symex_targett::assignment_typet::HIDDEN; @@ -254,15 +424,19 @@ void goto_symext::symex_assign_symbol( // Temporarily add the state guard guard.emplace_back(state.guard.as_expr()); + exprt original_lhs = l2_full_lhs; + get_original_name(original_lhs); target.assignment( conjunction(guard), ssa_lhs, l2_full_lhs, - add_to_lhs(full_lhs, ssa_lhs.get_original_expr()), + original_lhs, l2_rhs, state.source, assignment_type); + state.field_sensitivity.field_assignments(ns, state, lhs_mod, target); + // Restore the guard guard.pop_back(); } diff --git a/src/goto-symex/symex_dead.cpp b/src/goto-symex/symex_dead.cpp index dd6152e41df..c036aa5e516 100644 --- a/src/goto-symex/symex_dead.cpp +++ b/src/goto-symex/symex_dead.cpp @@ -11,6 +11,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "goto_symex.h" +#include #include #include @@ -22,23 +23,25 @@ void goto_symext::symex_dead(statet &state) const code_deadt &code = instruction.get_dead(); ssa_exprt ssa = state.rename_ssa(ssa_exprt{code.symbol()}, ns).get(); - const irep_idt &l1_identifier = ssa.get_identifier(); - // we cannot remove the object from the L1 renaming map, because L1 renaming - // information is not local to a path, but removing it from the propagation - // map and value-set is safe as 1) it is local to a path and 2) this instance - // can no longer appear + const exprt fields = field_sensitivityt::get_fields(ns, ssa); + find_symbols_sett fields_set; + find_symbols(fields, fields_set); - if(state.value_set.values.has_key(l1_identifier)) + for(const irep_idt &l1_identifier : fields_set) { - state.value_set.values.erase(l1_identifier); + // We cannot remove the object from the L1 renaming map, because L1 renaming + // information is not local to a path, but removing it from the propagation + // map and value-set is safe as 1) it is local to a path and 2) this + // instance can no longer appear. + if(state.value_set.values.has_key(l1_identifier)) + state.value_set.values.erase(l1_identifier); + state.propagation.erase(l1_identifier); + // Remove from the local L2 renaming map; this means any reads from the dead + // identifier will use generation 0 (e.g. x!N@M#0, where N and M are + // positive integers), which is never defined by any write, and will be + // dropped by `goto_symext::merge_goto` on merging with branches where the + // identifier is still live. + state.drop_l1_name(l1_identifier); } - - state.propagation.erase(l1_identifier); - // Remove from the local L2 renaming map; this means any reads from the dead - // identifier will use generation 0 (e.g. x!N@M#0, where N and M are positive - // integers), which is never defined by any write, and will be dropped by - // `goto_symext::merge_goto` on merging with branches where the identifier - // is still live. - state.drop_l1_name(l1_identifier); } diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index df03ade1dfb..c11c777cc27 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -356,6 +356,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) // symbols whose address is taken. PRECONDITION(!state.call_stack().empty()); exprt l1_expr = state.rename(expr, ns).get(); + state.field_sensitivity.apply(ns, l1_expr, write); // start the recursion! dereference_rec(l1_expr, state, write); @@ -388,4 +389,6 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) !has_subexpr(expr, ID_dereference), "simplify re-introduced dereferencing"); } + + state.field_sensitivity.apply(ns, expr, write); } diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index 667c911c12f..dea5eab6533 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -493,6 +493,10 @@ static void merge_names( return; } + // field sensitivity: only merge on individual fields + if(field_sensitivityt::is_divisible(ns, ssa)) + return; + // shared variables are renamed on every access anyway, we don't need to // merge anything const symbolt &symbol = ns.lookup(obj_identifier); diff --git a/src/util/ssa_expr.h b/src/util/ssa_expr.h index 23ee49c1756..364d403dcfe 100644 --- a/src/util/ssa_expr.h +++ b/src/util/ssa_expr.h @@ -35,6 +35,16 @@ class ssa_exprt:public symbol_exprt return static_cast(find(ID_expression)); } + /// Replace the underlying, original expression by \p expr while maintaining + /// SSA indices. + /// \param expr: expression to store + void set_expression(const exprt &expr) + { + type() = expr.type(); + add(ID_expression, expr); + update_identifier(); + } + irep_idt get_object_name() const { const exprt &original_expr = get_original_expr(); From ca9fcb793a35b8a6aaec15610802162625678525 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:43:50 +0000 Subject: [PATCH 03/10] Temporarily disable field-sensitivity for arrays The Array_Propagation1 test is no longer solved during symex in this case. --- .../cbmc-concurrency/struct_and_array1/test.desc | 2 +- regression/cbmc/Array_Propagation1/test.desc | 6 +++++- .../cbmc/variable-access-to-constant-array/test.desc | 4 ++-- src/goto-symex/field_sensitivity.cpp | 10 ++++++++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/regression/cbmc-concurrency/struct_and_array1/test.desc b/regression/cbmc-concurrency/struct_and_array1/test.desc index 9efefbc7362..52168c7eba4 100644 --- a/regression/cbmc-concurrency/struct_and_array1/test.desc +++ b/regression/cbmc-concurrency/struct_and_array1/test.desc @@ -1,4 +1,4 @@ -CORE +KNOWNBUG main.c ^EXIT=0$ diff --git a/regression/cbmc/Array_Propagation1/test.desc b/regression/cbmc/Array_Propagation1/test.desc index 65535fe6331..a1d9fddba86 100644 --- a/regression/cbmc/Array_Propagation1/test.desc +++ b/regression/cbmc/Array_Propagation1/test.desc @@ -1,4 +1,4 @@ -CORE +KNOWNBUG main.c ^EXIT=0$ @@ -7,3 +7,7 @@ main.c (Starting CEGAR Loop|VCC\(s\), 0 remaining after simplification$) -- ^warning: ignoring +-- +This requires field-sensitive SSA encoding of arrays to pass entirely via +constant propagation (changing the above condition to "1 remaining after +simplification" would also make it pass, but this isn't the point of the test). diff --git a/regression/cbmc/variable-access-to-constant-array/test.desc b/regression/cbmc/variable-access-to-constant-array/test.desc index d3fce1d598f..7fa81e26cff 100644 --- a/regression/cbmc/variable-access-to-constant-array/test.desc +++ b/regression/cbmc/variable-access-to-constant-array/test.desc @@ -1,4 +1,4 @@ -THOROUGH broken-smt-backend +CORE broken-smt-backend main.c ^EXIT=10$ @@ -12,4 +12,4 @@ do so. Field-sensitive encoding of array accesses for an array of 2^16 elements is very expensive. We might consider some bounds up to which we actually do field -expansion. +expansion, or at least need to mark this test as "THOROUGH." diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp index caf2cd1f5a0..82dcb534061 100644 --- a/src/goto-symex/field_sensitivity.cpp +++ b/src/goto-symex/field_sensitivity.cpp @@ -16,6 +16,8 @@ Author: Michael Tautschnig #include "goto_symex_state.h" #include "symex_target.h" +// #define ENABLE_ARRAY_FIELD_SENSITIVITY + void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) const { @@ -38,12 +40,14 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) { simplify(expr, ns); } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY else if( !write && expr.id() == ID_index && to_index_expr(expr).array().id() == ID_array) { simplify(expr, ns); } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY else if(expr.id() == ID_member) { // turn a member-of-an-SSA-expression into a single SSA expression, thus @@ -66,6 +70,7 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) expr.swap(tmp); } } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY else if(expr.id() == ID_index) { // turn a index-of-an-SSA-expression into a single SSA expression, thus @@ -89,6 +94,7 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) expr.swap(tmp); } } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY } exprt field_sensitivityt::get_fields( @@ -115,6 +121,7 @@ exprt field_sensitivityt::get_fields( return std::move(result); } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY else if( ssa_expr.type().id() == ID_array && to_array_type(ssa_expr.type()).size().id() == ID_constant) @@ -138,6 +145,7 @@ exprt field_sensitivityt::get_fields( return std::move(result); } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY else return ssa_expr; } @@ -212,6 +220,7 @@ void field_sensitivityt::field_assignments_rec( ++fs_it; } } +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY else if(const auto &type = type_try_dynamic_cast(lhs.type())) { const std::size_t array_size = @@ -229,6 +238,7 @@ void field_sensitivityt::field_assignments_rec( ++fs_it; } } +#endif // ENABLE_ARRAY_FIELD_SENSITIVITY else if(lhs_fs.has_operands()) { PRECONDITION( From 9577be9b35549a259255163e92b10f71c6e411d4 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:43:51 +0000 Subject: [PATCH 04/10] Use `..` and `[[]]` for SSA expression field separator This makes it easier to read the difference between an atomic symbol and a field of a composite. --- .../jbmc-generics/constant_propagation/test.desc | 6 +++--- src/util/ssa_expr.cpp | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/jbmc/regression/jbmc-generics/constant_propagation/test.desc b/jbmc/regression/jbmc-generics/constant_propagation/test.desc index fa691b93648..6345893ca5a 100644 --- a/jbmc/regression/jbmc-generics/constant_propagation/test.desc +++ b/jbmc/regression/jbmc-generics/constant_propagation/test.desc @@ -4,9 +4,9 @@ Test.class ^EXIT=0$ ^SIGNAL=0$ ^\{-\d+\} symex_dynamic::dynamic_object1#2 = \{ \{ \{ "java::GenericSub" \}, NULL, 0 \} \}$ -^\{-\d+\} symex_dynamic::dynamic_object1#2\.@Generic\.@java.lang.Object\.@class_identifier = "java::GenericSub"$ -^\{-\d+\} symex_dynamic::dynamic_object1#2\.@Generic\.key = NULL$ -^\{-\d+\} symex_dynamic::dynamic_object1#3\.@Generic\.x = 5$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.\.@Generic\.\.@java.lang.Object\.\.@class_identifier = "java::GenericSub"$ +^\{-\d+\} symex_dynamic::dynamic_object1#2\.\.@Generic\.\.key = NULL$ +^\{-\d+\} symex_dynamic::dynamic_object1#3\.\.@Generic\.\.x = 5$ -- byte_extract_(big|little)_endian -- diff --git a/src/util/ssa_expr.cpp b/src/util/ssa_expr.cpp index ccd065bb79e..9ac1f664c4f 100644 --- a/src/util/ssa_expr.cpp +++ b/src/util/ssa_expr.cpp @@ -16,7 +16,7 @@ Author: Daniel Kroening, kroening@kroening.com /// If \p expr is a symbol "s" add to \p os "s!l0@l1#l2" and to \p l1_object_os /// "s!l0@l1". /// If \p expr is a member or index expression, recursively apply the procedure -/// and add ".component_name" or "[index]" to \p os. +/// and add "..component_name" or "[[index]]" to \p os. static void build_ssa_identifier_rec( const exprt &expr, const irep_idt &l0, @@ -31,8 +31,8 @@ static void build_ssa_identifier_rec( build_ssa_identifier_rec(member.struct_op(), l0, l1, l2, os, l1_object_os); - os << '.' << member.get_component_name(); - l1_object_os << '.' << member.get_component_name(); + os << ".." << member.get_component_name(); + l1_object_os << ".." << member.get_component_name(); } else if(expr.id()==ID_index) { @@ -42,8 +42,8 @@ static void build_ssa_identifier_rec( const mp_integer idx = numeric_cast_v(to_constant_expr(index.index())); - os << '[' << idx << ']'; - l1_object_os << '[' << idx << ']'; + os << "[[" << idx << "]]"; + l1_object_os << "[[" << idx << "]]"; } else if(expr.id()==ID_symbol) { From 52d3003b4dee4865a8dff3b34e6b451046518144 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:43:49 +0000 Subject: [PATCH 05/10] Field sensitivity: avoid inheriting L2 renaming from struct symbols Previously this could turn e.g. struct@x#y into struct.field@x#y, regardless of whether the L2 generation #y for that field existed or not. This meant that when an L2 name was passed through field-sensitivity expansion it could end up referring to an unbound symbol, whereas an L1 name would remain coherent (e.g. struct@x -> struct.field@x), later being renamed using the ordinary level2_names map. That means the generation numbers for structs and their fields may get out of sync, but that should be harmless so long as "latest generation" is used consistently. We can now also remove constant propagator lookup after applying field sensitivity, because field_sensitivity::apply uses state.rename to apply L2 renaming, which itself consults the constant propagator. Also make is_divisible test cheaper, at the expense that synchronisation with get_fields needs to be maintained manually. --- src/goto-symex/field_sensitivity.cpp | 66 +++++++++++++++++++++------- src/goto-symex/field_sensitivity.h | 14 ++++-- src/goto-symex/goto_symex_state.cpp | 6 +-- src/goto-symex/symex_assign.cpp | 17 ++++--- src/goto-symex/symex_dead.cpp | 2 +- src/goto-symex/symex_dereference.cpp | 4 +- src/goto-symex/symex_goto.cpp | 2 +- 7 files changed, 75 insertions(+), 36 deletions(-) diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp index 82dcb534061..2f5adc4a3e6 100644 --- a/src/goto-symex/field_sensitivity.cpp +++ b/src/goto-symex/field_sensitivity.cpp @@ -18,8 +18,11 @@ Author: Michael Tautschnig // #define ENABLE_ARRAY_FIELD_SENSITIVITY -void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) - const +void field_sensitivityt::apply( + const namespacet &ns, + goto_symex_statet &state, + exprt &expr, + bool write) const { if(!run_apply) return; @@ -27,12 +30,12 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) if(expr.id() != ID_address_of) { Forall_operands(it, expr) - apply(ns, *it, write); + apply(ns, state, *it, write); } if(expr.id() == ID_symbol && expr.get_bool(ID_C_SSA_symbol) && !write) { - expr = get_fields(ns, to_ssa_expr(expr)); + expr = get_fields(ns, state, to_ssa_expr(expr)); } else if( !write && expr.id() == ID_member && @@ -64,10 +67,15 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) // place the entire member expression, not just the struct operand, in an // SSA expression ssa_exprt tmp = to_ssa_expr(member.struct_op()); + bool was_l2 = !tmp.get_level_2().empty(); + + tmp.remove_level_2(); member.struct_op() = tmp.get_original_expr(); tmp.set_expression(member); - - expr.swap(tmp); + if(was_l2) + expr = state.rename(tmp, ns).get(); + else + expr.swap(tmp); } } #ifdef ENABLE_ARRAY_FIELD_SENSITIVITY @@ -88,10 +96,15 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) // place the entire index expression, not just the array operand, in an // SSA expression ssa_exprt tmp = to_ssa_expr(index.array()); + bool was_l2 = !tmp.get_level_2().empty(); + + tmp.remove_level_2(); index.array() = tmp.get_original_expr(); tmp.set_expression(index); - - expr.swap(tmp); + if(was_l2) + expr = state.rename(tmp, ns).get(); + else + expr.swap(tmp); } } #endif // ENABLE_ARRAY_FIELD_SENSITIVITY @@ -99,7 +112,8 @@ void field_sensitivityt::apply(const namespacet &ns, exprt &expr, bool write) exprt field_sensitivityt::get_fields( const namespacet &ns, - const ssa_exprt &ssa_expr) + goto_symex_statet &state, + const ssa_exprt &ssa_expr) const { if(ssa_expr.type().id() == ID_struct || ssa_expr.type().id() == ID_struct_tag) { @@ -115,8 +129,13 @@ exprt field_sensitivityt::get_fields( { const member_exprt member(struct_op, comp.get_name(), comp.type()); ssa_exprt tmp = ssa_expr; + bool was_l2 = !tmp.get_level_2().empty(); + tmp.remove_level_2(); tmp.set_expression(member); - result.copy_to_operands(get_fields(ns, tmp)); + if(was_l2) + result.add_to_operands(state.rename(tmp, ns).get()); + else + result.add_to_operands(get_fields(ns, state, tmp)); } return std::move(result); @@ -139,8 +158,13 @@ exprt field_sensitivityt::get_fields( { const index_exprt index(array, from_integer(i, index_type())); ssa_exprt tmp = ssa_expr; + bool was_l2 = !tmp.get_level_2().empty(); + tmp.remove_level_2(); tmp.set_expression(index); - result.copy_to_operands(get_fields(ns, tmp)); + if(was_l2) + result.add_to_operands(state.rename(tmp, ns).get()); + else + result.add_to_operands(get_fields(ns, state, tmp)); } return std::move(result); @@ -157,7 +181,7 @@ void field_sensitivityt::field_assignments( symex_targett &target) { exprt lhs_fs = lhs; - apply(ns, lhs_fs, false); + apply(ns, state, lhs_fs, false); bool run_apply_bak = run_apply; run_apply = false; @@ -257,9 +281,19 @@ void field_sensitivityt::field_assignments_rec( } } -bool field_sensitivityt::is_divisible( - const namespacet &ns, - const ssa_exprt &expr) +bool field_sensitivityt::is_divisible(const ssa_exprt &expr) { - return expr != get_fields(ns, expr); + if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag) + return true; + +#ifdef ENABLE_ARRAY_FIELD_SENSITIVITY + if( + expr.type().id() == ID_array && + to_array_type(expr.type()).size().id() == ID_constant) + { + return true; + } +#endif + + return false; } diff --git a/src/goto-symex/field_sensitivity.h b/src/goto-symex/field_sensitivity.h index 753e19ed9f0..ab7747a7b5a 100644 --- a/src/goto-symex/field_sensitivity.h +++ b/src/goto-symex/field_sensitivity.h @@ -42,29 +42,35 @@ class field_sensitivityt /// written to (i.e., \p write is true); in such a case only translations from /// existing member or index expressions into symbols are performed. /// \param ns: a namespace to resolve type symbols/tag types + /// \param [in,out] state: symbolic execution state /// \param [in,out] expr: an expression to be (recursively) transformed - this /// parameter is both input and output. /// \param write: set to true if the expression is to be used as an lvalue. - void apply(const namespacet &ns, exprt &expr, bool write) const; + void + apply(const namespacet &ns, goto_symex_statet &state, exprt &expr, bool write) + const; /// Compute an expression representing the individual components of a /// field-sensitive SSA representation of \p ssa_expr. /// \param ns: a namespace to resolve type symbols/tag types + /// \param [in,out] state: symbolic execution state /// \param ssa_expr: the expression to evaluate /// \return Expanded expression; for example, for a \p ssa_expr of some struct /// type, a `struct_exprt` with each component now being an SSA expression /// is built. - static exprt get_fields(const namespacet &ns, const ssa_exprt &ssa_expr); + exprt get_fields( + const namespacet &ns, + goto_symex_statet &state, + const ssa_exprt &ssa_expr) const; /// Determine whether \p expr would translate to an atomic SSA expression /// (returns false) or a composite object made of several SSA expressions as /// components (such as a struct with each member becoming an individual SSA /// expression, return true in this case). - /// \param ns: a namespace to resolve type symbols/tag types /// \param expr: the expression to evaluate /// \return False, if and only if, \p expr would be a single field-sensitive /// SSA expression. - static bool is_divisible(const namespacet &ns, const ssa_exprt &expr); + static bool is_divisible(const ssa_exprt &expr); private: /// whether or not to invoke \ref field_sensitivityt::apply diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index dd7839ef5eb..ac546f6e013 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -336,7 +336,7 @@ goto_symex_statet::rename(exprt expr, const namespacet &ns) "if_exprt"); if(level == L2) - field_sensitivity.apply(ns, expr, false); + field_sensitivity.apply(ns, *this, expr, false); return renamedt{std::move(expr)}; } @@ -365,7 +365,7 @@ bool goto_symex_statet::l2_thread_read_encoding( } // only continue if an indivisible object is being accessed - if(field_sensitivityt::is_divisible(ns, expr)) + if(field_sensitivityt::is_divisible(expr)) return false; ssa_exprt ssa_l1=expr; @@ -494,7 +494,7 @@ goto_symex_statet::write_is_shared_resultt goto_symex_statet::write_is_shared( } // only continue if an indivisible object is being accessed - if(field_sensitivityt::is_divisible(ns, expr)) + if(field_sensitivityt::is_divisible(expr)) return write_is_shared_resultt::NOT_SHARED; if(atomic_section_id != 0) diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 34df887f1a9..eab62c9387a 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -208,16 +208,15 @@ void goto_symext::symex_assign_rec( /// \ref goto_symext::symex_assign_struct_member have done, but now making use /// of the index/member that may only be known after renaming to L2 has taken /// place. +/// \param [in, out] state: symbolic execution state to perform renaming /// \param [in,out] ssa_rhs: right-hand side /// \param [in,out] lhs_mod: left-hand side /// \param ns: namespace -/// \param field_sensitivity: field sensitivity object to turn left-hand side -/// into individual fields static void rewrite_with_to_field_symbols( + goto_symext::statet &state, exprt &ssa_rhs, ssa_exprt &lhs_mod, - const namespacet &ns, - const field_sensitivityt &field_sensitivity) + const namespacet &ns) { #ifdef USE_UPDATE while(ssa_rhs.id() == ID_update && @@ -243,7 +242,7 @@ static void rewrite_with_to_field_symbols( update.new_value().type()); } - field_sensitivity.apply(field_sensitive_lhs, true); + state.field_sensitivity.apply(state, field_sensitive_lhs, true); if(field_sensitive_lhs.id() != ID_symbol) break; @@ -272,7 +271,7 @@ static void rewrite_with_to_field_symbols( with_expr.new_value().type()); } - field_sensitivity.apply(ns, field_sensitive_lhs, true); + state.field_sensitivity.apply(ns, state, field_sensitive_lhs, true); if(field_sensitive_lhs.id() != ID_symbol) break; @@ -292,10 +291,10 @@ static void rewrite_with_to_field_symbols( /// \param ns: namespace /// \param do_simplify: set to true if, and only if, simplification is enabled static void shift_indexed_access_to_lhs( + goto_symext::statet &state, exprt &ssa_rhs, ssa_exprt &lhs_mod, const namespacet &ns, - const field_sensitivityt &field_sensitivity, bool do_simplify) { if( @@ -360,7 +359,7 @@ static void shift_indexed_access_to_lhs( } } - rewrite_with_to_field_symbols(ssa_rhs, lhs_mod, ns, field_sensitivity); + rewrite_with_to_field_symbols(state, ssa_rhs, lhs_mod, ns); } void goto_symext::symex_assign_symbol( @@ -385,7 +384,7 @@ void goto_symext::symex_assign_symbol( ssa_exprt lhs_mod = lhs; shift_indexed_access_to_lhs( - l2_rhs, lhs_mod, ns, state.field_sensitivity, symex_config.simplify_opt); + state, l2_rhs, lhs_mod, ns, symex_config.simplify_opt); do_simplify(l2_rhs); diff --git a/src/goto-symex/symex_dead.cpp b/src/goto-symex/symex_dead.cpp index c036aa5e516..d563c16b4fc 100644 --- a/src/goto-symex/symex_dead.cpp +++ b/src/goto-symex/symex_dead.cpp @@ -24,7 +24,7 @@ void goto_symext::symex_dead(statet &state) ssa_exprt ssa = state.rename_ssa(ssa_exprt{code.symbol()}, ns).get(); - const exprt fields = field_sensitivityt::get_fields(ns, ssa); + const exprt fields = state.field_sensitivity.get_fields(ns, state, ssa); find_symbols_sett fields_set; find_symbols(fields, fields_set); diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index c11c777cc27..282b9472f45 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -356,7 +356,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) // symbols whose address is taken. PRECONDITION(!state.call_stack().empty()); exprt l1_expr = state.rename(expr, ns).get(); - state.field_sensitivity.apply(ns, l1_expr, write); + state.field_sensitivity.apply(ns, state, l1_expr, write); // start the recursion! dereference_rec(l1_expr, state, write); @@ -390,5 +390,5 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) "simplify re-introduced dereferencing"); } - state.field_sensitivity.apply(ns, expr, write); + state.field_sensitivity.apply(ns, state, expr, write); } diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index dea5eab6533..55556d3d8df 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -494,7 +494,7 @@ static void merge_names( } // field sensitivity: only merge on individual fields - if(field_sensitivityt::is_divisible(ns, ssa)) + if(field_sensitivityt::is_divisible(ssa)) return; // shared variables are renamed on every access anyway, we don't need to From c964e4c73adb6dcef660e0a80fc78e0a5261a617 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:43:54 +0000 Subject: [PATCH 06/10] Apply field-sensitivity expansion after removing derefs This is because a pointer into the middle of a struct can resolve into something like &struct.field, in which case we need to rewrite that as the field-sensitive SSA symbol before any further dereferencing. --- src/goto-symex/symex_dereference.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 282b9472f45..e6879fd491e 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -241,6 +241,10 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) // this may yield a new auto-object trigger_auto_object(expr, state); + + // ...and may have introduced a member-of-symbol construct with a + // corresponding SSA symbol: + state.field_sensitivity.apply(ns, state, expr, write); } else if( expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member && From 267e54d375a5858ea9560bea8cf90f62711ca4ba Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 27 Mar 2019 16:08:30 +0000 Subject: [PATCH 07/10] Remove composite object from the constant propagator when a field is updated Otherwise e.g. in "memset2" we had the whole object being initialised with "{0, 0}" (which is stored in const prop and then expanded to individual fields), then later any operation that needed the whole-struct representation (such as the memset-of-one-field in that test, which expanded to "whole_struct = byte_update(whole_struct, offset, int, 0)") would get the whole-struct definition stored in const-prop, disregarding any intermediate updates that were able to directly reference a single field. This avoids having two different versions of a symbol (the field-of-composite and the indivisible field versions) in the most direct way possible. --- src/goto-symex/field_sensitivity.cpp | 24 ++++++++++------ src/goto-symex/field_sensitivity.h | 9 ++++-- src/goto-symex/goto_symex_state.cpp | 17 +++++++++-- src/goto-symex/symex_assign.cpp | 2 -- src/pointer-analysis/value_set.cpp | 43 ++++++++++++++++++++++++++++ src/pointer-analysis/value_set.h | 12 ++++++++ 6 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp index 2f5adc4a3e6..6f55247e03e 100644 --- a/src/goto-symex/field_sensitivity.cpp +++ b/src/goto-symex/field_sensitivity.cpp @@ -177,15 +177,17 @@ exprt field_sensitivityt::get_fields( void field_sensitivityt::field_assignments( const namespacet &ns, goto_symex_statet &state, - const exprt &lhs, - symex_targett &target) + const ssa_exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness) { exprt lhs_fs = lhs; apply(ns, state, lhs_fs, false); bool run_apply_bak = run_apply; run_apply = false; - field_assignments_rec(ns, state, lhs_fs, lhs, target); + field_assignments_rec( + ns, state, lhs_fs, lhs, target, allow_pointer_unsoundness); run_apply = run_apply_bak; } @@ -198,12 +200,14 @@ void field_sensitivityt::field_assignments( /// \param lhs_fs: expanded symbol /// \param lhs: non-expanded symbol /// \param target: symbolic execution equation store +/// \param allow_pointer_unsoundness: allow pointer unsoundness void field_sensitivityt::field_assignments_rec( const namespacet &ns, goto_symex_statet &state, const exprt &lhs_fs, const exprt &lhs, - symex_targett &target) + symex_targett &target, + bool allow_pointer_unsoundness) { if(lhs == lhs_fs) return; @@ -213,7 +217,8 @@ void field_sensitivityt::field_assignments_rec( simplify(ssa_rhs, ns); ssa_exprt ssa_lhs = to_ssa_expr(lhs_fs); - state.assignment(ssa_lhs, ssa_rhs, ns, true, true); + state.assignment( + ssa_lhs, ssa_rhs, ns, true, true, allow_pointer_unsoundness); // do the assignment target.assignment( @@ -240,7 +245,8 @@ void field_sensitivityt::field_assignments_rec( const member_exprt member_rhs(lhs, comp.get_name(), comp.type()); const exprt &member_lhs = *fs_it; - field_assignments_rec(ns, state, member_lhs, member_rhs, target); + field_assignments_rec( + ns, state, member_lhs, member_rhs, target, allow_pointer_unsoundness); ++fs_it; } } @@ -258,7 +264,8 @@ void field_sensitivityt::field_assignments_rec( const index_exprt index_rhs(lhs, from_integer(i, index_type())); const exprt &index_lhs = *fs_it; - field_assignments_rec(ns, state, index_lhs, index_rhs, target); + field_assignments_rec( + ns, state, index_lhs, index_rhs, target, allow_pointer_unsoundness); ++fs_it; } } @@ -271,7 +278,8 @@ void field_sensitivityt::field_assignments_rec( exprt::operandst::const_iterator fs_it = lhs_fs.operands().begin(); for(const exprt &op : lhs.operands()) { - field_assignments_rec(ns, state, *fs_it, op, target); + field_assignments_rec( + ns, state, *fs_it, op, target, allow_pointer_unsoundness); ++fs_it; } } diff --git a/src/goto-symex/field_sensitivity.h b/src/goto-symex/field_sensitivity.h index ab7747a7b5a..35b10c90fbd 100644 --- a/src/goto-symex/field_sensitivity.h +++ b/src/goto-symex/field_sensitivity.h @@ -27,11 +27,13 @@ class field_sensitivityt /// \param state: symbolic execution state /// \param lhs: non-expanded symbol /// \param target: symbolic execution equation store + /// \param allow_pointer_unsoundness: allow pointer unsoundness void field_assignments( const namespacet &ns, goto_symex_statet &state, - const exprt &lhs, - symex_targett &target); + const ssa_exprt &lhs, + symex_targett &target, + bool allow_pointer_unsoundness); /// Turn an expression \p expr into a field-sensitive SSA expression. /// Field-sensitive SSA expressions have individual symbols for each @@ -81,7 +83,8 @@ class field_sensitivityt goto_symex_statet &state, const exprt &lhs_fs, const exprt &lhs, - symex_targett &target); + symex_targett &target, + bool allow_pointer_unsoundness); }; #endif // CPROVER_GOTO_SYMEX_FIELD_SENSITIVITY_H diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index ac546f6e013..008e9f8b4d0 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -170,6 +170,7 @@ void goto_symex_statet::assignment( { DATA_INVARIANT(!check_renaming_l1(lhs), "lhs renaming failed on l1"); } + const ssa_exprt l1_lhs = lhs; #if 0 PRECONDITION(l1_identifier != get_original_name(l1_identifier) @@ -221,11 +222,23 @@ void goto_symex_statet::assignment( value_set.assign(l1_lhs, l1_rhs, ns, rhs_is_simplified, is_shared); } - #if 0 + if(field_sensitivityt::is_divisible(l1_lhs)) + { + // Split composite symbol lhs into its components + field_sensitivity.field_assignments( + ns, *this, l1_lhs, *symex_target, allow_pointer_unsoundness); + // Erase the composite symbol from our working state. Note that we need to + // have it in the propagation table and the value set while doing the field + // assignments, thus we cannot skip putting it in there above. + propagation.erase(l1_identifier); + value_set.erase_symbol(l1_lhs, ns); + } + +#if 0 std::cout << "Assigning " << l1_identifier << '\n'; value_set.output(ns, std::cout); std::cout << "**********************\n"; - #endif +#endif } template diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index eab62c9387a..7df4c5e1cec 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -434,8 +434,6 @@ void goto_symext::symex_assign_symbol( state.source, assignment_type); - state.field_sensitivity.field_assignments(ns, state, lhs_mod, target); - // Restore the guard guard.pop_back(); } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 6a5a507ce57..8218efdf428 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1653,3 +1653,46 @@ void value_sett::erase_values_from_entry( } values.replace(index, std::move(replacement)); } + +void value_sett::erase_struct_union_symbol( + const struct_union_typet &struct_union_type, + const std::string &erase_prefix, + const namespacet &ns) +{ + for(const auto &c : struct_union_type.components()) + { + const typet &subtype = c.type(); + const irep_idt &name = c.get_name(); + + // ignore methods and padding + if(subtype.id() == ID_code || c.get_is_padding()) + continue; + + erase_symbol_rec(subtype, erase_prefix + "." + id2string(name), ns); + } +} + +void value_sett::erase_symbol_rec( + const typet &type, + const std::string &erase_prefix, + const namespacet &ns) +{ + if(type.id() == ID_struct_tag) + erase_struct_union_symbol( + ns.follow_tag(to_struct_tag_type(type)), erase_prefix, ns); + else if(type.id() == ID_union_tag) + erase_struct_union_symbol( + ns.follow_tag(to_union_tag_type(type)), erase_prefix, ns); + else if(type.id() == ID_array) + erase_symbol_rec(type.subtype(), erase_prefix + "[]", ns); + else if(values.has_key(erase_prefix)) + values.erase(erase_prefix); +} + +void value_sett::erase_symbol( + const symbol_exprt &symbol_expr, + const namespacet &ns) +{ + erase_symbol_rec( + symbol_expr.type(), id2string(symbol_expr.get_identifier()), ns); +} diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index dbaf1998cde..d5537d0502f 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -483,6 +483,8 @@ class value_sett const irep_idt &index, const std::unordered_set &values_to_erase); + void erase_symbol(const symbol_exprt &symbol_expr, const namespacet &ns); + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. @@ -517,6 +519,16 @@ class value_sett const exprt &src, exprt &dest) const; + void erase_symbol_rec( + const typet &type, + const std::string &erase_prefix, + const namespacet &ns); + + void erase_struct_union_symbol( + const struct_union_typet &struct_union_type, + const std::string &erase_prefix, + const namespacet &ns); + // Subclass customisation points: protected: From 38382353fe3534c06c9e16343736242e91cfe9d9 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 11 Mar 2019 13:44:00 +0000 Subject: [PATCH 08/10] Comment on and clarify field-sensitivity-specific expression transformations The function name was previously incorrect (it did more than it said), so now I just call both transformations from symex_assign. --- src/goto-symex/symex_assign.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 7df4c5e1cec..8fb04893712 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -291,7 +291,6 @@ static void rewrite_with_to_field_symbols( /// \param ns: namespace /// \param do_simplify: set to true if, and only if, simplification is enabled static void shift_indexed_access_to_lhs( - goto_symext::statet &state, exprt &ssa_rhs, ssa_exprt &lhs_mod, const namespacet &ns, @@ -358,8 +357,6 @@ static void shift_indexed_access_to_lhs( lhs_mod = to_ssa_expr(byte_extract); } } - - rewrite_with_to_field_symbols(state, ssa_rhs, lhs_mod, ns); } void goto_symext::symex_assign_symbol( @@ -383,8 +380,13 @@ void goto_symext::symex_assign_symbol( ssa_exprt lhs_mod = lhs; - shift_indexed_access_to_lhs( - state, l2_rhs, lhs_mod, ns, symex_config.simplify_opt); + // Note the following two calls are specifically required for + // field-sensitivity. For example, with-expressions, which may have just been + // introduced by symex_assign_struct_member, are transformed into member + // expressions on the LHS. If we add an option to disable field-sensitivity + // in the future these should be omitted. + shift_indexed_access_to_lhs(l2_rhs, lhs_mod, ns, symex_config.simplify_opt); + rewrite_with_to_field_symbols(state, l2_rhs, lhs_mod, ns); do_simplify(l2_rhs); From 3b176349df4a9c97696093ea6dbd3acf296151e5 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 8 Apr 2019 15:27:25 +0100 Subject: [PATCH 09/10] Move field expansion outside goto_symex_statet::assignment Otherwise while everything works correctly for the state, the target sees the field expansions before the composite definition, which is a problem if the values are non-constant and so the expansion RHSes refer to the composite! --- src/goto-symex/goto_symex_state.cpp | 12 ----------- src/goto-symex/symex_assign.cpp | 32 +++++++++++++++++++---------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index 008e9f8b4d0..3943cf34788 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -222,18 +222,6 @@ void goto_symex_statet::assignment( value_set.assign(l1_lhs, l1_rhs, ns, rhs_is_simplified, is_shared); } - if(field_sensitivityt::is_divisible(l1_lhs)) - { - // Split composite symbol lhs into its components - field_sensitivity.field_assignments( - ns, *this, l1_lhs, *symex_target, allow_pointer_unsoundness); - // Erase the composite symbol from our working state. Note that we need to - // have it in the propagation table and the value set while doing the field - // assignments, thus we cannot skip putting it in there above. - propagation.erase(l1_identifier); - value_set.erase_symbol(l1_lhs, ns); - } - #if 0 std::cout << "Assigning " << l1_identifier << '\n'; value_set.output(ns, std::cout); diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 8fb04893712..50c325dd715 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -390,9 +390,10 @@ void goto_symext::symex_assign_symbol( do_simplify(l2_rhs); - ssa_exprt ssa_lhs = lhs_mod; + ssa_exprt l2_lhs = lhs_mod; + ssa_exprt l1_lhs = l2_lhs; // l2_lhs will be renamed to L2 by the following: state.assignment( - ssa_lhs, + l2_lhs, l2_rhs, ns, symex_config.simplify_opt, @@ -400,25 +401,22 @@ void goto_symext::symex_assign_symbol( symex_config.allow_pointer_unsoundness); exprt ssa_full_lhs=full_lhs; - ssa_full_lhs=add_to_lhs(ssa_full_lhs, ssa_lhs); + ssa_full_lhs = add_to_lhs(ssa_full_lhs, l2_lhs); const bool record_events=state.record_events; state.record_events=false; exprt l2_full_lhs = state.rename(std::move(ssa_full_lhs), ns).get(); state.record_events=record_events; // do the assignment - const symbolt &symbol = ns.lookup(ssa_lhs.get_object_name()); + const symbolt &symbol = ns.lookup(l2_lhs.get_object_name()); if(symbol.is_auxiliary) assignment_type=symex_targett::assignment_typet::HIDDEN; log.conditional_output( - log.debug(), - [this, &ssa_lhs](messaget::mstreamt &mstream) { - mstream << "Assignment to " << ssa_lhs.get_identifier() - << " [" - << pointer_offset_bits(ssa_lhs.type(), ns).value_or(0) - << " bits]" + log.debug(), [this, &l2_lhs](messaget::mstreamt &mstream) { + mstream << "Assignment to " << l2_lhs.get_identifier() << " [" + << pointer_offset_bits(l2_lhs.type(), ns).value_or(0) << " bits]" << messaget::eom; }); @@ -429,13 +427,25 @@ void goto_symext::symex_assign_symbol( get_original_name(original_lhs); target.assignment( conjunction(guard), - ssa_lhs, + l2_lhs, l2_full_lhs, original_lhs, l2_rhs, state.source, assignment_type); + if(field_sensitivityt::is_divisible(l1_lhs)) + { + // Split composite symbol lhs into its components + state.field_sensitivity.field_assignments( + ns, state, l1_lhs, target, symex_config.allow_pointer_unsoundness); + // Erase the composite symbol from our working state. Note that we need to + // have it in the propagation table and the value set while doing the field + // assignments, thus we cannot skip putting it in there above. + state.propagation.erase(l1_lhs.get_identifier()); + state.value_set.erase_symbol(l1_lhs, ns); + } + // Restore the guard guard.pop_back(); } From 8cd443cf14eec1da3a062de983665772239d00fe Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 12 Apr 2019 11:36:16 +0100 Subject: [PATCH 10/10] field_sensitivityt::get_fields: recurse on operands even when input is already L2 For an L1 or lower expression, get_fields already recursed, e.g. to turn `x` -> `{ { x..a..g, x..a..h }, x..b }` rather than `{ x..a, a..b }` when its `a` member is itself struct-typed. However this recursion was omitted in the case where the input expression is already L2 renamed. --- src/goto-symex/field_sensitivity.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/goto-symex/field_sensitivity.cpp b/src/goto-symex/field_sensitivity.cpp index 6f55247e03e..e3465c2c44e 100644 --- a/src/goto-symex/field_sensitivity.cpp +++ b/src/goto-symex/field_sensitivity.cpp @@ -133,7 +133,10 @@ exprt field_sensitivityt::get_fields( tmp.remove_level_2(); tmp.set_expression(member); if(was_l2) - result.add_to_operands(state.rename(tmp, ns).get()); + { + result.add_to_operands( + state.rename(get_fields(ns, state, tmp), ns).get()); + } else result.add_to_operands(get_fields(ns, state, tmp)); } @@ -162,7 +165,10 @@ exprt field_sensitivityt::get_fields( tmp.remove_level_2(); tmp.set_expression(index); if(was_l2) - result.add_to_operands(state.rename(tmp, ns).get()); + { + result.add_to_operands( + state.rename(get_fields(ns, state, tmp), ns).get()); + } else result.add_to_operands(get_fields(ns, state, tmp)); }