From 7713e8a82f5ac2b4126ec15d36db05776a058806 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:45 +0000 Subject: [PATCH 01/18] Move apply_condition from symex_assume_l2 to symex_assume This is in preparation for putting try_filter_value_sets after it. As noted in the comment, it is important that it comes after rename. --- src/goto-symex/symex_main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index cef3bf55539..d287ce31a66 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -128,6 +128,10 @@ void goto_symext::symex_assume(statet &state, const exprt &cond) simplified_cond = state.rename(std::move(simplified_cond), ns).get(); do_simplify(simplified_cond); + // apply_condition must come after rename because it might change the + // constant propagator and the value-set and we read from those in rename + state.apply_condition(simplified_cond, state, ns); + symex_assume_l2(state, simplified_cond); } @@ -158,8 +162,6 @@ void goto_symext::symex_assume_l2(statet &state, const exprt &cond) if(state.atomic_section_id!=0 && state.guard.is_false()) symex_atomic_end(state); - - state.apply_condition(cond, state, ns); } void goto_symext::rewrite_quantifiers(exprt &expr, statet &state) From 94b71eac3218176cd4031ceec8df9043053e5dcb Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:48 +0000 Subject: [PATCH 02/18] Move apply_goto_condition into goto_symext This is in preparation for calling try_filter_value_sets in apply_goto_condition --- src/goto-symex/goto_symex.h | 16 ++++++++++++++++ src/goto-symex/symex_goto.cpp | 12 ++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 6871883e452..fbfc03cd62a 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -315,6 +315,22 @@ class goto_symext virtual void symex_other(statet &state); void symex_assert(const goto_programt::instructiont &, statet &); + + /// Propagate constants and points-to information implied by a GOTO condition. + /// See \ref goto_statet::apply_condition for aspects of this which are common + /// to GOTO and ASSUME instructions. + /// \param current_state: state prior to the GOTO instruction + /// \param jump_taken_state: state following taking the GOTO + /// \param jump_not_taken_state: fall-through state + /// \param new_guard: GOTO condition, L2 renamed and simplified + /// \param ns: global namespace + void apply_goto_condition( + goto_symex_statet ¤t_state, + goto_statet &jump_taken_state, + goto_statet &jump_not_taken_state, + const exprt &new_guard, + const namespacet &ns); + virtual void vcc( const exprt &, const std::string &msg, diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index 45d0529b1a6..d06db97633c 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -21,16 +21,8 @@ Author: Daniel Kroening, kroening@kroening.com #include -/// Propagate constants and points-to information implied by a GOTO condition. -/// See \ref goto_statet::apply_condition for aspects of this which are common -/// to GOTO and ASSUME instructions. -/// \param current_state: state prior to the GOTO instruction -/// \param jump_taken_state: state following taking the GOTO -/// \param jump_not_taken_state: fall-through state -/// \param new_guard: GOTO condition, L2 renamed and simplified -/// \param ns: global namespace -static void apply_goto_condition( - const goto_symex_statet ¤t_state, +void goto_symext::apply_goto_condition( + goto_symex_statet ¤t_state, goto_statet &jump_taken_state, goto_statet &jump_not_taken_state, const exprt &new_guard, From d4f0d945a28d752158545c5a266684420320b92a Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:49 +0000 Subject: [PATCH 03/18] Pass original guard to apply_goto_condition This is in preparation for calling try_filter_value_sets inside apply_goto_condition --- src/goto-symex/goto_symex.h | 2 ++ src/goto-symex/symex_goto.cpp | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index fbfc03cd62a..5e309db2d28 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -322,12 +322,14 @@ class goto_symext /// \param current_state: state prior to the GOTO instruction /// \param jump_taken_state: state following taking the GOTO /// \param jump_not_taken_state: fall-through state + /// \param original_guard: the original GOTO condition /// \param new_guard: GOTO condition, L2 renamed and simplified /// \param ns: global namespace void apply_goto_condition( goto_symex_statet ¤t_state, goto_statet &jump_taken_state, goto_statet &jump_not_taken_state, + const exprt &original_guard, const exprt &new_guard, const namespacet &ns); diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index d06db97633c..67e899debd3 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -25,6 +25,7 @@ void goto_symext::apply_goto_condition( goto_symex_statet ¤t_state, goto_statet &jump_taken_state, goto_statet &jump_not_taken_state, + const exprt &original_guard, const exprt &new_guard, const namespacet &ns) { @@ -261,7 +262,13 @@ void goto_symext::symex_goto(statet &state) auto &taken_state = backward ? state : goto_state_list.back().second; auto ¬_taken_state = backward ? goto_state_list.back().second : state; - apply_goto_condition(state, taken_state, not_taken_state, new_guard, ns); + apply_goto_condition( + state, + taken_state, + not_taken_state, + instruction.get_condition(), + new_guard, + ns); } // produce new guard symbol From 4e0ba71bedce5ee365a9827099d4f832deb45757 Mon Sep 17 00:00:00 2001 From: Owen Date: Mon, 25 Mar 2019 15:09:14 +0000 Subject: [PATCH 04/18] Make non-const version of find_entry --- src/pointer-analysis/value_set.cpp | 9 +++++++-- src/pointer-analysis/value_set.h | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index a312a963b3f..e4c4df492dd 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -48,13 +48,18 @@ bool value_sett::field_sensitive(const irep_idt &id, const typet &type) return type.id() == ID_struct || type.id() == ID_struct_tag; } -const value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) - const +value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) { auto found = values.find(id); return found == values.end() ? nullptr : &found->second; } +const value_sett::entryt * +value_sett::find_entry(const value_sett::idt &id) const +{ + return const_cast(this)->find_entry(id); +} + value_sett::entryt &value_sett::get_entry(const entryt &e, const typet &type) { irep_idt index; diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index df479262cde..0632d15cd30 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -328,6 +328,9 @@ class value_sett /// \return a constant pointer to an entry if found, or null otherwise. /// Note the pointer may be invalidated by insert operations, including /// get_entry. + entryt *find_entry(const idt &id); + + /// Const version of \ref find_entry const entryt *find_entry(const idt &id) const; /// Gets or inserts an entry in this value-set. From 0e8bb88914531e7785900493d74a3de1d7d1c889 Mon Sep 17 00:00:00 2001 From: Owen Date: Mon, 25 Mar 2019 15:16:15 +0000 Subject: [PATCH 05/18] Factor out value_sett::get_entry_for_symbol Make this a separate function so it can be used from other places. No change in behaviour. --- src/pointer-analysis/value_set.cpp | 98 +++++++++++++++++++----------- src/pointer-analysis/value_set.h | 20 ++++++ 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index e4c4df492dd..3ce22332a53 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -371,6 +371,64 @@ static std::string strip_first_field_from_suffix( return suffix.substr(field.length() + 1); } +value_sett::entryt *value_sett::get_entry_for_symbol( + const irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) +{ + if( + type.id() != ID_pointer && type.id() != ID_signedbv && + type.id() != ID_unsignedbv && type.id() != ID_array && + type.id() != ID_struct && type.id() != ID_struct_tag && + type.id() != ID_union && type.id() != ID_union_tag) + { + return nullptr; + } + + const typet &followed_type = type.id() == ID_struct_tag + ? ns.follow_tag(to_struct_tag_type(type)) + : type.id() == ID_union_tag + ? ns.follow_tag(to_union_tag_type(type)) + : type; + + // look it up + value_sett::entryt *entry = find_entry(id2string(identifier) + suffix); + + // try first component name as suffix if not yet found + if( + !entry && + (followed_type.id() == ID_struct || followed_type.id() == ID_union)) + { + const struct_union_typet &struct_union_type = + to_struct_union_type(followed_type); + + const irep_idt &first_component_name = + struct_union_type.components().front().get_name(); + + entry = find_entry( + id2string(identifier) + "." + id2string(first_component_name) + suffix); + } + + if(!entry) + { + // not found? try without suffix + entry = find_entry(identifier); + } + + return entry; +} + +const value_sett::entryt *value_sett::get_entry_for_symbol( + const irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) const +{ + return const_cast(this)->get_entry_for_symbol( + identifier, type, suffix, ns); +} + void value_sett::get_value_set_rec( const exprt &expr, object_mapt &dest, @@ -418,43 +476,11 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_symbol) { - irep_idt identifier=to_symbol_expr(expr).get_identifier(); - - // is it a pointer, integer, array or struct? - if(expr_type.id()==ID_pointer || - expr_type.id()==ID_signedbv || - expr_type.id()==ID_unsignedbv || - expr_type.id()==ID_struct || - expr_type.id()==ID_union || - expr_type.id()==ID_array) - { - // look it up - const entryt *entry = - find_entry(id2string(identifier) + suffix); + const entryt *entry = get_entry_for_symbol( + to_symbol_expr(expr).get_identifier(), expr_type, suffix, ns); - // try first component name as suffix if not yet found - if(!entry && (expr_type.id() == ID_struct || expr_type.id() == ID_union)) - { - const struct_union_typet &struct_union_type= - to_struct_union_type(expr_type); - - const irep_idt &first_component_name = - struct_union_type.components().front().get_name(); - - entry = find_entry( - id2string(identifier) + "." + id2string(first_component_name) + - suffix); - } - - // not found? try without suffix - if(!entry) - entry = find_entry(identifier); - - if(entry) - make_union(dest, entry->object_map); - else - insert(dest, exprt(ID_unknown, original_type)); - } + if(entry) + make_union(dest, entry->object_map); else insert(dest, exprt(ID_unknown, original_type)); } diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index 0632d15cd30..8abd1c38415 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -463,6 +463,26 @@ class value_sett exprt &expr, const namespacet &ns) const; + /// Get the entry for the symbol and suffix + /// \param identifier: The identifier for the symbol + /// \param type: The type of the symbol + /// \param suffix: The suffix for the entry + /// \param ns: The global namespace, for following \p type if it is a + /// struct tag type or a union tag type + /// \return The entry for the symbol and suffix + value_sett::entryt *get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns); + + /// const version of /ref get_entry_for_symbol + const value_sett::entryt *get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) const; + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. From 24925b69dd84f74384b40d487bd7ee04da0ac8a0 Mon Sep 17 00:00:00 2001 From: Owen Date: Mon, 25 Mar 2019 15:16:21 +0000 Subject: [PATCH 06/18] filter value sets at gotos and assumes When a conditional change of control flow in goto-symex (i.e. gotos and assumes), if there is a pointer-typed symbol in the condition then try to filter its value set separately for each branch based on which values are actually possible on that branch. We only do this when there is only one pointer-typed symbol in the condition. The intention is to make the value-sets more accurate. In particular, this lays the groundwork for dealing with virtual method dispatch much more efficiently. See #2122 for an approach that was rejected. For example in java, code like `x.equals(y)`, where `x` could point to an Object or a String, gets turned into code like ``` if (x.@class_identifier == 'java.lang.Object') x . java.lang.Object.equals(y) else if (x.@class_identifier == 'java.lang.String') x . java.lang.String.equals(y) ``` When symex goes into java.lang.String.equals it doesn't filter the value-set so that `this` (which is `x`) only points to the String, not the Object. This causes symex to add complicated expressions to the formula for field accesses to fields that x won't have if it points to an Object. --- src/goto-symex/goto_symex.h | 23 +++++ src/goto-symex/symex_goto.cpp | 21 ++++ src/goto-symex/symex_main.cpp | 161 +++++++++++++++++++++++++++++ src/pointer-analysis/value_set.cpp | 22 ++++ src/pointer-analysis/value_set.h | 2 + 5 files changed, 229 insertions(+) diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 5e309db2d28..3b75c5c8e98 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -333,6 +333,29 @@ class goto_symext const exprt &new_guard, const namespacet &ns); + /// Try to filter value sets based on whether possible values of a + /// pointer-typed symbol make the condition true or false. We only do this + /// when there is only one pointer-typed symbol in \p condition. + /// \param state: The current state + /// \param condition: The condition which is being evaluated, which it expects + /// will not have been cleaned or renamed. In practice, it's fine if it has + /// been cleaned and renamed up to level L1. + /// \param original_value_set: The value set we will read from + /// \param jump_taken_value_set: The value set that will be used when the + /// condition is true, so we remove any elements which we can tell will + /// make the condition false, or nullptr if this shouldn't be done + /// \param jump_not_taken_value_set: The value set that will be used when the + /// condition is false, so we remove any elements which we can tell will + /// make the condition true, or nullptr if this shouldn't be done + /// \param ns: A namespace + void try_filter_value_sets( + goto_symex_statet &state, + exprt condition, + const value_sett &original_value_set, + value_sett *jump_taken_value_set, + value_sett *jump_not_taken_value_set, + const namespacet &ns); + virtual void vcc( const exprt &, const std::string &msg, diff --git a/src/goto-symex/symex_goto.cpp b/src/goto-symex/symex_goto.cpp index 67e899debd3..667c911c12f 100644 --- a/src/goto-symex/symex_goto.cpp +++ b/src/goto-symex/symex_goto.cpp @@ -29,6 +29,27 @@ void goto_symext::apply_goto_condition( const exprt &new_guard, const namespacet &ns) { + // It would be better to call try_filter_value_sets after apply_condition, + // and pass nullptr for value sets which apply_condition successfully updated + // already. However, try_filter_value_sets calls rename to do constant + // propagation, and apply_condition can update the constant propagator. Since + // apply condition will never succeed on both jump_taken_state and + // jump_not_taken_state, it should be possible to pass a reference to an + // unmodified goto_statet to use for renaming. But renaming needs a + // goto_symex_statet, not just a goto_statet, and we only have access to one + // of those. A future improvement would be to split rename into two parts: + // one part on goto_symex_statet which is non-const and deals with + // l2 thread reads, and one part on goto_statet which is const and could be + // used in try_filter_value_sets. + + try_filter_value_sets( + current_state, + original_guard, + jump_taken_state.value_set, + &jump_taken_state.value_set, + &jump_not_taken_state.value_set, + ns); + jump_taken_state.apply_condition(new_guard, current_state, ns); // Try to find a negative condition that implies an equality constraint on diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index d287ce31a66..f9beefaea0c 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -15,6 +15,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -128,6 +129,13 @@ void goto_symext::symex_assume(statet &state, const exprt &cond) simplified_cond = state.rename(std::move(simplified_cond), ns).get(); do_simplify(simplified_cond); + // It would be better to call try_filter_value_sets after apply_condition, + // but it is not currently possible. See the comment at the beginning of + // \ref apply_goto_condition for more information. + + try_filter_value_sets( + state, cond, state.value_set, &state.value_set, nullptr, ns); + // apply_condition must come after rename because it might change the // constant propagator and the value-set and we read from those in rename state.apply_condition(simplified_cond, state, ns); @@ -543,3 +551,156 @@ void goto_symext::symex_step( UNREACHABLE; } } + +/// Check if an expression only contains one unique symbol (possibly repeated +/// multiple times) +/// \param expr: The expression to examine +/// \return If only one unique symbol occurs in \p expr then return it; +/// otherwise return an empty optionalt +static optionalt +find_unique_pointer_typed_symbol(const exprt &expr) +{ + optionalt return_value; + for(auto it = expr.depth_cbegin(); it != expr.depth_cend(); ++it) + { + const symbol_exprt *symbol_expr = expr_try_dynamic_cast(*it); + if(symbol_expr && can_cast_type(symbol_expr->type())) + { + // If we already have a potential return value, check if it is the same + // symbol, and return an empty optionalt if not + if(return_value && *symbol_expr != *return_value) + { + return {}; + } + return_value = *symbol_expr; + } + } + + // Either expr contains no pointer-typed symbols or it contains one unique + // pointer-typed symbol, possibly repeated multiple times + return return_value; +} + +/// This is a simplified version of value_set_dereferencet::build_reference_to. +/// It ignores the ID_dynamic_object case (which doesn't occur in goto-symex) +/// and gives up for integer addresses and non-trivial symbols +/// \param value_set_element: An element of a value-set +/// \param type: the type of the expression that might point to +/// \p value_set_element +/// \return An expression for the value of the pointer indicated by \p +/// value_set_element if it is easy to determine, or an empty optionalt +/// otherwise +static optionalt +value_set_element_to_expr(exprt value_set_element, pointer_typet type) +{ + const object_descriptor_exprt *object_descriptor = + expr_try_dynamic_cast(value_set_element); + if(!object_descriptor) + { + return {}; + } + + const exprt &root_object = object_descriptor->root_object(); + const exprt &object = object_descriptor->object(); + + if(root_object.id() == ID_null_object) + { + return null_pointer_exprt{type}; + } + else if(root_object.id() == ID_integer_address) + { + return {}; + } + else + { + // We should do something like + // value_set_dereference::dereference_type_compare, which deals with + // arrays having types containing void + if(object_descriptor->offset().is_zero() && object.type() == type.subtype()) + { + return address_of_exprt(object); + } + else + { + return {}; + } + } +} + +void goto_symext::try_filter_value_sets( + goto_symex_statet &state, + exprt condition, + const value_sett &original_value_set, + value_sett *jump_taken_value_set, + value_sett *jump_not_taken_value_set, + const namespacet &ns) +{ + condition = state.rename(std::move(condition), ns).get(); + + optionalt symbol_expr = + find_unique_pointer_typed_symbol(condition); + + if(!symbol_expr) + { + return; + } + + const pointer_typet &symbol_type = to_pointer_type(symbol_expr->type()); + + value_setst::valuest value_set_elements; + original_value_set.get_value_set(*symbol_expr, value_set_elements, ns); + + // Try evaluating the condition with the symbol replaced by a pointer to each + // one of its possible values in turn. If that leads to a true for some + // value_set_element then we can delete it from the value set that will be + // used if the condition is false, and vice versa. + for(const auto &value_set_element : value_set_elements) + { + optionalt possible_value = + value_set_element_to_expr(value_set_element, symbol_type); + + if(!possible_value) + { + continue; + } + + exprt modified_condition(condition); + + address_of_aware_replace_symbolt replace_symbol{}; + replace_symbol.insert(*symbol_expr, *possible_value); + replace_symbol(modified_condition); + + // This do_simplify() is needed for the following reason: if `condition` is + // `*p == a` and we replace `p` with `&a` then we get `*&a == a`. Suppose + // our constant propagation knows that `a` is `1`. Without this call to + // do_simplify(), state.rename() turns this into `*&a == 1` (because + // rename() doesn't do constant propagation inside addresses), which + // do_simplify() turns into `a == 1`, which cannot be evaluated as true + // without another round of constant propagation. + // It would be sufficient to replace this call to do_simplify() with + // something that just replaces `*&x` with `x` whenever it finds it. + do_simplify(modified_condition); + + const bool record_events = state.record_events; + state.record_events = false; + modified_condition = state.rename(std::move(modified_condition), ns).get(); + state.record_events = record_events; + + do_simplify(modified_condition); + + if(jump_taken_value_set && modified_condition.is_false()) + { + value_sett::entryt *entry = jump_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + jump_taken_value_set->erase_value_from_entry(*entry, value_set_element); + } + else if(jump_not_taken_value_set && modified_condition.is_true()) + { + value_sett::entryt *entry = + jump_not_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + jump_not_taken_value_set->erase_value_from_entry( + *entry, value_set_element); + } + } +} diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 3ce22332a53..c6e21821eba 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1623,3 +1623,25 @@ void value_sett::guard( assign(expr.op0(), address_of, ns, false, false); } } + +void value_sett::erase_value_from_entry( + entryt &entry, + const exprt &value_to_erase) +{ + std::vector keys_to_erase; + + for(const auto &key_value : entry.object_map.read()) + { + const auto &rhs_object = to_expr(key_value); + if(rhs_object == value_to_erase) + { + keys_to_erase.emplace_back(key_value.first); + } + } + + DATA_INVARIANT( + keys_to_erase.size() == 1, + "value_sett::erase_value_from_entry() should erase exactly one value"); + + entry.object_map.write().erase(keys_to_erase[0]); +} diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index 8abd1c38415..caa22f98646 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -483,6 +483,8 @@ class value_sett const std::string &suffix, const namespacet &ns) const; + void erase_value_from_entry(entryt &entry, const exprt &value_to_erase); + protected: /// Reads the set of objects pointed to by `expr`, including making /// recursive lookups for dereference operations etc. From 185981862a4b0890c9be404d5b54574135358044 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:55 +0000 Subject: [PATCH 07/18] Try to speed up try_filter_value_sets Store a vector of expressions to erase from each entry, so we only have to get each entry once (and only do that if we actually have anything to erase). --- src/goto-symex/symex_main.cpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index f9beefaea0c..f1d9cec5bbf 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -650,6 +650,11 @@ void goto_symext::try_filter_value_sets( value_setst::valuest value_set_elements; original_value_set.get_value_set(*symbol_expr, value_set_elements, ns); + std::vector delete_from_jump_taken_value_set; + std::vector delete_from_jump_not_taken_value_set; + delete_from_jump_taken_value_set.reserve(value_set_elements.size()); + delete_from_jump_not_taken_value_set.reserve(value_set_elements.size()); + // Try evaluating the condition with the symbol replaced by a pointer to each // one of its possible values in turn. If that leads to a true for some // value_set_element then we can delete it from the value set that will be @@ -690,17 +695,29 @@ void goto_symext::try_filter_value_sets( if(jump_taken_value_set && modified_condition.is_false()) { - value_sett::entryt *entry = jump_taken_value_set->get_entry_for_symbol( - symbol_expr->get_identifier(), symbol_type, "", ns); - jump_taken_value_set->erase_value_from_entry(*entry, value_set_element); + delete_from_jump_taken_value_set.emplace_back(value_set_element); } else if(jump_not_taken_value_set && modified_condition.is_true()) { - value_sett::entryt *entry = - jump_not_taken_value_set->get_entry_for_symbol( - symbol_expr->get_identifier(), symbol_type, "", ns); - jump_not_taken_value_set->erase_value_from_entry( - *entry, value_set_element); + delete_from_jump_not_taken_value_set.emplace_back(value_set_element); + } + } + if(jump_taken_value_set && !delete_from_jump_taken_value_set.empty()) + { + value_sett::entryt *entry = jump_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + for(const exprt &value_to_erase : delete_from_jump_taken_value_set) + { + jump_taken_value_set->erase_value_from_entry(*entry, value_to_erase); + } + } + if(jump_not_taken_value_set && !delete_from_jump_not_taken_value_set.empty()) + { + value_sett::entryt *entry = jump_not_taken_value_set->get_entry_for_symbol( + symbol_expr->get_identifier(), symbol_type, "", ns); + for(const exprt &value_to_erase : delete_from_jump_not_taken_value_set) + { + jump_not_taken_value_set->erase_value_from_entry(*entry, value_to_erase); } } } From 5baedb28cac9be32eaac51038740d0f6b9a8c766 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:53 +0000 Subject: [PATCH 08/18] Update tests for symex excluding null pointers Value-set filtering means some of the tests pass that didn't before. Also add tests that only pass because of local safe pointer analysis, not value-set filtering, so we can tell if that stops working in future. --- .../symex_should_exclude_null_pointers/main.c | 67 ++++++++++++++----- .../test.desc | 13 ++-- 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/regression/cbmc/symex_should_exclude_null_pointers/main.c b/regression/cbmc/symex_should_exclude_null_pointers/main.c index 4ee8d91e3bb..3ccab5c9585 100644 --- a/regression/cbmc/symex_should_exclude_null_pointers/main.c +++ b/regression/cbmc/symex_should_exclude_null_pointers/main.c @@ -1,3 +1,5 @@ +#include + static void noop() { } int main(int argc, char **argv) { @@ -5,12 +7,16 @@ int main(int argc, char **argv) { int x; int *maybe_null = argc & 1 ? &x : 0; - // Should work (guarded by assume): + // There are two features of symex that might exclude null pointers: local + // safe pointer analysis (LSPA for the rest of this file) and value-set + // filtering (i.e. goto_symext::try_filter_value_sets()). + + // Should be judged safe by LSPA and value-set filtering (guarded by assume): int *ptr1 = maybe_null; __CPROVER_assume(ptr1 != 0); int deref1 = *ptr1; - // Should work (guarded by else): + // Should be judged safe by LSPA and value-set filtering (guarded by else): int *ptr2 = maybe_null; if(ptr2 == 0) { @@ -20,14 +26,15 @@ int main(int argc, char **argv) { int deref2 = *ptr2; } - // Should work (guarded by if): + // Should be judged safe by LSPA and value-set filtering (guarded by if): int *ptr3 = maybe_null; if(ptr3 != 0) { int deref3 = *ptr3; } - // Shouldn't work yet despite being safe (guarded by backward goto): + // Should be judged unsafe by LSPA and safe by value-set filtering + // (guarded by backward goto): int *ptr4 = maybe_null; goto check; @@ -41,7 +48,8 @@ int main(int argc, char **argv) { end_test4: - // Shouldn't work yet despite being safe (guarded by confluence): + // Should be judged unsafe by LSPA and safe by value-set filtering + // (guarded by confluence): int *ptr5 = maybe_null; if(argc == 5) __CPROVER_assume(ptr5 != 0); @@ -49,26 +57,49 @@ int main(int argc, char **argv) { __CPROVER_assume(ptr5 != 0); int deref5 = *ptr5; - // Shouldn't work (unsafe as only guarded on one branch): + // Should be judged unsafe by LSPA (due to suspicion write to ptr5 might alter + // ptr6) and safe by value-set filtering: int *ptr6 = maybe_null; - if(argc == 6) - __CPROVER_assume(ptr6 != 0); - else - { - } + __CPROVER_assume(ptr6 != 0); + ptr5 = 0; int deref6 = *ptr6; - // Shouldn't work due to suspicion write to ptr6 might alter ptr7: + // Should be judged unsafe by LSPA (due to suspicion noop() call might alter + // ptr7) and safe by value-set filtering: int *ptr7 = maybe_null; __CPROVER_assume(ptr7 != 0); - ptr6 = 0; + noop(); int deref7 = *ptr7; - // Shouldn't work due to suspicion noop() call might alter ptr8: - int *ptr8 = maybe_null; - __CPROVER_assume(ptr8 != 0); - noop(); - int deref8 = *ptr8; + // Should be judged safe by LSPA and unsafe by value-set filtering (it + // isn't known what symbol *ptr_ptr8 refers to, so null can't be removed + // from a specific value set): + int r8_a = 1, r8_b = 2; + int *q8_a = argc & 2 ? &r8_a : 0; + int *q8_b = argc & 4 ? &r8_b : 0; + int **ptr8 = argc & 8 ? &q8_a : &q8_b; + __CPROVER_assume(*ptr8 != 0); + int deref8 = **ptr8; + + // Should be judged safe by LSPA and unsafe by value-set filtering (it + // isn't known what symbol *ptr_ptr9 refers to, so null can't be removed + // from a specific value set): + int r9_a = 1, r9_b = 2; + int *q9_a = argc & 2 ? &r9_a : 0; + int *q9_b = argc & 4 ? &r9_b : 0; + int **ptr9 = argc & 8 ? &q9_a : &q9_b; + if(*ptr9 != 0) + int deref9 = **ptr9; + + // Should be judged unsafe by LSPA and value-set filtering + // (unsafe as only guarded on one branch): + int *ptr10 = maybe_null; + if(argc == 10) + __CPROVER_assume(ptr10 != 0); + else + { + } + int deref10 = *ptr10; assert(0); } diff --git a/regression/cbmc/symex_should_exclude_null_pointers/test.desc b/regression/cbmc/symex_should_exclude_null_pointers/test.desc index e5075fa5763..493843ebfb7 100644 --- a/regression/cbmc/symex_should_exclude_null_pointers/test.desc +++ b/regression/cbmc/symex_should_exclude_null_pointers/test.desc @@ -3,15 +3,14 @@ main.c --show-vcc ^EXIT=0$ ^SIGNAL=0$ -ptr4\$object -ptr5\$object -ptr6\$object -ptr7\$object -ptr8\$object +ptr10\$object -- -ptr[1-3]\$object +ptr[1-9]\$object ^warning: ignoring -- ptrX\$object appearing in the VCCs indicates symex was unsure whether the pointer had a valid target, and uses the $object symbol as a referred object of last resort. ptr1-3 should be judged -not-null by symex, so their $object symbols do not occur. +not-null by local safe pointer analysis, so their $object symbols do not occur. ptr4-7 are +not caught by local safe pointer analysis but they are judged safe by value-set filtering, i.e. +goto_symext::try_filter_value_sets(). ptr8-9 are judged safe by local safe pointer analysis but +not value-set filtering. ptr10 is not judged safe by either because it is not safe. From d9ad5481a36ab7f30fefd6d32ebf951702a6a15c Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:54 +0000 Subject: [PATCH 09/18] Create paths-lifo-expected-failure tag for tests Test which rely on convergence will not work with path-symex --- regression/cbmc/CMakeLists.txt | 2 +- regression/cbmc/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regression/cbmc/CMakeLists.txt b/regression/cbmc/CMakeLists.txt index 8d7709afb89..b6c470209ec 100644 --- a/regression/cbmc/CMakeLists.txt +++ b/regression/cbmc/CMakeLists.txt @@ -5,7 +5,7 @@ add_test_pl_tests( add_test_pl_profile( "cbmc-paths-lifo" "$ --paths lifo" - "-C;-X;thorough-paths;-X;smt-backend;-s;paths-lifo" + "-C;-X;thorough-paths;-X;smt-backend;-X;paths-lifo-expected-failure;-s;paths-lifo" "CORE" ) diff --git a/regression/cbmc/Makefile b/regression/cbmc/Makefile index 2ff63786dca..83429155149 100644 --- a/regression/cbmc/Makefile +++ b/regression/cbmc/Makefile @@ -7,7 +7,7 @@ test-cprover-smt2: @../test.pl -e -p -c "../../../src/cbmc/cbmc --cprover-smt2" -X broken-smt-backend test-paths-lifo: - @../test.pl -e -p -c "../../../src/cbmc/cbmc --paths lifo" -X thorough-paths -X smt-backend + @../test.pl -e -p -c "../../../src/cbmc/cbmc --paths lifo" -X thorough-paths -X smt-backend -X paths-lifo-expected-failure tests.log: ../test.pl test From ffafa86aaec825a2e28f2b4abcf46df628ecef0f Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:56 +0000 Subject: [PATCH 10/18] Refactor value_set_dereferencet The aim is to make value_set_dereferencet::build_reference_to accessible from the outside, by making it public and static. This required making several other functions public and static as well. The motivation is to use in value_set_element_to_expr, which is a helper function for goto_symext::try_filter_value_sets. --- .../value_set_dereference.cpp | 44 +++++++++++------ src/pointer-analysis/value_set_dereference.h | 49 ++++++++++++------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 223b930cded..68c75184cef 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -74,7 +74,8 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) it!=points_to_set.end(); it++) { - valuet value = build_reference_to(*it, pointer); + valuet value = + build_reference_to(*it, pointer, exclude_null_derefs, language_mode, ns); #if 0 std::cout << "V: " << format(value.pointer_guard) << " --> "; @@ -179,7 +180,8 @@ exprt value_set_dereferencet::dereference(const exprt &pointer) /// - object_type=(int *), dereference_type=(void **) is not ok; bool value_set_dereferencet::dereference_type_compare( const typet &object_type, - const typet &dereference_type) const + const typet &dereference_type, + const namespacet &ns) { const typet *object_unwrapped = &object_type; const typet *dereference_unwrapped = &dereference_type; @@ -245,6 +247,11 @@ bool value_set_dereferencet::dereference_type_compare( /// ID_unknown, ID_invalid, or an object_descriptor_exprt giving a referred /// object and offset. /// \param pointer_expr: pointer expression that may point to `what` +/// \param exclude_null_derefs: Ignore value-set entries that indicate a +/// given dereference may follow a null pointer +/// \param language_mode: Mode for any new symbols created to represent a +/// dereference failure +/// \param ns: A namespace /// \return a `valuet` object containing `guard`, `value` and `ignore` fields. /// The `ignore` field is true for a `null` object when `exclude_null_derefs` /// is true (set by our creator when they know \p what cannot be null) @@ -258,7 +265,10 @@ bool value_set_dereferencet::dereference_type_compare( /// .ignore = false}` value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( const exprt &what, - const exprt &pointer_expr) + const exprt &pointer_expr, + const bool exclude_null_derefs, + const irep_idt language_mode, + const namespacet &ns) { const typet &dereference_type = pointer_expr.type().subtype(); @@ -324,8 +334,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( result.value=index_expr; } - else if(dereference_type_compare( - memory_symbol.type.subtype(), dereference_type)) + else if( + dereference_type_compare( + memory_symbol.type.subtype(), dereference_type, ns)) { const index_exprt index_expr( symbol_expr, @@ -372,18 +383,19 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( exprt root_object_subexpression=root_object; - if(dereference_type_compare(object_type, dereference_type) && - o.offset().is_zero()) + if( + dereference_type_compare(object_type, dereference_type, ns) && + o.offset().is_zero()) { // The simplest case: types match, and offset is zero! // This is great, we are almost done. result.value = typecast_exprt::conditional_cast(object, dereference_type); } - else if(root_object_type.id()==ID_array && - dereference_type_compare( - root_object_type.subtype(), - dereference_type)) + else if( + root_object_type.id() == ID_array && + dereference_type_compare( + root_object_type.subtype(), dereference_type, ns)) { // We have an array with a subtype that matches // the dereferencing type. @@ -451,7 +463,7 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( else offset=o.offset(); - if(memory_model(result.value, dereference_type, offset)) + if(memory_model(result.value, dereference_type, offset, ns)) { // ok, done } @@ -486,7 +498,8 @@ static bool is_a_bv_type(const typet &type) bool value_set_dereferencet::memory_model( exprt &value, const typet &to_type, - const exprt &offset) + const exprt &offset, + const namespacet &ns) { // we will allow more or less arbitrary pointer type cast @@ -518,7 +531,7 @@ bool value_set_dereferencet::memory_model( // otherwise, we will stitch it together from bytes - return memory_model_bytes(value, to_type, offset); + return memory_model_bytes(value, to_type, offset, ns); } /// Replace `value` by an expression of type `to_type` corresponding to the @@ -532,7 +545,8 @@ bool value_set_dereferencet::memory_model( bool value_set_dereferencet::memory_model_bytes( exprt &value, const typet &to_type, - const exprt &offset) + const exprt &offset, + const namespacet &ns) { const typet from_type=value.type(); diff --git a/src/pointer-analysis/value_set_dereference.h b/src/pointer-analysis/value_set_dereference.h index 531030cf1ef..2ba5eed81f4 100644 --- a/src/pointer-analysis/value_set_dereference.h +++ b/src/pointer-analysis/value_set_dereference.h @@ -54,21 +54,6 @@ class value_set_dereferencet final /// \param pointer: A pointer-typed expression, to be dereferenced. exprt dereference(const exprt &pointer); -private: - const namespacet &ns; - symbol_tablet &new_symbol_table; - dereference_callbackt &dereference_callback; - /// language_mode: ID_java, ID_C or another language identifier - /// if we know the source language in use, irep_idt() otherwise. - const irep_idt language_mode; - /// Flag indicating whether `value_set_dereferencet::dereference` should - /// disregard an apparent attempt to dereference NULL - const bool exclude_null_derefs; - - bool dereference_type_compare( - const typet &object_type, - const typet &dereference_type) const; - /// Return value for `build_reference_to`; see that method for documentation. class valuet { @@ -82,14 +67,40 @@ class value_set_dereferencet final } }; - valuet build_reference_to(const exprt &what, const exprt &pointer); + static valuet build_reference_to( + const exprt &what, + const exprt &pointer, + bool exclude_null_derefs, + irep_idt language_mode, + const namespacet &ns); + + static bool dereference_type_compare( + const typet &object_type, + const typet &dereference_type, + const namespacet &ns); - bool memory_model(exprt &value, const typet &type, const exprt &offset); + static bool memory_model( + exprt &value, + const typet &type, + const exprt &offset, + const namespacet &ns); - bool memory_model_bytes( + static bool memory_model_bytes( exprt &value, const typet &type, - const exprt &offset); + const exprt &offset, + const namespacet &ns); + +private: + const namespacet &ns; + symbol_tablet &new_symbol_table; + dereference_callbackt &dereference_callback; + /// language_mode: ID_java, ID_C or another language identifier + /// if we know the source language in use, irep_idt() otherwise. + const irep_idt language_mode; + /// Flag indicating whether `value_set_dereferencet::dereference` should + /// disregard an apparent attempt to dereference NULL + const bool exclude_null_derefs; }; #endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_DEREFERENCE_H From a555406b7b7658555c8cdb8475df04bec1b7d5b8 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:58 +0000 Subject: [PATCH 11/18] Use build_reference_to when filtering value sets --- src/goto-symex/symex_main.cpp | 69 +++++++++-------------------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index f1d9cec5bbf..a9cc8bcf895 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -14,6 +14,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include #include #include @@ -581,52 +583,6 @@ find_unique_pointer_typed_symbol(const exprt &expr) return return_value; } -/// This is a simplified version of value_set_dereferencet::build_reference_to. -/// It ignores the ID_dynamic_object case (which doesn't occur in goto-symex) -/// and gives up for integer addresses and non-trivial symbols -/// \param value_set_element: An element of a value-set -/// \param type: the type of the expression that might point to -/// \p value_set_element -/// \return An expression for the value of the pointer indicated by \p -/// value_set_element if it is easy to determine, or an empty optionalt -/// otherwise -static optionalt -value_set_element_to_expr(exprt value_set_element, pointer_typet type) -{ - const object_descriptor_exprt *object_descriptor = - expr_try_dynamic_cast(value_set_element); - if(!object_descriptor) - { - return {}; - } - - const exprt &root_object = object_descriptor->root_object(); - const exprt &object = object_descriptor->object(); - - if(root_object.id() == ID_null_object) - { - return null_pointer_exprt{type}; - } - else if(root_object.id() == ID_integer_address) - { - return {}; - } - else - { - // We should do something like - // value_set_dereference::dereference_type_compare, which deals with - // arrays having types containing void - if(object_descriptor->offset().is_zero() && object.type() == type.subtype()) - { - return address_of_exprt(object); - } - else - { - return {}; - } - } -} - void goto_symext::try_filter_value_sets( goto_symex_statet &state, exprt condition, @@ -661,18 +617,29 @@ void goto_symext::try_filter_value_sets( // used if the condition is false, and vice versa. for(const auto &value_set_element : value_set_elements) { - optionalt possible_value = - value_set_element_to_expr(value_set_element, symbol_type); - - if(!possible_value) + const bool exclude_null_derefs = false; + value_set_dereferencet::valuet possible_value = + value_set_dereferencet::build_reference_to( + value_set_element, + *symbol_expr, + exclude_null_derefs, + language_mode, + ns); + + if(possible_value.ignore) { continue; } + exprt replacement_expr = + possible_value.value.is_nil() + ? static_cast(null_pointer_exprt{symbol_type}) + : static_cast(address_of_exprt{possible_value.value}); + exprt modified_condition(condition); address_of_aware_replace_symbolt replace_symbol{}; - replace_symbol.insert(*symbol_expr, *possible_value); + replace_symbol.insert(*symbol_expr, replacement_expr); replace_symbol(modified_condition); // This do_simplify() is needed for the following reason: if `condition` is From c361ff71098e90f5504baeabba22e95a0fe063fc Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:50:59 +0000 Subject: [PATCH 12/18] Move pointer_offset_sum Move it from pointer_analysis to util. It is not currently used in the cbmc codebase. The only changes were to the names of the guards in the header file and the includes. --- src/pointer-analysis/Makefile | 1 - src/util/Makefile | 1 + src/{pointer-analysis => util}/pointer_offset_sum.cpp | 6 +++--- src/{pointer-analysis => util}/pointer_offset_sum.h | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) rename src/{pointer-analysis => util}/pointer_offset_sum.cpp (86%) rename src/{pointer-analysis => util}/pointer_offset_sum.h (63%) diff --git a/src/pointer-analysis/Makefile b/src/pointer-analysis/Makefile index edbb7ba6a43..3fd0d40712f 100644 --- a/src/pointer-analysis/Makefile +++ b/src/pointer-analysis/Makefile @@ -1,6 +1,5 @@ SRC = add_failed_symbols.cpp \ goto_program_dereference.cpp \ - pointer_offset_sum.cpp \ rewrite_index.cpp \ show_value_sets.cpp \ value_set.cpp \ diff --git a/src/util/Makefile b/src/util/Makefile index 8c4c6b5beb4..ebb2a015b19 100644 --- a/src/util/Makefile +++ b/src/util/Makefile @@ -53,6 +53,7 @@ SRC = allocate_objects.cpp \ parse_options.cpp \ parser.cpp \ pointer_offset_size.cpp \ + pointer_offset_sum.cpp \ pointer_predicates.cpp \ rational.cpp \ rational_tools.cpp \ diff --git a/src/pointer-analysis/pointer_offset_sum.cpp b/src/util/pointer_offset_sum.cpp similarity index 86% rename from src/pointer-analysis/pointer_offset_sum.cpp rename to src/util/pointer_offset_sum.cpp index 19baae143d2..dc34d0eb0ce 100644 --- a/src/pointer-analysis/pointer_offset_sum.cpp +++ b/src/util/pointer_offset_sum.cpp @@ -11,13 +11,13 @@ Author: Daniel Kroening, kroening@kroening.com #include "pointer_offset_sum.h" -#include +#include "std_expr.h" exprt pointer_offset_sum(const exprt &a, const exprt &b) { - if(a.id()==ID_unknown) + if(a.id() == ID_unknown) return a; - else if(b.id()==ID_unknown) + else if(b.id() == ID_unknown) return b; else if(a.is_zero()) return b; diff --git a/src/pointer-analysis/pointer_offset_sum.h b/src/util/pointer_offset_sum.h similarity index 63% rename from src/pointer-analysis/pointer_offset_sum.h rename to src/util/pointer_offset_sum.h index e4e6dc238ff..128151930cb 100644 --- a/src/pointer-analysis/pointer_offset_sum.h +++ b/src/util/pointer_offset_sum.h @@ -9,11 +9,11 @@ Author: Daniel Kroening, kroening@kroening.com /// \file /// Pointer Dereferencing -#ifndef CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H -#define CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H +#ifndef CPROVER_UTIL_POINTER_OFFSET_SUM_H +#define CPROVER_UTIL_POINTER_OFFSET_SUM_H -#include +#include "expr.h" exprt pointer_offset_sum(const exprt &a, const exprt &b); -#endif // CPROVER_POINTER_ANALYSIS_POINTER_OFFSET_SUM_H +#endif // CPROVER_UTIL_POINTER_OFFSET_SUM_H From f3c31ff0e6d2ea95562f1c4779f77e6f227e98df Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:51:00 +0000 Subject: [PATCH 13/18] Fix bug in simplify_dereference_rec The two operands to the plus_exprt don't need to have the same type as each other. --- src/util/simplify_expr.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index b2275cb421b..eed4a723368 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "mathematical_expr.h" #include "namespace.h" #include "pointer_offset_size.h" +#include "pointer_offset_sum.h" #include "rational.h" #include "rational_tools.h" #include "simplify_utils.h" @@ -904,7 +905,7 @@ bool simplify_exprt::simplify_dereference(exprt &expr) { index_exprt idx( old.array(), - plus_exprt(old.index(), pointer.op1()), + pointer_offset_sum(old.index(), pointer.op1()), old.array().type().subtype()); simplify_rec(idx); expr.swap(idx); From 7e3ca83d9a7f25586231b984ac6f8f368cfa22a3 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:51:02 +0000 Subject: [PATCH 14/18] Skip unknown elements in value set We should try to filter them out --- src/goto-symex/symex_main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index a9cc8bcf895..1a45f0d12bd 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -617,6 +617,11 @@ void goto_symext::try_filter_value_sets( // used if the condition is false, and vice versa. for(const auto &value_set_element : value_set_elements) { + if(value_set_element.id() == ID_unknown) + { + continue; + } + const bool exclude_null_derefs = false; value_set_dereferencet::valuet possible_value = value_set_dereferencet::build_reference_to( From 805a4091939aa5e2271892450aafb4f0248e3ed7 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:51:03 +0000 Subject: [PATCH 15/18] Add tests for symex value-set filtering --- .../symex_should_filter_value_sets/main.c | 146 ++++++++++++++++++ .../symex_should_filter_value_sets/test.desc | 29 ++++ 2 files changed, 175 insertions(+) create mode 100644 regression/cbmc/symex_should_filter_value_sets/main.c create mode 100644 regression/cbmc/symex_should_filter_value_sets/test.desc diff --git a/regression/cbmc/symex_should_filter_value_sets/main.c b/regression/cbmc/symex_should_filter_value_sets/main.c new file mode 100644 index 00000000000..6f9910184c3 --- /dev/null +++ b/regression/cbmc/symex_should_filter_value_sets/main.c @@ -0,0 +1,146 @@ +#include + +static void noop() +{ +} + +int main(int argc, char **argv) +{ + __CPROVER_assume(argc == 5); + + int a = 2; + int b = 1; + int *ptr_to_a_or_b = argv[0][0] == '0' ? &a : &b; + + // Should work (value-set filtered by assume): + int *p1 = ptr_to_a_or_b; + __CPROVER_assume(p1 != &a); + int c1 = *p1; + + int *p2 = ptr_to_a_or_b; + __CPROVER_assume(*p2 != 2); + int c2 = *p2; + + // Should work (value-set filtered by else): + int c3 = 0; + int *p3 = ptr_to_a_or_b; + if(p3 == &a) + { + } + else + { + c3 = *p3; + } + + int c4 = 0; + int *p4 = ptr_to_a_or_b; + if(*p4 == 2) + { + } + else + { + c4 = *p4; + } + + // Should work (value-set filtered by if): + int c5 = 0; + int *p5 = ptr_to_a_or_b; + if(p5 != &a) + { + c5 = *p5; + } + + int c6 = 0; + int *p6 = ptr_to_a_or_b; + if(*p6 != 2) + { + c6 = *p6; + } + + // Should work (value-set filtered by assume before a backward goto): + int *p7 = ptr_to_a_or_b; + goto check7; + +divide7: + int c7 = *p7; + goto end_test7; + +check7: + __CPROVER_assume(p7 != &a); + goto divide7; + +end_test7: + + int *p8 = ptr_to_a_or_b; + goto check8; + +divide8: + int c8 = *p8; + goto end_test8; + +check8: + __CPROVER_assume(*p8 != 2); + goto divide8; + +end_test8: + + // Should work (value-set filtered by confluence of if and else): + int *p9 = ptr_to_a_or_b; + if(argv[1][0] == '0') + __CPROVER_assume(p9 != &a); + else + __CPROVER_assume(p9 != &a); + int c9 = *p9; + + int *p10 = ptr_to_a_or_b; + if(argv[2][0] == '0') + __CPROVER_assume(*p10 != 2); + else + __CPROVER_assume(*p10 != 2); + int c10 = *p10; + + // Should work (value-set filtered by assume, write through an unrelated + // pointer has no effect): + int c = 0; + int *ptr_to_c = &c; + + int *p11 = ptr_to_a_or_b; + __CPROVER_assume(p11 != &a); + *ptr_to_c = 3; + int c11 = *p11; + + int *p12 = ptr_to_a_or_b; + __CPROVER_assume(*p12 != 2); + *ptr_to_c = 4; + int c12 = *p12; + + // Should work (value-set filtered by assume, function call has no effect): + int *p13 = ptr_to_a_or_b; + __CPROVER_assume(p13 != &a); + noop(); + int c13 = *p13; + + int *p14 = ptr_to_a_or_b; + __CPROVER_assume(*p14 != 2); + noop(); + int c14 = *p14; + + // Shouldn't work (unsafe as value-set only filtered by if on one branch): + int *p15 = ptr_to_a_or_b; + if(argv[3][0] == '0') + __CPROVER_assume(p15 != &a); + else + { + } + int c15 = *p15; + + int *p16 = ptr_to_a_or_b; + if(argv[4][0] == '0') + __CPROVER_assume(*p16 != 2); + else + { + } + int c16 = *p16; + + assert(0); +} diff --git a/regression/cbmc/symex_should_filter_value_sets/test.desc b/regression/cbmc/symex_should_filter_value_sets/test.desc new file mode 100644 index 00000000000..84401510cc7 --- /dev/null +++ b/regression/cbmc/symex_should_filter_value_sets/test.desc @@ -0,0 +1,29 @@ +CORE paths-lifo-expected-failure +main.c +--show-vcc +^EXIT=0$ +^SIGNAL=0$ +main::1::c1!0@1#. = 1 +main::1::c2!0@1#. = 1 +main::1::c3!0@1#. = 1 +main::1::c4!0@1#. = 1 +main::1::c5!0@1#. = 1 +main::1::c6!0@1#. = 1 +main::1::c7!0@1#. = 1 +main::1::c8!0@1#. = 1 +main::1::c9!0@1#. = 1 +main::1::c10!0@1#. = 1 +main::1::c11!0@1#. = 1 +main::1::c12!0@1#. = 1 +main::1::c13!0@1#. = 1 +main::1::c14!0@1#. = 1 +-- +^warning: ignoring +main::1::c15!0@1#. = 1 +main::1::c16!0@1#. = 1 +-- +This test does not work in single-path mode as it relies on convergence +behaviour (p15 and p16 are restricted on only one branch of an if-else diamond). + +See comments in main.c for explanations on why each of the assignments +is or isn't expected to appear in the output. From 6d9d1f8688f97c869cc877577c10fb2ba52911d6 Mon Sep 17 00:00:00 2001 From: Owen Date: Thu, 21 Mar 2019 21:51:04 +0000 Subject: [PATCH 16/18] Add tests for value-set filtering for virtual method calls The test without a deref in the this argument works with value-set filtering on its own. The test with a deref in the this argument also requires the use temporary variable for the this argument of virtual method calls (though we only need to do this when a dispatch table is made and when the this argument contains a dereference). --- .../jbmc/virtual_filter_value_sets/A.class | Bin 0 -> 320 bytes .../jbmc/virtual_filter_value_sets/B.class | Bin 0 -> 337 bytes .../virtual_filter_value_sets/Container.class | Bin 0 -> 292 bytes .../test_with_deref.desc | 18 ++++++++ .../test_without_deref.desc | 18 ++++++++ .../virtual_filter_value_sets.class | Bin 0 -> 1211 bytes .../virtual_filter_value_sets.java | 40 ++++++++++++++++++ 7 files changed, 76 insertions(+) create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/A.class create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/B.class create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/Container.class create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class create mode 100644 jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.java diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/A.class b/jbmc/regression/jbmc/virtual_filter_value_sets/A.class new file mode 100644 index 0000000000000000000000000000000000000000..d83b016d0dcface7aaa141b03b72e2c8cbe2cdf3 GIT binary patch literal 320 zcmYk0!AiqG5QhIr(ne#owGSX3Rm5DpDMAs1La+x*uVE9nx@ApZv&mC=@l@!+2k@c9 z*{x_7c4q#6|8JSE@69KGYm5_w7{(aII41OORi*45Av`-@5Tg6~SrUess^nw)R!B1! zg=S}*)uqr2VU*LqW@KMg!)P|SA<)u)G9@HWb!$rbpd5J8DPvoq^QF>On!FRbm3bp= zbF~tk7$8EA;R#{nbgD)5lFo{iEGuxYflK91s1J?O<-MQ8;<2ws$4!?td8Wo=-y$*=Mv2!aQHfImu{ zEP@CgzQ@dbGkl+4?;ik8v6~{qxQ`yT5^N{f5eQ2o?*+t_KzycG+MNsZ4i4v3E~+~v zFwFHzT{lk)WpCxekQwLIQkuE6y2C$Y=pJ-UYd)JWt@I(xOmC{jF4d*(jC-xNu92oF zwQ2@3Dz=1h?y{BO|j2cL%vSX@!SFy&xrfnPP_s{M{f`>UQXF# j(%spBJ@b3SxlSaofg$@}^DBa9iUEGjgMZBS6ot=C^JCL$Y9BycstdDmrwCGT721W;ZMaECoiZjcndH4(2`+p9AF6mK zh+H_F^Igsz=I8MB4d4dL6cc0#770!X370QgnH^zLJlqoEJKbn^FY)4XO9)r(P7$&~ zH)`F#SIRzfWrQq?wq~7ztte-zO|Q&kZ-P`KH>d+Tk35`~V=C5Yg!1i;|Bd<>Lm3u0BB@ Z0s>4$W5Ga%RCGT22s3g1(6MNMlV36uJ9Pj6 literal 0 HcmV?d00001 diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc b/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc new file mode 100644 index 00000000000..be92ae0f2fb --- /dev/null +++ b/jbmc/regression/jbmc/virtual_filter_value_sets/test_with_deref.desc @@ -0,0 +1,18 @@ +CORE +virtual_filter_value_sets.class +--show-vcc --function virtual_filter_value_sets.test_with_deref +^EXIT=0$ +^SIGNAL=0$ +java::B\.f:\(\)I#return_value!0#.* = 9$ +-- +java::B\.f:\(\)I#return_value!0#.* = .*byte_extract_.*_endian +^warning: ignoring +-- +When B.f is called on `c.a_field`, it is guarded by a conditional which implies +that `c.a_field` must be a B. Symex should filter the value set for `c.a_field` +to remove anything which doesn't satisfy that condition. It will then be able +to determine that the return value of B.f is 9. If it thinks that `c.a_field` +might contain an A then it will create a more complicated expression for the +return value of B.f, which will include byte_extract_little_endian or +byte_extract_big_endian, as this is what it will produce when trying to read +the field `int flag` from a class that doesn't have any fields. diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc b/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc new file mode 100644 index 00000000000..f1ae1c830e1 --- /dev/null +++ b/jbmc/regression/jbmc/virtual_filter_value_sets/test_without_deref.desc @@ -0,0 +1,18 @@ +CORE +virtual_filter_value_sets.class +--show-vcc --function virtual_filter_value_sets.test_without_deref +^EXIT=0$ +^SIGNAL=0$ +java::B\.f:\(\)I#return_value!0#.* = 9$ +-- +java::B\.f:\(\)I#return_value!0#.* = .*byte_extract_.*_endian +^warning: ignoring +-- +When B.f is called on `a_or_b`, it is guarded by a conditional which implies +that `a_or_b` must be a B. Symex should filter the value set for `a_or_b` to +remove anything which doesn't satisfy that condition. It will then be able +to determine that the return value of B.f is 9. If it thinks that `a_or_b` +might contain an A then it will create a more complicated expression for the +return value of B.f, which will include byte_extract_little_endian or +byte_extract_big_endian, as this is what it will produce when trying to read +the field `int flag` from a class that doesn't have any fields. diff --git a/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class b/jbmc/regression/jbmc/virtual_filter_value_sets/virtual_filter_value_sets.class new file mode 100644 index 0000000000000000000000000000000000000000..3de773d4e00e78d343accebd6d53a47b090a5b56 GIT binary patch literal 1211 zcmaJ=*-{fh6g{0xGFb)^w#cFp*$hjFNWdkCK-4O$0zP0-B`>K-(!k)Dv1TUtE%F0C zy8uOO>PM&d8;A?Vg;irH)^j+(thqG3v?H(vTknaw&CqtLB-uO9D$+2fnmEf-7M}H zrYr5@fo|4h(UGo`<4spOZt;!b?pZarSdq5eAx>)BgNs(JYDK!ml4Y6XqmxEqm3KtR z^VaAE^`d1LxeeOVWnG><73kP>_43Pg{dI!~+r!R~bsOm%MwE$B$Xiucryx7W4he*o z%4SP+WYenIW%R0uO67c0hWYHzYih8#R-!j#4tf7aE(7kvOyx-1j5yZh7i&`OOi$$ Date: Tue, 26 Mar 2019 10:10:47 +0000 Subject: [PATCH 17/18] Speed up erasing from value sets --- src/goto-symex/symex_main.cpp | 28 ++++++++++++---------------- src/pointer-analysis/value_set.cpp | 18 +++++++++++------- src/pointer-analysis/value_set.h | 5 ++++- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index 1a45f0d12bd..3755bcc5ca4 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -606,10 +606,10 @@ void goto_symext::try_filter_value_sets( value_setst::valuest value_set_elements; original_value_set.get_value_set(*symbol_expr, value_set_elements, ns); - std::vector delete_from_jump_taken_value_set; - std::vector delete_from_jump_not_taken_value_set; - delete_from_jump_taken_value_set.reserve(value_set_elements.size()); - delete_from_jump_not_taken_value_set.reserve(value_set_elements.size()); + std::unordered_set erase_from_jump_taken_value_set; + std::unordered_set erase_from_jump_not_taken_value_set; + erase_from_jump_taken_value_set.reserve(value_set_elements.size()); + erase_from_jump_not_taken_value_set.reserve(value_set_elements.size()); // Try evaluating the condition with the symbol replaced by a pointer to each // one of its possible values in turn. If that leads to a true for some @@ -667,29 +667,25 @@ void goto_symext::try_filter_value_sets( if(jump_taken_value_set && modified_condition.is_false()) { - delete_from_jump_taken_value_set.emplace_back(value_set_element); + erase_from_jump_taken_value_set.insert(value_set_element); } else if(jump_not_taken_value_set && modified_condition.is_true()) { - delete_from_jump_not_taken_value_set.emplace_back(value_set_element); + erase_from_jump_not_taken_value_set.insert(value_set_element); } } - if(jump_taken_value_set && !delete_from_jump_taken_value_set.empty()) + if(jump_taken_value_set && !erase_from_jump_taken_value_set.empty()) { value_sett::entryt *entry = jump_taken_value_set->get_entry_for_symbol( symbol_expr->get_identifier(), symbol_type, "", ns); - for(const exprt &value_to_erase : delete_from_jump_taken_value_set) - { - jump_taken_value_set->erase_value_from_entry(*entry, value_to_erase); - } + jump_taken_value_set->erase_values_from_entry( + *entry, erase_from_jump_taken_value_set); } - if(jump_not_taken_value_set && !delete_from_jump_not_taken_value_set.empty()) + if(jump_not_taken_value_set && !erase_from_jump_not_taken_value_set.empty()) { value_sett::entryt *entry = jump_not_taken_value_set->get_entry_for_symbol( symbol_expr->get_identifier(), symbol_type, "", ns); - for(const exprt &value_to_erase : delete_from_jump_not_taken_value_set) - { - jump_not_taken_value_set->erase_value_from_entry(*entry, value_to_erase); - } + jump_not_taken_value_set->erase_values_from_entry( + *entry, erase_from_jump_not_taken_value_set); } } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index c6e21821eba..a8a92ee3db7 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -1624,24 +1624,28 @@ void value_sett::guard( } } -void value_sett::erase_value_from_entry( +void value_sett::erase_values_from_entry( entryt &entry, - const exprt &value_to_erase) + const std::unordered_set &values_to_erase) { std::vector keys_to_erase; - for(const auto &key_value : entry.object_map.read()) + for(auto &key_value : entry.object_map.read()) { const auto &rhs_object = to_expr(key_value); - if(rhs_object == value_to_erase) + if(values_to_erase.count(rhs_object)) { keys_to_erase.emplace_back(key_value.first); } } DATA_INVARIANT( - keys_to_erase.size() == 1, - "value_sett::erase_value_from_entry() should erase exactly one value"); + keys_to_erase.size() == values_to_erase.size(), + "value_sett::erase_value_from_entry() should erase exactly one value for " + "each element in the set it is given"); - entry.object_map.write().erase(keys_to_erase[0]); + for(const auto &key_to_erase : keys_to_erase) + { + entry.object_map.write().erase(key_to_erase); + } } diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index caa22f98646..dbcae92ba15 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_POINTER_ANALYSIS_VALUE_SET_H #include +#include #include #include @@ -483,7 +484,9 @@ class value_sett const std::string &suffix, const namespacet &ns) const; - void erase_value_from_entry(entryt &entry, const exprt &value_to_erase); + void erase_values_from_entry( + entryt &entry, + const std::unordered_set &values_to_erase); protected: /// Reads the set of objects pointed to by `expr`, including making From 05444704e23bd4b04d7f302865dc13660928b28d Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 26 Mar 2019 11:02:36 +0000 Subject: [PATCH 18/18] Remove const-cast from value_sett::get_entry --- src/pointer-analysis/value_set.cpp | 34 ++++++++++++++++++++++-------- src/pointer-analysis/value_set.h | 15 +++++++++++++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index a8a92ee3db7..cb8934722f5 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -57,7 +57,8 @@ value_sett::entryt *value_sett::find_entry(const value_sett::idt &id) const value_sett::entryt * value_sett::find_entry(const value_sett::idt &id) const { - return const_cast(this)->find_entry(id); + auto found = values.find(id); + return found == values.end() ? nullptr : &found->second; } value_sett::entryt &value_sett::get_entry(const entryt &e, const typet &type) @@ -371,11 +372,16 @@ static std::string strip_first_field_from_suffix( return suffix.substr(field.length() + 1); } -value_sett::entryt *value_sett::get_entry_for_symbol( +template +auto value_sett::get_entry_for_symbol( + maybe_const_value_sett &value_set, const irep_idt identifier, const typet &type, const std::string &suffix, - const namespacet &ns) + const namespacet &ns) -> + typename std::conditional::value, + const value_sett::entryt *, + value_sett::entryt *>::type { if( type.id() != ID_pointer && type.id() != ID_signedbv && @@ -393,7 +399,7 @@ value_sett::entryt *value_sett::get_entry_for_symbol( : type; // look it up - value_sett::entryt *entry = find_entry(id2string(identifier) + suffix); + auto *entry = value_set.find_entry(id2string(identifier) + suffix); // try first component name as suffix if not yet found if( @@ -406,27 +412,37 @@ value_sett::entryt *value_sett::get_entry_for_symbol( const irep_idt &first_component_name = struct_union_type.components().front().get_name(); - entry = find_entry( + entry = value_set.find_entry( id2string(identifier) + "." + id2string(first_component_name) + suffix); } if(!entry) { // not found? try without suffix - entry = find_entry(identifier); + entry = value_set.find_entry(identifier); } return entry; } +// Explicitly instantiate the two possible versions of the method above: + +value_sett::entryt *value_sett::get_entry_for_symbol( + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) +{ + return get_entry_for_symbol(*this, identifier, type, suffix, ns); +} + const value_sett::entryt *value_sett::get_entry_for_symbol( - const irep_idt identifier, + irep_idt identifier, const typet &type, const std::string &suffix, const namespacet &ns) const { - return const_cast(this)->get_entry_for_symbol( - identifier, type, suffix, ns); + return get_entry_for_symbol(*this, identifier, type, suffix, ns); } void value_sett::get_value_set_rec( diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index dbcae92ba15..5138f0c0e79 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_POINTER_ANALYSIS_VALUE_SET_H #include +#include #include #include @@ -464,6 +465,20 @@ class value_sett exprt &expr, const namespacet &ns) const; +private: + /// Helper method for \ref get_entry_for_symbol + template + static auto get_entry_for_symbol( + maybe_const_value_sett &value_set, + irep_idt identifier, + const typet &type, + const std::string &suffix, + const namespacet &ns) -> + typename std::conditional::value, + const value_sett::entryt *, + value_sett::entryt *>::type; + +public: /// Get the entry for the symbol and suffix /// \param identifier: The identifier for the symbol /// \param type: The type of the symbol