From 661f2726bf9c2fc80fd329041e651e15785d5daf Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Thu, 18 Mar 2021 14:08:49 +0000 Subject: [PATCH 1/3] Add CSE dereference feature --- regression/cbmc/double_deref/README | 6 + .../cbmc/double_deref/double_deref.desc | 2 +- .../double_deref/double_deref_with_cast.desc | 2 +- .../double_deref_with_member.desc | 2 +- .../double_deref_with_pointer_arithmetic.desc | 2 +- ..._with_pointer_arithmetic_single_alias.desc | 2 +- src/goto-symex/goto_state.cpp | 21 ++++ src/goto-symex/goto_state.h | 16 +++ src/goto-symex/goto_symex.h | 7 +- src/goto-symex/symex_dereference.cpp | 103 +++++++++++++++--- 10 files changed, 144 insertions(+), 19 deletions(-) diff --git a/regression/cbmc/double_deref/README b/regression/cbmc/double_deref/README index 55292da9cdc..2aa63296031 100644 --- a/regression/cbmc/double_deref/README +++ b/regression/cbmc/double_deref/README @@ -25,3 +25,9 @@ double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)* double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression *_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary + +Why no-dereference-cache? +------------------------- + +Because these tests test for specific VCCs to do with multiple nested complex +dereferences and the dereference caching rewrites these. diff --git a/regression/cbmc/double_deref/double_deref.desc b/regression/cbmc/double_deref/double_deref.desc index b5daf7673f3..d3cc5eadcfb 100644 --- a/regression/cbmc/double_deref/double_deref.desc +++ b/regression/cbmc/double_deref/double_deref.desc @@ -1,4 +1,4 @@ -CORE +CORE no-dereference-cache double_deref.c --show-vcc \{1\} \(main::1::pptr!0@1#2 = address_of\(main::1::ptr[12]!0@1\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12]\) diff --git a/regression/cbmc/double_deref/double_deref_with_cast.desc b/regression/cbmc/double_deref/double_deref_with_cast.desc index e5b1f592416..10bce49c798 100644 --- a/regression/cbmc/double_deref/double_deref_with_cast.desc +++ b/regression/cbmc/double_deref/double_deref_with_cast.desc @@ -1,4 +1,4 @@ -CORE +CORE no-dereference-cache double_deref_with_cast.c --show-vcc \{1\} \(cast\(main::1::pptr!0@1#2, signedbv\[32\]\*\*\) = address_of\(main::1::ptr2!0@1\) \? main::argc!0@1#1 = 2 \: main::argc!0@1#1 = 1\) diff --git a/regression/cbmc/double_deref/double_deref_with_member.desc b/regression/cbmc/double_deref/double_deref_with_member.desc index 85156557ba2..667b6897bd2 100644 --- a/regression/cbmc/double_deref/double_deref_with_member.desc +++ b/regression/cbmc/double_deref/double_deref_with_member.desc @@ -1,4 +1,4 @@ -CORE +CORE no-dereference-cache double_deref_with_member.c --show-vcc ^\{1\} \(main::1::cptr!0@1#2 = address_of\(main::1::container2!0@1\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\) diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc index df655168340..3b35123b077 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc @@ -1,4 +1,4 @@ -CORE +CORE no-dereference-cache double_deref_with_pointer_arithmetic.c --show-vcc ^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \{ symex_dynamic::dynamic_object1#3\[\[0\]\], symex_dynamic::dynamic_object1#3\[\[1\]\] \}\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\] diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc index cf6fda8319a..75314b5e8d0 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc @@ -1,4 +1,4 @@ -CORE +CORE no-dereference-cache double_deref_with_pointer_arithmetic_single_alias.c --show-vcc \{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object2\) \? main::argc!0@1#1 = 1 : symex::invalid_object!0#0 = main::argc!0@1#1\) diff --git a/src/goto-symex/goto_state.cpp b/src/goto-symex/goto_state.cpp index 4803654fc51..c9e0d2c13f0 100644 --- a/src/goto-symex/goto_state.cpp +++ b/src/goto-symex/goto_state.cpp @@ -12,6 +12,27 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include +optionalt +dereference_cachet::lookup(const exprt &dereference) const +{ + auto it = cache.find(dereference); + if(it == cache.end()) + { + return nullopt; + } + else + { + return {it->second}; + } +} + +void dereference_cachet::insert( + exprt new_cached_expr, + symbol_exprt new_cache_symbol) +{ + cache.emplace(std::move(new_cached_expr), std::move(new_cache_symbol)); +} + /// Print the constant propagation map in a human-friendly format. /// This is primarily for use from the debugger; please don't delete me just /// because there aren't any current callers. diff --git a/src/goto-symex/goto_state.h b/src/goto-symex/goto_state.h index f2ce7660229..208a06223c5 100644 --- a/src/goto-symex/goto_state.h +++ b/src/goto-symex/goto_state.h @@ -12,6 +12,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #ifndef CPROVER_GOTO_SYMEX_GOTO_STATE_H #define CPROVER_GOTO_SYMEX_GOTO_STATE_H +#include #include #include @@ -24,6 +25,19 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com // by the parent class. class goto_symex_statet; +/// This is used for eliminating repeated complicated dereferences. +/// This is pretty much just a simple wrapper around a map. +/// \see goto_symext::dereference_rec +struct dereference_cachet +{ +private: + std::unordered_map cache; + +public: + optionalt lookup(const exprt &pointer) const; + void insert(exprt new_cached_pointer_expr, symbol_exprt new_cache_symbol); +}; + /// Container for data that varies per program point, e.g. the constant /// propagator state, when state needs to branch. This is copied out of /// goto_symex_statet at a control-flow fork and then back into it at a @@ -38,6 +52,8 @@ class goto_statet symex_level2t level2; public: + dereference_cachet dereference_cache; + const symex_level2t &get_level2() const { return level2; diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 5663bde1703..49f3edae121 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -299,7 +299,12 @@ class goto_symext exprt make_auto_object(const typet &, statet &); virtual void dereference(exprt &, statet &, bool write); - void dereference_rec(exprt &, statet &, bool write); + symbol_exprt cache_dereference(exprt &dereference_result, statet &state); + void dereference_rec( + exprt &expr, + statet &state, + bool write, + bool is_in_quantifier); exprt address_arithmetic( const exprt &, statet &, diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 9b3d0370dd5..2aeef379434 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -17,11 +17,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include +#include "expr_skeleton.h" +#include "symex_assign.h" #include "symex_dereference_state.h" /// Transforms an lvalue expression by replacing any dereference operations it @@ -71,7 +74,7 @@ exprt goto_symext::address_arithmetic( // there could be further dereferencing in the offset exprt offset=be.offset(); - dereference_rec(offset, state, false); + dereference_rec(offset, state, false, false); result=plus_exprt(result, offset); @@ -107,14 +110,14 @@ exprt goto_symext::address_arithmetic( // just grab the pointer, but be wary of further dereferencing // in the pointer itself result=to_dereference_expr(expr).pointer(); - dereference_rec(result, state, false); + dereference_rec(result, state, false, false); } else if(expr.id()==ID_if) { if_exprt if_expr=to_if_expr(expr); // the condition is not an address - dereference_rec(if_expr.cond(), state, false); + dereference_rec(if_expr.cond(), state, false, false); // recursive call if_expr.true_case() = @@ -131,7 +134,7 @@ exprt goto_symext::address_arithmetic( { // give up, just dereference result=expr; - dereference_rec(result, state, false); + dereference_rec(result, state, false, false); // turn &array into &array[0] if(result.type().id() == ID_array && !keep_array) @@ -191,6 +194,56 @@ exprt goto_symext::address_arithmetic( return result; } +symbol_exprt +goto_symext::cache_dereference(exprt &dereference_result, statet &state) +{ + auto const cache_key = [&] { + auto cache_key = + state.field_sensitivity.apply(ns, state, dereference_result, false); + if(auto let_expr = expr_try_dynamic_cast(dereference_result)) + { + let_expr->value() = state.rename(let_expr->value(), ns).get(); + } + else + { + cache_key = state.rename(cache_key, ns).get(); + } + return cache_key; + }(); + + if(auto cached = state.dereference_cache.lookup(cache_key)) + { + return *cached; + } + + auto const &cache_symbol = get_fresh_aux_symbol( + cache_key.type(), + "symex", + "dereference_cache", + dereference_result.source_location(), + ID_C, + ns, + state.symbol_table); + + // we need to lift possible lets + // (come from the value set to avoid repeating complex pointer comparisons) + auto cache_value = cache_key; + lift_lets(state, cache_value); + + exprt::operandst guard{}; + auto assign = symex_assignt{ + state, symex_targett::assignment_typet::STATE, ns, symex_config, target}; + + assign.assign_symbol( + to_ssa_expr(state.rename(cache_symbol.symbol_expr(), ns).get()), + expr_skeletont{}, + cache_value, + guard); + + state.dereference_cache.insert(cache_key, cache_symbol.symbol_expr()); + return cache_symbol.symbol_expr(); +} + /// If \p expr is a \ref dereference_exprt, replace it with explicit references /// to the objects it may point to. Otherwise recursively apply this function to /// \p expr's operands, with special cases for address-of (handled by \ref @@ -198,7 +251,11 @@ exprt goto_symext::address_arithmetic( /// such as `&struct.flexible_array[0]` (see inline comments in code). /// For full details of this method's pointer replacement and potential side- /// effects see \ref goto_symext::dereference -void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) +void goto_symext::dereference_rec( + exprt &expr, + statet &state, + bool write, + bool is_in_quantifier) { if(expr.id()==ID_dereference) { @@ -221,7 +278,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) tmp1.swap(to_dereference_expr(expr).pointer()); // first make sure there are no dereferences in there - dereference_rec(tmp1, state, false); + dereference_rec(tmp1, state, false, is_in_quantifier); // Depending on the nature of the pointer expression, the recursive deref // operation might have introduced a construct such as @@ -271,10 +328,29 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) dereference.dereference(tmp1, symex_config.show_points_to_sets); // std::cout << "**** " << format(tmp2) << '\n'; - expr.swap(tmp2); // this may yield a new auto-object - trigger_auto_object(expr, state); + trigger_auto_object(tmp2, state); + + // If the dereference result is not a complicated expression + // (i.e. of the form + // [let p = in ] + // (p == &something ? something : ...)) + // we should just return it unchanged. + // also if we are on the lhs of an assignment we should also not attempt to + // go to the cache we cannot do this for quantified expressions because this + // would result in us referencing quantifier variables outside the scope of + // the quantifier. + if( + !write && !is_in_quantifier && + (tmp2.id() == ID_if || tmp2.id() == ID_let)) + { + expr = cache_dereference(tmp2, state); + } + else + { + expr = tmp2; + } } else if( expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member && @@ -293,7 +369,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) tmp.add_source_location()=expr.source_location(); // recursive call - dereference_rec(tmp, state, write); + dereference_rec(tmp, state, write, is_in_quantifier); expr.swap(tmp); } @@ -331,17 +407,18 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) to_address_of_expr(tc_op).object(), from_integer(0, index_type()))); - dereference_rec(expr, state, write); + dereference_rec(expr, state, write, is_in_quantifier); } else { - dereference_rec(tc_op, state, write); + dereference_rec(tc_op, state, write, is_in_quantifier); } } else { + bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists; Forall_operands(it, expr) - dereference_rec(*it, state, write); + dereference_rec(*it, state, write, is_in_quantifier || is_quantifier); } } @@ -409,7 +486,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) }); // start the recursion! - dereference_rec(expr, state, write); + dereference_rec(expr, state, write, false); // dereferencing may introduce new symbol_exprt // (like __CPROVER_memory) expr = state.rename(std::move(expr), ns).get(); From 7714f3f6f5c2fabd7cae857a9f4dc6d59e926ec6 Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Mon, 22 Mar 2021 16:13:05 +0000 Subject: [PATCH 2/3] Add --symex-cache-dereferences flag This makes the dereference caching behaviour configurable --- jbmc/src/jbmc/jbmc_parse_options.cpp | 3 ++ .../dereference-cache-flag/test-no-cache.desc | 14 +++++++ .../test-with-cache.desc | 10 +++++ regression/cbmc/dereference-cache-flag/test.c | 7 ++++ regression/cbmc/double_deref/README | 6 --- .../cbmc/double_deref/double_deref.desc | 2 +- .../double_deref/double_deref_with_cast.desc | 2 +- .../double_deref_with_member.desc | 2 +- .../double_deref_with_pointer_arithmetic.desc | 2 +- ..._with_pointer_arithmetic_single_alias.desc | 2 +- src/cbmc/cbmc_parse_options.cpp | 3 ++ src/goto-checker/bmc_util.h | 6 ++- src/goto-symex/goto_state.h | 1 - src/goto-symex/symex_config.h | 8 ++++ src/goto-symex/symex_dereference.cpp | 38 ++++++++++--------- src/goto-symex/symex_main.cpp | 3 +- 16 files changed, 77 insertions(+), 32 deletions(-) create mode 100644 regression/cbmc/dereference-cache-flag/test-no-cache.desc create mode 100644 regression/cbmc/dereference-cache-flag/test-with-cache.desc create mode 100644 regression/cbmc/dereference-cache-flag/test.c diff --git a/jbmc/src/jbmc/jbmc_parse_options.cpp b/jbmc/src/jbmc/jbmc_parse_options.cpp index 75988d1eab0..8a95b6b4057 100644 --- a/jbmc/src/jbmc/jbmc_parse_options.cpp +++ b/jbmc/src/jbmc/jbmc_parse_options.cpp @@ -425,6 +425,9 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("validate-goto-model", true); } + options.set_option( + "symex-cache-dereferences", cmdline.isset("symex-cache-dereferences")); + PARSE_OPTIONS_GOTO_TRACE(cmdline, options); if(cmdline.isset("no-lazy-methods")) diff --git a/regression/cbmc/dereference-cache-flag/test-no-cache.desc b/regression/cbmc/dereference-cache-flag/test-no-cache.desc new file mode 100644 index 00000000000..23ee595f1e5 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test-no-cache.desc @@ -0,0 +1,14 @@ +CORE +test.c +--show-vcc +\(main::1::p!0@1#2 = address_of +^EXIT=0$ +^SIGNAL=0$ +-- +-- +This is just checking that the --symex-cache-dereferences flag actually turns +dereference caching on and off. + +I would like to match the expression more precisely, but the order of +comparisons for address of depends on platform-specific logic (unordered_xxx), +and the corresponding regex would look ridiculous. diff --git a/regression/cbmc/dereference-cache-flag/test-with-cache.desc b/regression/cbmc/dereference-cache-flag/test-with-cache.desc new file mode 100644 index 00000000000..b4342f18c76 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test-with-cache.desc @@ -0,0 +1,10 @@ +CORE +test.c +--show-vcc --symex-cache-dereferences +symex::dereference_cache!0#1 = 0 +^EXIT=0$ +^SIGNAL=0$ +-- +-- +This is just checking that the --symex-cache-dereferences flag actually +turns dereference caching on. diff --git a/regression/cbmc/dereference-cache-flag/test.c b/regression/cbmc/dereference-cache-flag/test.c new file mode 100644 index 00000000000..59cc9b35b23 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + int cond, x, y, *p = cond ? &x : &y; + assert(*p == 0); +} diff --git a/regression/cbmc/double_deref/README b/regression/cbmc/double_deref/README index 2aa63296031..55292da9cdc 100644 --- a/regression/cbmc/double_deref/README +++ b/regression/cbmc/double_deref/README @@ -25,9 +25,3 @@ double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)* double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression *_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary - -Why no-dereference-cache? -------------------------- - -Because these tests test for specific VCCs to do with multiple nested complex -dereferences and the dereference caching rewrites these. diff --git a/regression/cbmc/double_deref/double_deref.desc b/regression/cbmc/double_deref/double_deref.desc index d3cc5eadcfb..b5daf7673f3 100644 --- a/regression/cbmc/double_deref/double_deref.desc +++ b/regression/cbmc/double_deref/double_deref.desc @@ -1,4 +1,4 @@ -CORE no-dereference-cache +CORE double_deref.c --show-vcc \{1\} \(main::1::pptr!0@1#2 = address_of\(main::1::ptr[12]!0@1\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12]\) diff --git a/regression/cbmc/double_deref/double_deref_with_cast.desc b/regression/cbmc/double_deref/double_deref_with_cast.desc index 10bce49c798..e5b1f592416 100644 --- a/regression/cbmc/double_deref/double_deref_with_cast.desc +++ b/regression/cbmc/double_deref/double_deref_with_cast.desc @@ -1,4 +1,4 @@ -CORE no-dereference-cache +CORE double_deref_with_cast.c --show-vcc \{1\} \(cast\(main::1::pptr!0@1#2, signedbv\[32\]\*\*\) = address_of\(main::1::ptr2!0@1\) \? main::argc!0@1#1 = 2 \: main::argc!0@1#1 = 1\) diff --git a/regression/cbmc/double_deref/double_deref_with_member.desc b/regression/cbmc/double_deref/double_deref_with_member.desc index 667b6897bd2..85156557ba2 100644 --- a/regression/cbmc/double_deref/double_deref_with_member.desc +++ b/regression/cbmc/double_deref/double_deref_with_member.desc @@ -1,4 +1,4 @@ -CORE no-dereference-cache +CORE double_deref_with_member.c --show-vcc ^\{1\} \(main::1::cptr!0@1#2 = address_of\(main::1::container2!0@1\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\) diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc index 3b35123b077..df655168340 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc @@ -1,4 +1,4 @@ -CORE no-dereference-cache +CORE double_deref_with_pointer_arithmetic.c --show-vcc ^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \{ symex_dynamic::dynamic_object1#3\[\[0\]\], symex_dynamic::dynamic_object1#3\[\[1\]\] \}\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\] diff --git a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc index 75314b5e8d0..cf6fda8319a 100644 --- a/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc +++ b/regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc @@ -1,4 +1,4 @@ -CORE no-dereference-cache +CORE double_deref_with_pointer_arithmetic_single_alias.c --show-vcc \{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object2\) \? main::argc!0@1#1 = 1 : symex::invalid_object!0#0 = main::argc!0@1#1\) diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 5b7ba9223e1..0bce24fc6e1 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -342,6 +342,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) "max-node-refinement", cmdline.get_value("max-node-refinement")); + options.set_option( + "symex-cache-dereferences", cmdline.isset("symex-cache-dereferences")); + if(cmdline.isset("incremental-loop")) { options.set_option( diff --git a/src/goto-checker/bmc_util.h b/src/goto-checker/bmc_util.h index 0b820ecc455..3f922af1cc2 100644 --- a/src/goto-checker/bmc_util.h +++ b/src/goto-checker/bmc_util.h @@ -197,7 +197,8 @@ void run_property_decider( "(incremental-loop):" \ "(unwind-min):" \ "(unwind-max):" \ - "(ignore-properties-before-unwind-min)" + "(ignore-properties-before-unwind-min)" \ + "(symex-cache-dereferences)" \ #define HELP_BMC \ " --paths [strategy] explore paths one at a time\n" \ @@ -251,7 +252,8 @@ void run_property_decider( " iteration are allowed to fail due to\n" \ " complexity violations before the loop\n" \ " gets blacklisted\n" \ - " --graphml-witness filename write the witness in GraphML format to filename\n" // NOLINT(*) + " --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \ + " --symex-cache-dereferences enable caching of repeated dereferences" \ // clang-format on #endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H diff --git a/src/goto-symex/goto_state.h b/src/goto-symex/goto_state.h index 208a06223c5..5845a113fc5 100644 --- a/src/goto-symex/goto_state.h +++ b/src/goto-symex/goto_state.h @@ -12,7 +12,6 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #ifndef CPROVER_GOTO_SYMEX_GOTO_STATE_H #define CPROVER_GOTO_SYMEX_GOTO_STATE_H -#include #include #include diff --git a/src/goto-symex/symex_config.h b/src/goto-symex/symex_config.h index b7bd52e83f6..70650744356 100644 --- a/src/goto-symex/symex_config.h +++ b/src/goto-symex/symex_config.h @@ -55,6 +55,14 @@ struct symex_configt final /// enables certain analyses that otherwise aren't run. bool complexity_limits_active; + /// \brief Whether or not to replace multiple occurrences of the same + /// dereference with a single symbol that contains the result of that + /// dereference. Can sometimes lead to a significant performance + /// improvement, but sometimes also makes things worse. + /// See https://github.com/diffblue/cbmc/pull/5964 for performance data. + /// Used in goto_symext::dereference_rec + bool cache_dereferences; + /// \brief Construct a symex_configt using options specified in an /// \ref optionst explicit symex_configt(const optionst &options); diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 2aeef379434..f0147b765f7 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -221,7 +221,7 @@ goto_symext::cache_dereference(exprt &dereference_result, statet &state) "symex", "dereference_cache", dereference_result.source_location(), - ID_C, + language_mode, ns, state.symbol_table); @@ -230,18 +230,18 @@ goto_symext::cache_dereference(exprt &dereference_result, statet &state) auto cache_value = cache_key; lift_lets(state, cache_value); - exprt::operandst guard{}; auto assign = symex_assignt{ state, symex_targett::assignment_typet::STATE, ns, symex_config, target}; + auto cache_symbol_expr = cache_symbol.symbol_expr(); assign.assign_symbol( - to_ssa_expr(state.rename(cache_symbol.symbol_expr(), ns).get()), + to_ssa_expr(state.rename(cache_symbol_expr, ns).get()), expr_skeletont{}, cache_value, - guard); + {}); - state.dereference_cache.insert(cache_key, cache_symbol.symbol_expr()); - return cache_symbol.symbol_expr(); + state.dereference_cache.insert(cache_key, cache_symbol_expr); + return cache_symbol_expr; } /// If \p expr is a \ref dereference_exprt, replace it with explicit references @@ -249,6 +249,10 @@ goto_symext::cache_dereference(exprt &dereference_result, statet &state) /// \p expr's operands, with special cases for address-of (handled by \ref /// goto_symext::address_arithmetic) and certain common expression patterns /// such as `&struct.flexible_array[0]` (see inline comments in code). +/// Note that \p write is used to alter behaviour when this function is +/// operating on the LHS of an assignment. Similarly \p is_in_quantifier +/// indicates when the dereference is inside a quantifier (related to scoping +/// when dereference caching is enabled). /// For full details of this method's pointer replacement and potential side- /// effects see \ref goto_symext::dereference void goto_symext::dereference_rec( @@ -332,24 +336,24 @@ void goto_symext::dereference_rec( // this may yield a new auto-object trigger_auto_object(tmp2, state); - // If the dereference result is not a complicated expression - // (i.e. of the form - // [let p = in ] - // (p == &something ? something : ...)) - // we should just return it unchanged. - // also if we are on the lhs of an assignment we should also not attempt to - // go to the cache we cannot do this for quantified expressions because this - // would result in us referencing quantifier variables outside the scope of - // the quantifier. + // Check various conditions for when we should try to cache the expression. + // 1. Caching dereferences must be enabled. + // 2. Do not cache inside LHS of writes. + // 3. Do not cache inside quantifiers (references variables outside their + // scope). + // 4. Only cache "complicated" expressions, i.e. of the form + // [let p = in ] + // (p == &something ? something : ...)) + // Otherwise we should just return it unchanged. if( - !write && !is_in_quantifier && + symex_config.cache_dereferences && !write && !is_in_quantifier && (tmp2.id() == ID_if || tmp2.id() == ID_let)) { expr = cache_dereference(tmp2, state); } else { - expr = tmp2; + expr = std::move(tmp2); } } else if( diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index b5053888497..f2fbcd50315 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -56,7 +56,8 @@ symex_configt::symex_configt(const optionst &options) "max-field-sensitivity-array-size") : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE), complexity_limits_active( - options.get_signed_int_option("symex-complexity-limit") > 0) + options.get_signed_int_option("symex-complexity-limit") > 0), + cache_dereferences{options.get_bool_option("symex-cache-dereferences")} { } From d5543266788c14dd399dccc30680296ad3b18074 Mon Sep 17 00:00:00 2001 From: Thomas Given-Wilson Date: Fri, 26 Mar 2021 17:10:11 +0000 Subject: [PATCH 3/3] Use sharing_map instead of unordered_map for cache --- src/goto-symex/goto_state.cpp | 21 --------------------- src/goto-symex/goto_state.h | 17 +++-------------- src/goto-symex/symex_dereference.cpp | 2 +- 3 files changed, 4 insertions(+), 36 deletions(-) diff --git a/src/goto-symex/goto_state.cpp b/src/goto-symex/goto_state.cpp index c9e0d2c13f0..4803654fc51 100644 --- a/src/goto-symex/goto_state.cpp +++ b/src/goto-symex/goto_state.cpp @@ -12,27 +12,6 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include -optionalt -dereference_cachet::lookup(const exprt &dereference) const -{ - auto it = cache.find(dereference); - if(it == cache.end()) - { - return nullopt; - } - else - { - return {it->second}; - } -} - -void dereference_cachet::insert( - exprt new_cached_expr, - symbol_exprt new_cache_symbol) -{ - cache.emplace(std::move(new_cached_expr), std::move(new_cache_symbol)); -} - /// Print the constant propagation map in a human-friendly format. /// This is primarily for use from the debugger; please don't delete me just /// because there aren't any current callers. diff --git a/src/goto-symex/goto_state.h b/src/goto-symex/goto_state.h index 5845a113fc5..62405bf2ce6 100644 --- a/src/goto-symex/goto_state.h +++ b/src/goto-symex/goto_state.h @@ -24,19 +24,6 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com // by the parent class. class goto_symex_statet; -/// This is used for eliminating repeated complicated dereferences. -/// This is pretty much just a simple wrapper around a map. -/// \see goto_symext::dereference_rec -struct dereference_cachet -{ -private: - std::unordered_map cache; - -public: - optionalt lookup(const exprt &pointer) const; - void insert(exprt new_cached_pointer_expr, symbol_exprt new_cache_symbol); -}; - /// Container for data that varies per program point, e.g. the constant /// propagator state, when state needs to branch. This is copied out of /// goto_symex_statet at a control-flow fork and then back into it at a @@ -51,7 +38,9 @@ class goto_statet symex_level2t level2; public: - dereference_cachet dereference_cache; + /// This is used for eliminating repeated complicated dereferences. + /// \see goto_symext::dereference_rec + sharing_mapt dereference_cache; const symex_level2t &get_level2() const { diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index f0147b765f7..a88321abd48 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -211,7 +211,7 @@ goto_symext::cache_dereference(exprt &dereference_result, statet &state) return cache_key; }(); - if(auto cached = state.dereference_cache.lookup(cache_key)) + if(auto cached = state.dereference_cache.find(cache_key)) { return *cached; }