From f763691129c0abacd5bda03f7e546e345202f483 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Fri, 15 Sep 2017 20:24:33 +0100 Subject: [PATCH 1/2] Tidy up remove_instanceof Sort out and add to the comments Tidy up code for readability Comment about the fact we mutate the instruction list while iterating Replace asserts --- src/goto-programs/goto_program_template.h | 1 + src/goto-programs/remove_instanceof.cpp | 105 ++++++++++++---------- 2 files changed, 57 insertions(+), 49 deletions(-) diff --git a/src/goto-programs/goto_program_template.h b/src/goto-programs/goto_program_template.h index 846039341b4..da596927fc2 100644 --- a/src/goto-programs/goto_program_template.h +++ b/src/goto-programs/goto_program_template.h @@ -361,6 +361,7 @@ class goto_program_templatet } }; + // Never try to change this to vector-we mutate the list while iterating typedef std::list instructionst; typedef typename instructionst::iterator targett; diff --git a/src/goto-programs/remove_instanceof.cpp b/src/goto-programs/remove_instanceof.cpp index c1f17ef6b39..1346063104d 100644 --- a/src/goto-programs/remove_instanceof.cpp +++ b/src/goto-programs/remove_instanceof.cpp @@ -74,40 +74,53 @@ bool remove_instanceoft::contains_instanceof( return false; } -/// Replaces an expression like e instanceof A with e.@class_identifier == "A" -/// Or a big-or of similar expressions if we know of subtypes that also satisfy +/// Replaces an expression like e instanceof A with e.\@class_identifier == "A" +/// or a big-or of similar expressions if we know of subtypes that also satisfy /// the given test. -/// \par parameters: Expression to lower `expr` and the `goto_program` and -/// instruction `this_inst` it belongs to. -/// \return Side-effect on `expr` replacing it with an explicit clsid test +/// \param expr: Expression to lower (the code or the guard of an instruction) +/// \param goto_program: program the expression belongs to +/// \param this_inst: instruction the expression is found at +/// \param inst_switch: set of instructions to switch around once we're done void remove_instanceoft::lower_instanceof( exprt &expr, goto_programt &goto_program, goto_programt::targett this_inst, instanceof_instt &inst_switch) { - if(expr.id()==ID_java_instanceof) + if(expr.id()!=ID_java_instanceof) { + Forall_operands(it, expr) + lower_instanceof(*it, goto_program, this_inst, inst_switch); + return; + } + const exprt &check_ptr=expr.op0(); - assert(check_ptr.type().id()==ID_pointer); + INVARIANT( + check_ptr.type().id()==ID_pointer, + "instanceof first operand should be a pointer"); const exprt &target_arg=expr.op1(); - assert(target_arg.id()==ID_type); + INVARIANT( + target_arg.id()==ID_type, + "instanceof second operand should be a type"); const typet &target_type=target_arg.type(); // Find all types we know about that satisfy the given requirement: - assert(target_type.id()==ID_symbol); + INVARIANT( + target_type.id()==ID_symbol, + "instanceof second operand should have a simple type"); const irep_idt &target_name= to_symbol_type(target_type).get_identifier(); std::vector children= class_hierarchy.get_children_trans(target_name); children.push_back(target_name); - assert(!children.empty() && "Unable to execute instanceof"); - - // Insert an instruction before this one that assigns the clsid we're - // checking against to a temporary, as GOTO program if-expressions should + // Insert an instruction before the new check that assigns the clsid we're + // checking for to a temporary, as GOTO program if-expressions should // not contain derefs. - + // We actually insert the assignment instruction after the existing one. + // This will briefly be ill-formed (use before def of instanceof_tmp) but the + // two will subsequently switch places. This makes sure that the inserted + // assignement doesn't end up before any labels pointing at this instruction. symbol_typet jlo("java::java.lang.Object"); exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns); @@ -126,10 +139,7 @@ void remove_instanceoft::lower_instanceof( newinst->source_location=this_inst->source_location; newinst->function=this_inst->function; - // Insert the check instruction after the existing one. - // This will briefly be ill-formed (use before def of - // instanceof_tmp) but the two will subsequently switch - // places in lower_instanceof(goto_programt &) below. + // Remember to switch the instructions inst_switch.push_back(make_pair(this_inst, newinst)); // Replace the instanceof construct with a big-or. exprt::operandst or_ops; @@ -140,20 +150,14 @@ void remove_instanceoft::lower_instanceof( or_ops.push_back(test); } expr=disjunction(or_ops); - } - else - { - Forall_operands(it, expr) - lower_instanceof(*it, goto_program, this_inst, inst_switch); - } } -/// See function above -/// \par parameters: GOTO program instruction `target` whose instanceof -/// expressions, -/// if any, should be replaced with explicit tests, and the -/// `goto_program` it is part of. -/// \return Side-effect on `target` as above. +/// Replaces expressions like e instanceof A with e.\@class_identifier == "A" +/// or a big-or of similar expressions if we know of subtypes that also satisfy +/// the given test. Does this for the code or guard at a specific instruction. +/// \param goto_program: program to process +/// \param target: instruction to check for instanceof expressions +/// \param inst_switch: set of instructions to switch around once we're done void remove_instanceoft::lower_instanceof( goto_programt &goto_program, goto_programt::targett target, @@ -167,35 +171,39 @@ void remove_instanceoft::lower_instanceof( // code (e.g. a guarded-goto), but this assertion checks that this // assumption is correct and remains true on future evolution of the // allowable goto instruction types. - assert(!(code_iof && guard_iof)); + INVARIANT( + !(code_iof && guard_iof), "instanceof code should not have a guard"); if(code_iof) lower_instanceof(target->code, goto_program, target, inst_switch); if(guard_iof) lower_instanceof(target->guard, goto_program, target, inst_switch); } -/// See function above -/// \par parameters: `goto_program`, all of whose instanceof expressions will -/// be replaced by explicit class-identifier tests. -/// \return Side-effect on `goto_program` as above. +/// Replace every instanceof in the passed function body with an explicit +/// class-identifier test. +/// Extra auxiliary variables may be introduced into symbol_table. +/// \param goto_program: The function body to work on. +/// \return true if one or more instanceof expressions have been replaced bool remove_instanceoft::lower_instanceof(goto_programt &goto_program) { instanceof_instt inst_switch; - Forall_goto_program_instructions(target, goto_program) - lower_instanceof(goto_program, target, inst_switch); - if(!inst_switch.empty()) + for(goto_programt::instructionst::iterator target= + goto_program.instructions.begin(); + target!=goto_program.instructions.end(); + ++target) { - for(auto &p : inst_switch) - { - const goto_programt::targett &this_inst=p.first; - const goto_programt::targett &newinst=p.second; - this_inst->swap(*newinst); - } - goto_program.update(); - return true; + lower_instanceof(goto_program, target, inst_switch); } - else + if(inst_switch.empty()) return false; + for(auto &p : inst_switch) + { + const goto_programt::targett &this_inst=p.first; + const goto_programt::targett &newinst=p.second; + this_inst->swap(*newinst); + } + goto_program.update(); + return true; } /// See function above @@ -226,6 +234,5 @@ void remove_instanceof( void remove_instanceof(goto_modelt &goto_model) { - remove_instanceof( - goto_model.symbol_table, goto_model.goto_functions); + remove_instanceof(goto_model.symbol_table, goto_model.goto_functions); } From 2ddb0bcd8152e849c44cae71d079ced76dc2f940 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Mon, 18 Sep 2017 15:27:05 +0100 Subject: [PATCH 2/2] Simplify remove_instanceof logic Do the swap after each removal rather than in a single pass after each function body --- src/goto-programs/remove_instanceof.cpp | 204 ++++++++++-------------- 1 file changed, 86 insertions(+), 118 deletions(-) diff --git a/src/goto-programs/remove_instanceof.cpp b/src/goto-programs/remove_instanceof.cpp index 1346063104d..69db2a8859e 100644 --- a/src/goto-programs/remove_instanceof.cpp +++ b/src/goto-programs/remove_instanceof.cpp @@ -15,6 +15,7 @@ Author: Chris Smowton, chris.smowton@diffblue.com #include "class_identifier.h" #include +#include #include @@ -42,114 +43,93 @@ class remove_instanceoft bool lower_instanceof(goto_programt &); - typedef std::vector< - std::pair> instanceof_instt; + bool lower_instanceof(goto_programt &, goto_programt::targett); - void lower_instanceof( - goto_programt &, - goto_programt::targett, - instanceof_instt &); - - void lower_instanceof( - exprt &, - goto_programt &, - goto_programt::targett, - instanceof_instt &); - - bool contains_instanceof(const exprt &); + std::size_t lower_instanceof( + exprt &, goto_programt &, goto_programt::targett); }; -/// Avoid breaking sharing by checking for instanceof before calling -/// lower_instanceof. -/// \par parameters: Expression `expr` -/// \return Returns true if `expr` contains any instanceof ops -bool remove_instanceoft::contains_instanceof( - const exprt &expr) -{ - if(expr.id()==ID_java_instanceof) - return true; - forall_operands(it, expr) - if(contains_instanceof(*it)) - return true; - return false; -} - /// Replaces an expression like e instanceof A with e.\@class_identifier == "A" /// or a big-or of similar expressions if we know of subtypes that also satisfy /// the given test. /// \param expr: Expression to lower (the code or the guard of an instruction) /// \param goto_program: program the expression belongs to /// \param this_inst: instruction the expression is found at -/// \param inst_switch: set of instructions to switch around once we're done -void remove_instanceoft::lower_instanceof( +/// \return number of instanceof expressions that have been replaced +std::size_t remove_instanceoft::lower_instanceof( exprt &expr, goto_programt &goto_program, - goto_programt::targett this_inst, - instanceof_instt &inst_switch) + goto_programt::targett this_inst) { if(expr.id()!=ID_java_instanceof) { + std::size_t replacements=0; Forall_operands(it, expr) - lower_instanceof(*it, goto_program, this_inst, inst_switch); - return; + replacements+=lower_instanceof(*it, goto_program, this_inst); + return replacements; + } + + INVARIANT( + expr.operands().size()==2, + "java_instanceof should have two operands"); + + const exprt &check_ptr=expr.op0(); + INVARIANT( + check_ptr.type().id()==ID_pointer, + "instanceof first operand should be a pointer"); + + const exprt &target_arg=expr.op1(); + INVARIANT( + target_arg.id()==ID_type, + "instanceof second operand should be a type"); + const typet &target_type=target_arg.type(); + + // Find all types we know about that satisfy the given requirement: + INVARIANT( + target_type.id()==ID_symbol, + "instanceof second operand should have a simple type"); + const irep_idt &target_name= + to_symbol_type(target_type).get_identifier(); + std::vector children= + class_hierarchy.get_children_trans(target_name); + children.push_back(target_name); + + // Insert an instruction before the new check that assigns the clsid we're + // checking for to a temporary, as GOTO program if-expressions should + // not contain derefs. + // We actually insert the assignment instruction after the existing one. + // This will briefly be ill-formed (use before def of instanceof_tmp) but the + // two will subsequently switch places. This makes sure that the inserted + // assignement doesn't end up before any labels pointing at this instruction. + symbol_typet jlo=to_symbol_type(java_lang_object_type().subtype()); + exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns); + + symbolt &newsym= + get_fresh_aux_symbol( + object_clsid.type(), + "instanceof_tmp", + "instanceof_tmp", + source_locationt(), + ID_java, + symbol_table); + + auto newinst=goto_program.insert_after(this_inst); + newinst->make_assignment(); + newinst->code=code_assignt(newsym.symbol_expr(), object_clsid); + newinst->source_location=this_inst->source_location; + newinst->function=this_inst->function; + + // Replace the instanceof construct with a big-or. + exprt::operandst or_ops; + for(const auto &clsname : children) + { + constant_exprt clsexpr(clsname, string_typet()); + equal_exprt test(newsym.symbol_expr(), clsexpr); + or_ops.push_back(test); } + expr=disjunction(or_ops); - const exprt &check_ptr=expr.op0(); - INVARIANT( - check_ptr.type().id()==ID_pointer, - "instanceof first operand should be a pointer"); - const exprt &target_arg=expr.op1(); - INVARIANT( - target_arg.id()==ID_type, - "instanceof second operand should be a type"); - const typet &target_type=target_arg.type(); - - // Find all types we know about that satisfy the given requirement: - INVARIANT( - target_type.id()==ID_symbol, - "instanceof second operand should have a simple type"); - const irep_idt &target_name= - to_symbol_type(target_type).get_identifier(); - std::vector children= - class_hierarchy.get_children_trans(target_name); - children.push_back(target_name); - - // Insert an instruction before the new check that assigns the clsid we're - // checking for to a temporary, as GOTO program if-expressions should - // not contain derefs. - // We actually insert the assignment instruction after the existing one. - // This will briefly be ill-formed (use before def of instanceof_tmp) but the - // two will subsequently switch places. This makes sure that the inserted - // assignement doesn't end up before any labels pointing at this instruction. - symbol_typet jlo("java::java.lang.Object"); - exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns); - - symbolt &newsym= - get_fresh_aux_symbol( - object_clsid.type(), - "instanceof_tmp", - "instanceof_tmp", - source_locationt(), - ID_java, - symbol_table); - - auto newinst=goto_program.insert_after(this_inst); - newinst->make_assignment(); - newinst->code=code_assignt(newsym.symbol_expr(), object_clsid); - newinst->source_location=this_inst->source_location; - newinst->function=this_inst->function; - - // Remember to switch the instructions - inst_switch.push_back(make_pair(this_inst, newinst)); - // Replace the instanceof construct with a big-or. - exprt::operandst or_ops; - for(const auto &clsname : children) - { - constant_exprt clsexpr(clsname, string_typet()); - equal_exprt test(newsym.symbol_expr(), clsexpr); - or_ops.push_back(test); - } - expr=disjunction(or_ops); + return 1; } /// Replaces expressions like e instanceof A with e.\@class_identifier == "A" @@ -157,26 +137,20 @@ void remove_instanceoft::lower_instanceof( /// the given test. Does this for the code or guard at a specific instruction. /// \param goto_program: program to process /// \param target: instruction to check for instanceof expressions -/// \param inst_switch: set of instructions to switch around once we're done -void remove_instanceoft::lower_instanceof( +/// \return true if an instanceof has been replaced +bool remove_instanceoft::lower_instanceof( goto_programt &goto_program, - goto_programt::targett target, - instanceof_instt &inst_switch) + goto_programt::targett target) { - bool code_iof=contains_instanceof(target->code); - bool guard_iof=contains_instanceof(target->guard); - // The instruction-switching step below will malfunction if a check - // has been added for the code *and* for the guard. This should - // be impossible, because guarded instructions currently have simple - // code (e.g. a guarded-goto), but this assertion checks that this - // assumption is correct and remains true on future evolution of the - // allowable goto instruction types. - INVARIANT( - !(code_iof && guard_iof), "instanceof code should not have a guard"); - if(code_iof) - lower_instanceof(target->code, goto_program, target, inst_switch); - if(guard_iof) - lower_instanceof(target->guard, goto_program, target, inst_switch); + std::size_t replacements= + lower_instanceof(target->code, goto_program, target)+ + lower_instanceof(target->guard, goto_program, target); + + if(replacements==0) + return false; + // Swap the original instruction with the last assignment added after it + target->swap(*std::next(target, replacements)); + return true; } /// Replace every instanceof in the passed function body with an explicit @@ -186,22 +160,16 @@ void remove_instanceoft::lower_instanceof( /// \return true if one or more instanceof expressions have been replaced bool remove_instanceoft::lower_instanceof(goto_programt &goto_program) { - instanceof_instt inst_switch; + bool changed=false; for(goto_programt::instructionst::iterator target= goto_program.instructions.begin(); target!=goto_program.instructions.end(); ++target) { - lower_instanceof(goto_program, target, inst_switch); + changed=lower_instanceof(goto_program, target) || changed; } - if(inst_switch.empty()) + if(!changed) return false; - for(auto &p : inst_switch) - { - const goto_programt::targett &this_inst=p.first; - const goto_programt::targett &newinst=p.second; - this_inst->swap(*newinst); - } goto_program.update(); return true; }