diff --git a/regression/cbmc/byte_update8/main.c b/regression/cbmc/byte_update8/main.c new file mode 100644 index 00000000000..85b3582585c --- /dev/null +++ b/regression/cbmc/byte_update8/main.c @@ -0,0 +1,12 @@ +#include + +int main() +{ + int x=0x01020304; + short *p=((short*)&x)+1; + *p=0xABCD; + assert(x==0xABCD0304); + p=(short*)(((char*)&x)+1); + *p=0xABCD; + assert(x==0xABABCD04); +} diff --git a/regression/cbmc/byte_update8/test.desc b/regression/cbmc/byte_update8/test.desc new file mode 100644 index 00000000000..9efefbc7362 --- /dev/null +++ b/regression/cbmc/byte_update8/test.desc @@ -0,0 +1,8 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/byte_update9/main.c b/regression/cbmc/byte_update9/main.c new file mode 100644 index 00000000000..21e0a4d3a46 --- /dev/null +++ b/regression/cbmc/byte_update9/main.c @@ -0,0 +1,12 @@ +#include + +int main() +{ + int x=0x01020304; + short *p=((short*)&x)+1; + *p=0xABCD; + assert(x==0x0102ABCD); + p=(short*)(((char*)&x)+1); + *p=0xABCD; + assert(x==0x01ABCDCD); +} diff --git a/regression/cbmc/byte_update9/test.desc b/regression/cbmc/byte_update9/test.desc new file mode 100644 index 00000000000..81ceb4c6dc0 --- /dev/null +++ b/regression/cbmc/byte_update9/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--big-endian +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/little-endian-array1/main.c b/regression/cbmc/little-endian-array1/main.c new file mode 100644 index 00000000000..5919db3e3c1 --- /dev/null +++ b/regression/cbmc/little-endian-array1/main.c @@ -0,0 +1,30 @@ +#include + +int *array; + +int main() +{ + unsigned size; + __CPROVER_assume(size==1); + + // produce unbounded array that does not have byte granularity + array=malloc(size*sizeof(int)); + array[0]=0x01020304; + + int array0=array[0]; + __CPROVER_assert(array0==0x01020304, "array[0] matches"); + + char *p=(char *)array; + char p0=p[0]; + char p1=p[1]; + char p2=p[2]; + char p3=p[3]; + __CPROVER_assert(p0==4, "p[0] matches"); + __CPROVER_assert(p1==3, "p[1] matches"); + __CPROVER_assert(p2==2, "p[2] matches"); + __CPROVER_assert(p3==1, "p[3] matches"); + + unsigned short *q=(unsigned short *)array; + unsigned short q0=q[0]; + __CPROVER_assert(q0==0x0304, "p[0,1] matches"); +} diff --git a/regression/cbmc/little-endian-array1/test.desc b/regression/cbmc/little-endian-array1/test.desc new file mode 100644 index 00000000000..9845e70d84b --- /dev/null +++ b/regression/cbmc/little-endian-array1/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--little-endian +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/memset1/main.c b/regression/cbmc/memset1/main.c new file mode 100644 index 00000000000..bd42b5e256e --- /dev/null +++ b/regression/cbmc/memset1/main.c @@ -0,0 +1,29 @@ +#include +#include +#include + +int main() +{ + int A[5]; + memset(A, 0, sizeof(int)*5); + assert(A[0]==0); + assert(A[1]==0); + assert(A[2]==0); + assert(A[3]==0); + assert(A[4]==0); + + A[3]=42; + memset(A, 0xFFFFFF01, sizeof(int)*3); + assert(A[0]==0x01010101); + assert(A[1]==0x01010111); + assert(A[2]==0x01010101); + assert(A[3]==42); + assert(A[4]==0); + + int *B=malloc(sizeof(int)*2); + memset(B, 2, sizeof(int)*2); + assert(B[0]==0x02020202); + assert(B[1]==0x02020202); + + return 0; +} diff --git a/regression/cbmc/memset1/test.desc b/regression/cbmc/memset1/test.desc new file mode 100644 index 00000000000..aef7e29d151 --- /dev/null +++ b/regression/cbmc/memset1/test.desc @@ -0,0 +1,10 @@ +CORE +main.c + +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +\[main.assertion.7\] assertion A\[1\]==0x01010111: FAILURE +\*\* 1 of 12 failed \(2 iterations\) +-- +^warning: ignoring diff --git a/src/ansi-c/ansi_c_internal_additions.cpp b/src/ansi-c/ansi_c_internal_additions.cpp index f337bbf6169..c3e69e9921a 100644 --- a/src/ansi-c/ansi_c_internal_additions.cpp +++ b/src/ansi-c/ansi_c_internal_additions.cpp @@ -213,7 +213,11 @@ void ansi_c_internal_additions(std::string &code) // arrays // NOLINTNEXTLINE(whitespace/line_length) "__CPROVER_bool __CPROVER_array_equal(const void *array1, const void *array2);\n" + // overwrite all of *dest (possibly using nondet values), even + // if *src is smaller "void __CPROVER_array_copy(const void *dest, const void *src);\n" + // replace at most size-of-*src bytes in *dest + "void __CPROVER_array_replace(const void *dest, const void *src);\n" "void __CPROVER_array_set(const void *dest, ...);\n" // k-induction diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 207763608c2..d209b5a04fb 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -2473,10 +2473,12 @@ exprt c_typecheck_baset::do_special_functions( throw 0; } - exprt pointer_offset_expr=exprt(ID_pointer_offset, expr.type()); - pointer_offset_expr.operands()=expr.arguments(); + exprt pointer_offset_expr=pointer_offset(expr.arguments().front()); pointer_offset_expr.add_source_location()=source_location; + if(expr.type()!=pointer_offset_expr.type()) + pointer_offset_expr.make_typecast(expr.type()); + return pointer_offset_expr; } else if(identifier==CPROVER_PREFIX "POINTER_OBJECT") diff --git a/src/ansi-c/expr2c.cpp b/src/ansi-c/expr2c.cpp index 379f7b6288f..0a683ff6acf 100644 --- a/src/ansi-c/expr2c.cpp +++ b/src/ansi-c/expr2c.cpp @@ -3824,6 +3824,9 @@ std::string expr2ct::convert_code( if(statement==ID_array_copy) return convert_code_array_copy(src, indent); + if(statement==ID_array_replace) + return convert_code_array_replace(src, indent); + if(statement=="set_may" || statement=="set_must") return @@ -4225,6 +4228,39 @@ std::string expr2ct::convert_code_array_copy( /*******************************************************************\ +Function: expr2ct::convert_code_array_replace + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +std::string expr2ct::convert_code_array_replace( + const codet &src, + unsigned indent) +{ + std::string dest=indent_str(indent)+"ARRAY_REPLACE("; + + forall_operands(it, src) + { + unsigned p; + std::string arg_str=convert_with_precedence(*it, p); + + if(it!=src.operands().begin()) + dest+=", "; + dest+=arg_str; + } + + dest+=");"; + + return dest; +} + +/*******************************************************************\ + Function: expr2ct::convert_code_assert Inputs: @@ -4673,9 +4709,6 @@ std::string expr2ct::convert_with_precedence( else if(src.id()=="buffer_size") return convert_function(src, "BUFFER_SIZE", precedence=16); - else if(src.id()==ID_pointer_offset) - return convert_function(src, "POINTER_OFFSET", precedence=16); - else if(src.id()==ID_isnan) return convert_function(src, "isnan", precedence=16); diff --git a/src/ansi-c/expr2c_class.h b/src/ansi-c/expr2c_class.h index 771f936afb8..6387d031628 100644 --- a/src/ansi-c/expr2c_class.h +++ b/src/ansi-c/expr2c_class.h @@ -204,6 +204,7 @@ class expr2ct std::string convert_code_output(const codet &src, unsigned indent); std::string convert_code_array_set(const codet &src, unsigned indent); std::string convert_code_array_copy(const codet &src, unsigned indent); + std::string convert_code_array_replace(const codet &src, unsigned indent); virtual std::string convert_with_precedence( const exprt &src, unsigned &precedence); diff --git a/src/ansi-c/library/cprover.h b/src/ansi-c/library/cprover.h index bb41839ee01..ac9c529145a 100644 --- a/src/ansi-c/library/cprover.h +++ b/src/ansi-c/library/cprover.h @@ -52,7 +52,7 @@ void CBMC_trace(int lvl, const char *event, ...); #endif // pointers -//unsigned __CPROVER_POINTER_OBJECT(const void *p); +unsigned __CPROVER_POINTER_OBJECT(const void *p); signed __CPROVER_POINTER_OFFSET(const void *p); __CPROVER_bool __CPROVER_DYNAMIC_OBJECT(const void *p); #if 0 @@ -104,7 +104,8 @@ float __CPROVER_fabsf(float); // arrays //__CPROVER_bool __CPROVER_array_equal(const void *array1, const void *array2); void __CPROVER_array_copy(const void *dest, const void *src); -//void __CPROVER_array_set(const void *dest, ...); +void __CPROVER_array_set(const void *dest, ...); +void __CPROVER_array_replace(const void *dest, const void *src); #if 0 // k-induction diff --git a/src/ansi-c/library/stdlib.c b/src/ansi-c/library/stdlib.c index b6f13e9e9a2..3b85223320e 100644 --- a/src/ansi-c/library/stdlib.c +++ b/src/ansi-c/library/stdlib.c @@ -67,17 +67,18 @@ inline void *malloc(__CPROVER_size_t malloc_size); inline void *calloc(__CPROVER_size_t nmemb, __CPROVER_size_t size) { __CPROVER_HIDE:; - __CPROVER_size_t total_size=nmemb*size; void *res; - res=malloc(total_size); + res=malloc(nmemb*size); #ifdef __CPROVER_STRING_ABSTRACTION __CPROVER_is_zero_string(res)=1; __CPROVER_zero_string_length(res)=0; //for(int i=0; i1) + __CPROVER_array_set(res, 0); + else if(nmemb==1) + for(__CPROVER_size_t i=0; i=(const char *)src+n) + char src_n[n]; + __CPROVER_array_copy(src_n, (char*)src); + __CPROVER_array_replace((char*)dest, src_n); + #endif + return dest; +} + +/* FUNCTION: __builtin___memmove_chk */ + +#ifndef __CPROVER_STRING_H_INCLUDED +#include +#define __CPROVER_STRING_H_INCLUDED +#endif + +#undef memmove + +void *__builtin___memmove_chk(void *dest, const void *src, size_t n, __CPROVER_size_t size) +{ + __CPROVER_HIDE:; + #ifdef __CPROVER_STRING_ABSTRACTION + __CPROVER_assert(__CPROVER_buffer_size(src)>=n, "memmove buffer overflow"); + __CPROVER_assert(__CPROVER_buffer_size(dest)==size, "builtin object size"); + // dst = src (with overlap allowed) + if(__CPROVER_is_zero_string(src) && + n > __CPROVER_zero_string_length(src)) { - for(__CPROVER_size_t i=0; i0; i--) ((char *)dest)[i-1]=((const char *)src)[i-1]; - } + __CPROVER_is_zero_string(dest)=0; + #else + (void)size; + char src_n[n]; + __CPROVER_array_copy(src_n, (char*)src); + __CPROVER_array_replace((char*)dest, src_n); #endif return dest; } diff --git a/src/ansi-c/library/unistd.c b/src/ansi-c/library/unistd.c index b9c632fcb77..00af7246882 100644 --- a/src/ansi-c/library/unistd.c +++ b/src/ansi-c/library/unistd.c @@ -198,14 +198,19 @@ ssize_t read(int fildes, void *buf, size_t nbyte) if((fildes>=0 && fildes<=2) || fildes < __CPROVER_pipe_offset) { ssize_t nread; - size_t i; __CPROVER_assume(0<=nread && (size_t)nread<=nbyte); +#if 0 + size_t i; for(i=0; i +#include #include #include #include @@ -891,11 +892,8 @@ exprt string_abstractiont::build( if(what==whatt::LENGTH || what==whatt::SIZE) { // adjust for offset - exprt pointer_offset(ID_pointer_offset, size_type()); - pointer_offset.copy_to_operands(pointer); - if(pointer_offset.is_not_nil() && - !pointer_offset.is_zero()) - result=minus_exprt(result, pointer_offset); + result=minus_exprt(result, pointer_offset(pointer)); + result.op0().make_typecast(result.op1().type()); } return result; diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index 684fd6b0123..b2769ce8002 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -58,6 +58,14 @@ void goto_symext::process_array_expr_rec( expr.swap(tmp); process_array_expr_rec(expr, type); } + else if(expr.id()==ID_byte_extract_little_endian || + expr.id()==ID_byte_extract_big_endian) + { + // pick the root object + exprt tmp=to_byte_extract_expr(expr).op(); + expr.swap(tmp); + process_array_expr_rec(expr, type); + } else if(expr.id()==ID_symbol && expr.get_bool(ID_C_SSA_symbol) && to_ssa_expr(expr).get_original_expr().id()==ID_index) @@ -69,7 +77,10 @@ void goto_symext::process_array_expr_rec( } else Forall_operands(it, expr) - process_array_expr_rec(*it, it->type()); + { + typet t=it->type(); + process_array_expr_rec(*it, t); + } if(!base_type_eq(expr.type(), type, ns)) { @@ -129,6 +140,14 @@ void goto_symext::process_array_expr(exprt &expr) expr.swap(tmp); process_array_expr(expr); } + else if(expr.id()==ID_byte_extract_little_endian || + expr.id()==ID_byte_extract_big_endian) + { + // pick the root object + exprt tmp=to_byte_extract_expr(expr).op(); + expr.swap(tmp); + process_array_expr(expr); + } else if(expr.id()==ID_symbol && expr.get_bool(ID_C_SSA_symbol) && to_ssa_expr(expr).get_original_expr().id()==ID_index) diff --git a/src/goto-symex/symex_other.cpp b/src/goto-symex/symex_other.cpp index 56d5d40c832..eddf123fb89 100644 --- a/src/goto-symex/symex_other.cpp +++ b/src/goto-symex/symex_other.cpp @@ -85,7 +85,8 @@ void goto_symext::symex_other( { // we ignore this for now } - else if(statement==ID_array_copy) + else if(statement==ID_array_copy || + statement==ID_array_replace) { assert(code.operands().size()==2); @@ -94,15 +95,21 @@ void goto_symext::symex_other( // we need to add dereferencing for both operands dereference_exprt d0, d1; d0.op0()=code.op0(); - d0.type()=code.op0().type().subtype(); + d0.type()=empty_typet(); d1.op0()=code.op1(); - d1.type()=code.op1().type().subtype(); + d1.type()=empty_typet(); clean_code.op0()=d0; clean_code.op1()=d1; clean_expr(clean_code.op0(), state, true); + if(clean_code.op0().id()==byte_extract_id() && + clean_code.op0().type().id()==ID_empty) + clean_code.op0()=clean_code.op0().op0(); clean_expr(clean_code.op1(), state, false); + if(clean_code.op1().id()==byte_extract_id() && + clean_code.op1().type().id()==ID_empty) + clean_code.op1()=clean_code.op1().op0(); process_array_expr(clean_code.op0()); clean_expr(clean_code.op0(), state, true); @@ -114,11 +121,21 @@ void goto_symext::symex_other( clean_code.op1().type(), ns)) { byte_extract_exprt be(byte_extract_id()); - be.type()=clean_code.op0().type(); - be.op()=clean_code.op1(); be.offset()=from_integer(0, index_type()); - clean_code.op1()=be; + if(statement==ID_array_copy) + { + be.op()=clean_code.op1(); + be.type()=clean_code.op0().type(); + clean_code.op1()=be; + } + else + { + // ID_array_replace + be.op()=clean_code.op0(); + be.type()=clean_code.op1().type(); + clean_code.op0()=be; + } } code_assignt assignment; @@ -135,11 +152,14 @@ void goto_symext::symex_other( // we need to add dereferencing for the first operand dereference_exprt d0; d0.op0()=code.op0(); - d0.type()=code.op0().type().subtype(); + d0.type()=empty_typet(); clean_code.op0()=d0; clean_expr(clean_code.op0(), state, true); + if(clean_code.op0().id()==byte_extract_id() && + clean_code.op0().type().id()==ID_empty) + clean_code.op0()=clean_code.op0().op0(); clean_expr(clean_code.op1(), state, false); process_array_expr(clean_code.op0()); diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 96b6acbe204..8399b317f90 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -386,7 +386,7 @@ bool value_sett::eval_pointer_offset( if(mod && ptr_offset!=previous_offset) return false; - new_expr=from_integer(ptr_offset, index_type()); + new_expr=from_integer(ptr_offset, expr.type()); previous_offset=ptr_offset; mod=true; } @@ -1822,6 +1822,9 @@ void value_sett::apply_code( else if(statement==ID_array_copy) { } + else if(statement==ID_array_replace) + { + } else if(statement==ID_assume) { guard(to_code_assume(code).op0(), ns); diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index 6f9a2a14a98..cc4645bd614 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -28,8 +28,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include - #include + #include #include @@ -476,9 +476,6 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( const symbolt &memory_symbol=ns.lookup(CPROVER_PREFIX "memory"); exprt symbol_expr=symbol_exprt(memory_symbol.name, memory_symbol.type); - exprt pointer_offset=unary_exprt( - ID_pointer_offset, pointer_expr, index_type()); - if(base_type_eq( ns.follow(memory_symbol.type).subtype(), dereference_type, ns)) @@ -486,7 +483,7 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( // Types match already, what a coincidence! // We can use an index expression. - exprt index_expr=index_exprt(symbol_expr, pointer_offset); + exprt index_expr=index_exprt(symbol_expr, pointer_offset(pointer_expr)); index_expr.type()=ns.follow(memory_symbol.type).subtype(); result.value=index_expr; } @@ -494,7 +491,7 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( ns.follow(memory_symbol.type).subtype(), dereference_type)) { - exprt index_expr=index_exprt(symbol_expr, pointer_offset); + exprt index_expr=index_exprt(symbol_expr, pointer_offset(pointer_expr)); index_expr.type()=ns.follow(memory_symbol.type).subtype(); result.value=typecast_exprt(index_expr, dereference_type); } @@ -509,7 +506,8 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( else { exprt byte_extract(byte_extract_id(), dereference_type); - byte_extract.copy_to_operands(symbol_expr, pointer_offset); + byte_extract.copy_to_operands( + symbol_expr, pointer_offset(pointer_expr)); result.value=byte_extract; } } @@ -570,12 +568,14 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( if(o.offset().is_constant()) offset=o.offset(); else - offset=unary_exprt(ID_pointer_offset, pointer_expr, index_type()); + offset=pointer_offset(pointer_expr); exprt adjusted_offset; // are we doing a byte? mp_integer element_size= + dereference_type.id()==ID_empty? + pointer_offset_size(char_type(), ns): pointer_offset_size(dereference_type, ns); if(element_size==1) @@ -624,8 +624,7 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( result.value=o.root_object(); // this is relative to the root object - const exprt offset= - unary_exprt(ID_pointer_offset, pointer_expr, index_type()); + const exprt offset=pointer_offset(pointer_expr); if(memory_model(result.value, dereference_type, tmp_guard, offset)) { @@ -989,8 +988,9 @@ bool value_set_dereferencet::memory_model_bytes( if(from_width<=0) throw "unknown or invalid type size:\n"+from_type.pretty(); - mp_integer to_width=pointer_offset_size(to_type, ns); - if(to_width<=0) + mp_integer to_width= + to_type.id()==ID_empty?0: pointer_offset_size(to_type, ns); + if(to_width<0) throw "unknown or invalid type size:\n"+to_type.pretty(); exprt bound=from_integer(from_width-to_width, offset.type()); diff --git a/src/pointer-analysis/value_set_fi.cpp b/src/pointer-analysis/value_set_fi.cpp index 89975646131..fb8dd9e685a 100644 --- a/src/pointer-analysis/value_set_fi.cpp +++ b/src/pointer-analysis/value_set_fi.cpp @@ -1727,7 +1727,9 @@ void value_set_fit::apply_code( else if(statement==ID_fence) { } - else if(statement==ID_array_copy) + else if(statement==ID_array_copy || + statement==ID_array_replace || + statement==ID_array_set) { } else if(statement==ID_input || statement==ID_output) diff --git a/src/solvers/flattening/flatten_byte_operators.cpp b/src/solvers/flattening/flatten_byte_operators.cpp index a9597e635af..29494041687 100644 --- a/src/solvers/flattening/flatten_byte_operators.cpp +++ b/src/solvers/flattening/flatten_byte_operators.cpp @@ -6,6 +6,7 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ +#include #include #include #include @@ -13,11 +14,152 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include +#include #include "flatten_byte_operators.h" /*******************************************************************\ +Function: unpack_rec + + Inputs: + src object to unpack + little_endian true, iff assumed endianness is little-endian + max_bytes if not nil, use as upper bound of the number of bytes + to unpack + ns namespace for type lookups + + Outputs: array of bytes in the sequence found in memory + + Purpose: rewrite an object into its individual bytes + +\*******************************************************************/ + +static exprt unpack_rec( + const exprt &src, + bool little_endian, + const exprt &max_bytes, + const namespacet &ns, + bool unpack_byte_array=false) +{ + array_exprt array( + array_typet(unsignedbv_typet(8), from_integer(0, size_type()))); + + // endianness_mapt should be the point of reference for mapping out + // endianness, but we need to work on elements here instead of + // individual bits + + const typet &type=ns.follow(src.type()); + + if(type.id()==ID_array) + { + const array_typet &array_type=to_array_type(type); + const typet &subtype=array_type.subtype(); + + mp_integer element_width=pointer_offset_bits(subtype, ns); + // this probably doesn't really matter + #if 0 + if(element_width<=0) + throw "cannot unpack array with non-constant element width:\n"+ + type.pretty(); + else if(element_width%8!=0) + throw "cannot unpack array of non-byte aligned elements:\n"+ + type.pretty(); + #endif + + if(!unpack_byte_array && element_width==8) + return src; + + mp_integer num_elements; + if(to_integer(max_bytes, num_elements) && + to_integer(array_type.size(), num_elements)) + throw "cannot unpack array of non-const size:\n"+type.pretty(); + + // all array members will have the same structure; do this just + // once and then replace the dummy symbol by a suitable index + // expression in the loop below + symbol_exprt dummy(ID_C_incomplete, subtype); + exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true); + + for(mp_integer i=0; i <... 8bits ...> + // + // An array {0, 1} will be encoded as bvt bv={0,1}, i.e., bv[1]==1 + // concatenation(0, 1) will yield a bvt bv={1,0}, i.e., bv[1]==0 + // + // The semantics of byte_extract(endianness, op, offset, T) is: + // interpret ((char*)&op)+offset as the endianness-ordered storage + // of an object of type T at address ((char*)&op)+offset + // For some T x, byte_extract(endianness, x, 0, T) must yield x. + // + // byte_extract for a composite type T or an array will interpret + // the individual subtypes with suitable endianness; the overall + // order of components is not affected by endianness. + // + // Examples: + // unsigned char A[4]; + // byte_extract_little_endian(A, 0, unsigned short) requests that + // A[0],A[1] be interpreted as the storage of an unsigned short with + // A[1] being the most-significant byte; byte_extract_big_endian for + // the same operands will select A[0] as the most-significant byte. + // + // int A[2] = {0x01020304,0xDEADBEEF} + // byte_extract_big_endian(A, 0, short) should yield 0x0102 + // byte_extract_little_endian(A, 0, short) should yield 0x0304 + // To obtain this we first compute byte arrays while taking into + // account endianness: + // big-endian byte representation: {01,02,03,04,DE,AB,BE,EF} + // little-endian byte representation: {04,03,02,01,EF,BE,AB,DE} + // We extract the relevant bytes starting from ((char*)A)+0: + // big-endian: {01,02}; little-endian: {04,03} + // Finally we place them in the appropriate byte order as indicated + // by endianness: + // big-endian: (short)concatenation(01,02)=0x0102 + // little-endian: (short)concatenation(03,04)=0x0304 + assert(src.operands().size()==2); bool little_endian; @@ -44,135 +231,133 @@ exprt flatten_byte_extract( else assert(false); - mp_integer size_bits=pointer_offset_bits(src.type(), ns); - if(size_bits<0) - throw "byte_extract flatting with non-constant size: "+src.pretty(); + // determine an upper bound of the number of bytes we might need + exprt upper_bound=size_of_expr(src.type(), ns); + if(upper_bound.is_not_nil()) + upper_bound= + simplify_expr( + plus_exprt( + upper_bound, + typecast_exprt(src.offset(), upper_bound.type())), + ns); - if(src.op0().type().id()==ID_array) - { - const exprt &root=src.op0(); - const exprt &offset=src.op1(); + byte_extract_exprt unpacked(src); + unpacked.op()= + unpack_rec(src.op(), little_endian, upper_bound, ns); + + const typet &type=ns.follow(src.type()); - const array_typet &array_type=to_array_type(root.type()); + if(type.id()==ID_array) + { + const array_typet &array_type=to_array_type(type); const typet &subtype=array_type.subtype(); mp_integer element_width=pointer_offset_bits(subtype, ns); - if(element_width<0) // failed - throw "failed to flatten array with unknown element width"; - - mp_integer num_elements= - size_bits/element_width+((size_bits%element_width==0)?0:1); - - const typet &offset_type=ns.follow(offset.type()); - - // byte-array? - if(element_width==8) + mp_integer num_elements; + // TODO: consider ways of dealing with arrays of unknown subtype + // size or with a subtype size that does not fit byte boundaries + if(element_width>0 && element_width%8==0 && + to_integer(array_type.size(), num_elements)) { - // get 'width'-many bytes, and concatenate - std::size_t width_bytes=integer2unsigned(num_elements); - exprt::operandst op; - op.reserve(width_bytes); + array_exprt array(array_type); - for(std::size_t i=0; i=2 - { - concatenation_exprt concatenation(src.type()); - concatenation.operands().swap(op); - return concatenation; + array.copy_to_operands(flatten_byte_extract(tmp, ns)); } - } - else // non-byte array - { - // add an extra element as the access need not be aligned with - // element boundaries and could thus stretch over extra elements - ++num_elements; - assert(element_width!=0); + return array; + } + } + else if(type.id()==ID_struct) + { + const struct_typet &struct_type=to_struct_type(type); + const struct_typet::componentst &components=struct_type.components(); - // compute new root and offset - concatenation_exprt concat( - unsignedbv_typet(integer2unsigned(element_width*num_elements))); + bool failed=false; + struct_exprt s(struct_type); - assert(element_width%8==0); - exprt first_index= - div_exprt(offset, from_integer(element_width/8, offset_type)); + for(const auto &comp : components) + { + mp_integer element_width=pointer_offset_bits(comp.type(), ns); - // byte extract will do the appropriate mapping, thus MSB comes - // last here (as opposed to the above, where no further byte - // extract is involved) - for(mp_integer i=0; i=2 + { + concatenation_exprt concatenation(src.type()); + concatenation.operands().swap(op); + return concatenation; } } diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 9e787f58432..e31eceb1e98 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -809,6 +809,7 @@ IREP_ID_ONE(cprover_string_to_lower_case_func) IREP_ID_ONE(cprover_string_to_upper_case_func) IREP_ID_ONE(cprover_string_trim_func) IREP_ID_ONE(cprover_string_value_of_func) +IREP_ID_ONE(array_replace) #undef IREP_ID_ONE #undef IREP_ID_TWO diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index 7ab752c898f..e30459f2df2 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -283,7 +283,7 @@ exprt member_offset_expr( return member_offset_expr( to_struct_type(type), member_expr.get_component_name(), ns); else if(type.id()==ID_union) - return from_integer(0, signed_size_type()); + return from_integer(0, size_type()); else return nil_exprt(); } @@ -307,7 +307,7 @@ exprt member_offset_expr( { const struct_typet::componentst &components=type.components(); - exprt result=from_integer(0, signed_size_type()); + exprt result=from_integer(0, size_type()); std::size_t bit_field_bits=0; for(struct_typet::componentst::const_iterator diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index 535f8d92048..54ae6c8ff70 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -2045,7 +2045,9 @@ bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr) assert(el_size%8==0); mp_integer el_bytes=el_size/8; - if(base_type_eq(expr.type(), op_type_ptr->subtype(), ns)) + if(base_type_eq(expr.type(), op_type_ptr->subtype(), ns) || + (expr.type().id()==ID_pointer && + op_type_ptr->subtype().id()==ID_pointer)) { if(offset%el_bytes==0) { @@ -2056,6 +2058,9 @@ bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr) result, from_integer(offset, expr.offset().type())); + if(!base_type_eq(expr.type(), op_type_ptr->subtype(), ns)) + result.make_typecast(expr.type()); + expr.swap(result); simplify_rec(expr); return false; diff --git a/src/util/simplify_expr_pointer.cpp b/src/util/simplify_expr_pointer.cpp index ab25eb56a6a..f27f64c3dff 100644 --- a/src/util/simplify_expr_pointer.cpp +++ b/src/util/simplify_expr_pointer.cpp @@ -394,9 +394,8 @@ bool simplify_exprt::simplify_pointer_offset(exprt &expr) return true; // this might change the type of the pointer! - exprt pointer_offset(ID_pointer_offset, expr.type()); - pointer_offset.copy_to_operands(ptr_expr.front()); - simplify_node(pointer_offset); + exprt pointer_offset_expr=pointer_offset(ptr_expr.front()); + simplify_node(pointer_offset_expr); exprt sum; @@ -417,7 +416,7 @@ bool simplify_exprt::simplify_pointer_offset(exprt &expr) simplify_node(product); - expr=binary_exprt(pointer_offset, ID_plus, product, expr.type()); + expr=binary_exprt(pointer_offset_expr, ID_plus, product, expr.type()); simplify_node(expr); diff --git a/src/util/std_expr.h b/src/util/std_expr.h index d05d3282d62..ce5df4a99ac 100644 --- a/src/util/std_expr.h +++ b/src/util/std_expr.h @@ -2315,7 +2315,7 @@ class extractbits_exprt:public exprt const exprt &_lower, const typet &_type):exprt(ID_extractbits, _type) { - copy_to_operands(_src, _lower, _upper); + copy_to_operands(_src, _upper, _lower); } extractbits_exprt(