@@ -18,13 +18,101 @@ Author: Daniel Kroening, kroening@kroening.com
1818
1919#include " flatten_byte_extract_exceptions.h"
2020
21- // clang-format off
21+ static exprt unpack_rec (
22+ const exprt &src,
23+ bool little_endian,
24+ const exprt &max_bytes,
25+ const namespacet &ns,
26+ bool unpack_byte_array = false );
27+
28+ // / Rewrite an array or vector into its individual bytes.
29+ // / \param src: array/vector to unpack
30+ // / \param src_size: array/vector size; if not a constant, \p max_bytes must be
31+ // / a constant value, otherwise we fail with an exception
32+ // / \param element_width: bit width of array/vector elements
33+ // / \param little_endian: true, iff assumed endianness is little-endian
34+ // / \param max_bytes: if not nil, use as upper bound of the number of bytes to
35+ // / unpack
36+ // / \param ns: namespace for type lookups
37+ // / \return array_exprt holding unpacked elements
38+ static array_exprt unpack_array_vector (
39+ const exprt &src,
40+ const exprt &src_size,
41+ const mp_integer &element_width,
42+ bool little_endian,
43+ const exprt &max_bytes,
44+ const namespacet &ns)
45+ {
46+ auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
47+ auto num_elements = numeric_cast<mp_integer>(src_size);
48+
49+ if (!max_bytes_int && !num_elements)
50+ {
51+ throw non_const_array_sizet (src.type (), max_bytes);
52+ }
53+
54+ mp_integer first_element = 0 ;
55+
56+ // refine the number of elements to extract in case the element width is known
57+ // and a multiple of bytes; otherwise we will expand the entire array/vector
58+ optionalt<mp_integer> max_elements;
59+ if (element_width > 0 && element_width % 8 == 0 )
60+ {
61+ mp_integer el_bytes = element_width / 8 ;
62+
63+ if (!num_elements)
64+ {
65+ // turn bytes into elements, rounding up
66+ max_elements = (*max_bytes_int + el_bytes - 1 ) / el_bytes;
67+ }
68+ }
2269
23- // / rewrite an object into its individual bytes
24- // / \par parameters: src object to unpack
25- // / little_endian true, iff assumed endianness is little-endian
26- // / max_bytes if not nil, use as upper bound of the number of bytes to unpack
27- // / ns namespace for type lookups
70+ // the maximum number of bytes is an upper bound in case the size of the
71+ // array/vector is unknown; if the element_width was usable above this will
72+ // have been turned into a number of elements already
73+ if (!num_elements)
74+ num_elements = *max_elements;
75+
76+ const exprt src_simp = simplify_expr (src, ns);
77+
78+ exprt::operandst byte_operands;
79+ for (mp_integer i = first_element; i < *num_elements; ++i)
80+ {
81+ exprt element;
82+
83+ if (
84+ (src_simp.id () == ID_array || src_simp.id () == ID_vector) &&
85+ i < src_simp.operands ().size ())
86+ {
87+ const std::size_t index_int = numeric_cast_v<std::size_t >(i);
88+ element = src_simp.operands ()[index_int];
89+ }
90+ else
91+ {
92+ element = index_exprt (src_simp, from_integer (i, index_type ()));
93+ }
94+
95+ // recursively unpack each element until so that we eventually just have an
96+ // array of bytes left
97+ exprt sub = unpack_rec (element, little_endian, max_bytes, ns, true );
98+ byte_operands.insert (
99+ byte_operands.end (), sub.operands ().begin (), sub.operands ().end ());
100+ }
101+
102+ const std::size_t size = byte_operands.size ();
103+ return array_exprt (
104+ std::move (byte_operands),
105+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
106+ }
107+
108+ // / Rewrite an object into its individual bytes.
109+ // / \param src: object to unpack
110+ // / \param little_endian: true, iff assumed endianness is little-endian
111+ // / \param max_bytes: if not nil, use as upper bound of the number of bytes to
112+ // / unpack
113+ // / \param ns: namespace for type lookups
114+ // / \param unpack_byte_array: if true, return unmodified \p src iff it is an
115+ // array of bytes
28116// / \return array of bytes in the sequence found in memory
29117// / \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
30118// / object because of either non constant size, byte misalignment or
@@ -34,15 +122,8 @@ static exprt unpack_rec(
34122 bool little_endian,
35123 const exprt &max_bytes,
36124 const namespacet &ns,
37- bool unpack_byte_array= false )
125+ bool unpack_byte_array)
38126{
39- array_exprt array ({},
40- array_typet (unsignedbv_typet (8 ), from_integer (0 , size_type ())));
41-
42- // endianness_mapt should be the point of reference for mapping out
43- // endianness, but we need to work on elements here instead of
44- // individual bits
45-
46127 if (src.type ().id ()==ID_array)
47128 {
48129 const array_typet &array_type=to_array_type (src.type ());
@@ -51,54 +132,32 @@ static exprt unpack_rec(
51132 auto element_width = pointer_offset_bits (subtype, ns);
52133 CHECK_RETURN (element_width.has_value ());
53134
54- // this probably doesn't really matter
55- #if 0
56- INVARIANT(
57- element_width > 0,
58- "element width of array should be constant",
59- irep_pretty_diagnosticst(src.type()));
60-
61- INVARIANT(
62- element_width % 8 == 0,
63- "elements in array should be byte-aligned",
64- irep_pretty_diagnosticst(src.type()));
65- #endif
66-
67135 if (!unpack_byte_array && *element_width == 8 )
68136 return src;
69137
70- auto num_elements = numeric_cast<mp_integer>(max_bytes);
71- if (!num_elements.has_value ())
72- num_elements = numeric_cast<mp_integer>(array_type.size ());
73- if (!num_elements.has_value ())
74- throw non_const_array_sizet (array_type, max_bytes);
138+ return unpack_array_vector (
139+ src, array_type.size (), *element_width, little_endian, max_bytes, ns);
140+ }
141+ else if (src.type ().id () == ID_vector)
142+ {
143+ const vector_typet &vector_type = to_vector_type (src.type ());
144+ const typet &subtype = vector_type.subtype ();
75145
76- // all array members will have the same structure; do this just
77- // once and then replace the dummy symbol by a suitable index
78- // expression in the loop below
79- symbol_exprt dummy (ID_C_incomplete, subtype);
80- exprt sub=unpack_rec (dummy, little_endian, max_bytes, ns, true );
146+ auto element_width = pointer_offset_bits (subtype, ns);
147+ CHECK_RETURN (element_width.has_value ());
81148
82- for (mp_integer i=0 ; i<*num_elements; ++i)
83- {
84- index_exprt index (src, from_integer (i, index_type ()));
85- replace_symbolt replace;
86- replace.insert (dummy, index);
149+ if (!unpack_byte_array && *element_width == 8 )
150+ return src;
87151
88- for (const auto &op : sub.operands ())
89- {
90- exprt new_op=op;
91- replace (new_op);
92- simplify (new_op, ns);
93- array.copy_to_operands (new_op);
94- }
95- }
152+ return unpack_array_vector (
153+ src, vector_type.size (), *element_width, little_endian, max_bytes, ns);
96154 }
97155 else if (ns.follow (src.type ()).id ()==ID_struct)
98156 {
99157 const struct_typet &struct_type=to_struct_type (ns.follow (src.type ()));
100158 const struct_typet::componentst &components=struct_type.components ();
101159
160+ exprt::operandst byte_operands;
102161 for (const auto &comp : components)
103162 {
104163 auto element_width = pointer_offset_bits (comp.type (), ns);
@@ -114,9 +173,14 @@ static exprt unpack_rec(
114173 member_exprt member (src, comp.get_name (), comp.type ());
115174 exprt sub=unpack_rec (member, little_endian, max_bytes, ns, true );
116175
117- for ( const auto & op : sub. operands ())
118- array. copy_to_operands (op );
176+ byte_operands. insert (
177+ byte_operands. end (), sub. operands (). begin (), sub. operands (). end () );
119178 }
179+
180+ const std::size_t size = byte_operands.size ();
181+ return array_exprt (
182+ std::move (byte_operands),
183+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
120184 }
121185 else if (src.type ().id ()!=ID_empty)
122186 {
@@ -138,6 +202,7 @@ static exprt unpack_rec(
138202 bits = *bits_opt * 8 ;
139203 }
140204
205+ exprt::operandst byte_operands;
141206 for (mp_integer i=0 ; i<bits; i+=8 )
142207 {
143208 extractbits_exprt extractbits (
@@ -146,17 +211,23 @@ static exprt unpack_rec(
146211 from_integer (i, index_type ()),
147212 unsignedbv_typet (8 ));
148213
214+ // endianness_mapt should be the point of reference for mapping out
215+ // endianness, but we need to work on elements here instead of
216+ // individual bits
149217 if (little_endian)
150- array. copy_to_operands (extractbits);
218+ byte_operands. push_back (extractbits);
151219 else
152- array. operands (). insert (array. operands () .begin (), extractbits);
220+ byte_operands. insert (byte_operands .begin (), extractbits);
153221 }
154- }
155222
156- to_array_type (array.type ()).size ()=
157- from_integer (array.operands ().size (), size_type ());
223+ const std::size_t size = byte_operands.size ();
224+ return array_exprt (
225+ std::move (byte_operands),
226+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
227+ }
158228
159- return std::move (array);
229+ return array_exprt (
230+ {}, array_typet (unsignedbv_typet (8 ), from_integer (0 , size_type ())));
160231}
161232
162233// / rewrite byte extraction from an array to byte extraction from a
@@ -233,12 +304,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
233304 const typet &subtype=array_type.subtype ();
234305
235306 auto element_width = pointer_offset_bits (subtype, ns);
236- const auto num_elements = numeric_cast<mp_integer>(array_type.size ());
237- // TODO: consider ways of dealing with arrays of unknown subtype
238- // size or with a subtype size that does not fit byte boundaries
307+ auto num_elements = numeric_cast<mp_integer>(array_type.size ());
308+ if (!num_elements.has_value ())
309+ num_elements = mp_integer (unpacked.op ().operands ().size ());
310+
311+ // consider ways of dealing with arrays of unknown subtype size or with a
312+ // subtype size that does not fit byte boundaries; currently we fall back to
313+ // stitching together consecutive elements down below
239314 if (
240315 element_width.has_value () && *element_width >= 1 &&
241- *element_width % 8 == 0 && num_elements. has_value () )
316+ *element_width % 8 == 0 )
242317 {
243318 array_exprt array ({}, array_type);
244319
@@ -258,6 +333,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
258333 return simplify_expr (array, ns);
259334 }
260335 }
336+ else if (src.type ().id () == ID_vector)
337+ {
338+ const vector_typet &vector_type = to_vector_type (src.type ());
339+ const typet &subtype = vector_type.subtype ();
340+
341+ mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size ());
342+
343+ auto element_width = pointer_offset_bits (subtype, ns);
344+ CHECK_RETURN (element_width.has_value ());
345+
346+ // consider ways of dealing with vectors of unknown subtype size or with a
347+ // subtype size that does not fit byte boundaries; currently we fall back to
348+ // stitching together consecutive elements down below
349+ if (
350+ element_width.has_value () && *element_width >= 1 &&
351+ *element_width % 8 == 0 )
352+ {
353+ vector_exprt vector (vector_type);
354+
355+ for (mp_integer i = 0 ; i < num_elements; ++i)
356+ {
357+ plus_exprt new_offset (
358+ unpacked.offset (),
359+ from_integer (i * (*element_width) / 8 , unpacked.offset ().type ()));
360+
361+ byte_extract_exprt tmp (unpacked);
362+ tmp.type () = subtype;
363+ tmp.offset () = simplify_expr (new_offset, ns);
364+
365+ vector.copy_to_operands (lower_byte_extract (tmp, ns));
366+ }
367+
368+ return simplify_expr (vector, ns);
369+ }
370+ }
261371 else if (ns.follow (src.type ()).id ()==ID_struct)
262372 {
263373 const struct_typet &struct_type=to_struct_type (ns.follow (src.type ()));
@@ -299,10 +409,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
299409 const exprt &root=unpacked.op ();
300410 const exprt &offset=unpacked.offset ();
301411
302- const array_typet &array_type=to_array_type (root.type ());
303- const typet &subtype=array_type.subtype ();
412+ optionalt<typet> subtype;
413+ if (root.type ().id () == ID_vector)
414+ subtype = to_vector_type (root.type ()).subtype ();
415+ else
416+ subtype = to_array_type (root.type ()).subtype ();
304417
305- auto subtype_bits = pointer_offset_bits (subtype, ns);
418+ auto subtype_bits = pointer_offset_bits (* subtype, ns);
306419
307420 DATA_INVARIANT (
308421 subtype_bits.has_value () && *subtype_bits == 8 ,
@@ -344,7 +457,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
344457 else // width_bytes>=2
345458 {
346459 concatenation_exprt concatenation (
347- std::move (op), bitvector_typet (subtype. id (), width_bytes * 8 ));
460+ std::move (op), bitvector_typet (subtype-> id (), width_bytes * 8 ));
348461 return simplify_expr (
349462 typecast_exprt (std::move (concatenation), src.type ()), ns);
350463 }
@@ -649,4 +762,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
649762 else
650763 return tmp;
651764}
652- // clang-format on
0 commit comments