@@ -18,13 +18,102 @@ Author: Daniel Kroening, kroening@kroening.com
1818
1919#include " flatten_byte_extract_exceptions.h"
2020
21- // clang-format off
21+ static exprt unpack_rec (
22+ const exprt &src,
23+ bool little_endian,
24+ const exprt &max_bytes,
25+ const namespacet &ns,
26+ bool unpack_byte_array = false );
27+
28+ // / Rewrite an array or vector into its individual bytes.
29+ // / \param src: array/vector to unpack
30+ // / \param src_size: array/vector size; if not a constant, \p max_bytes must be
31+ // / a constant value, otherwise we fail with an exception
32+ // / \param element_width: bit width of array/vector elements
33+ // / \param little_endian: true, iff assumed endianness is little-endian
34+ // / \param max_bytes: if not nil, use as upper bound of the number of bytes to
35+ // / unpack
36+ // / \param ns: namespace for type lookups
37+ // / \return array_exprt holding unpacked elements
38+ static array_exprt unpack_array_vector (
39+ const exprt &src,
40+ const exprt &src_size,
41+ const mp_integer &element_width,
42+ bool little_endian,
43+ const exprt &max_bytes,
44+ const namespacet &ns)
45+ {
46+ auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
47+ auto num_elements = numeric_cast<mp_integer>(src_size);
48+
49+ if (!max_bytes_int && !num_elements)
50+ {
51+ throw non_const_array_sizet (src.type (), max_bytes);
52+ }
53+
54+ mp_integer first_element = 0 ;
55+
56+ // refine the number of elements to extract in case the element width is known
57+ // and a multiple of bytes; otherwise we will expand the entire array/vector
58+ optionalt<mp_integer> max_elements;
59+ if (element_width > 0 && element_width % 8 == 0 )
60+ {
61+ mp_integer el_bytes = element_width / 8 ;
62+
63+ // turn bytes into elements
64+ if (!num_elements)
65+ {
66+ // round up to nearest multiple of el_bytes
67+ max_elements = (*max_bytes_int + el_bytes - 1 ) / el_bytes;
68+ }
69+ }
2270
23- // / rewrite an object into its individual bytes
24- // / \par parameters: src object to unpack
25- // / little_endian true, iff assumed endianness is little-endian
26- // / max_bytes if not nil, use as upper bound of the number of bytes to unpack
27- // / ns namespace for type lookups
71+ // the maximum number of bytes is an upper bound in case the size of the
72+ // array/vector is unknown; if the element_width was usable above this will
73+ // have been turned into a number of elements already
74+ if (!num_elements)
75+ num_elements = *max_elements;
76+
77+ const exprt src_simp = simplify_expr (src, ns);
78+
79+ exprt::operandst byte_operands;
80+ for (mp_integer i = first_element; i < *num_elements; ++i)
81+ {
82+ exprt element;
83+
84+ if (
85+ (src_simp.id () == ID_array || src_simp.id () == ID_vector) &&
86+ i < src_simp.operands ().size ())
87+ {
88+ const std::size_t index_int = numeric_cast_v<std::size_t >(i);
89+ element = src_simp.operands ()[index_int];
90+ }
91+ else
92+ {
93+ element = index_exprt (src_simp, from_integer (i, index_type ()));
94+ }
95+
96+ // recursively unpack each element until so that we eventually just have an
97+ // array of bytes left
98+ exprt sub = unpack_rec (element, little_endian, max_bytes, ns, true );
99+ byte_operands.insert (
100+ byte_operands.end (), sub.operands ().begin (), sub.operands ().end ());
101+ }
102+
103+ const std::size_t size = byte_operands.size ();
104+ return array_exprt (
105+ std::move (byte_operands),
106+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
107+ }
108+
109+ // / Rewrite an object into its individual bytes.
110+ // / \param src: object to unpack
111+ // / \param little_endian: true, iff assumed endianness is little-endian
112+ // / \param max_bytes: if not nil, use as upper bound of the number of bytes to
113+ // / unpack
114+ // / \param ns: namespace for type lookups
115+ // / \param unpack_byte_array: if true, return unmodified \p src iff it is an
116+ // array of bytes
28117// / \return array of bytes in the sequence found in memory
29118// / \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
30119// / object because of either non constant size, byte misalignment or
@@ -34,15 +123,8 @@ static exprt unpack_rec(
34123 bool little_endian,
35124 const exprt &max_bytes,
36125 const namespacet &ns,
37- bool unpack_byte_array= false )
126+ bool unpack_byte_array)
38127{
39- array_exprt array ({},
40- array_typet (unsignedbv_typet (8 ), from_integer (0 , size_type ())));
41-
42- // endianness_mapt should be the point of reference for mapping out
43- // endianness, but we need to work on elements here instead of
44- // individual bits
45-
46128 if (src.type ().id ()==ID_array)
47129 {
48130 const array_typet &array_type=to_array_type (src.type ());
@@ -51,54 +133,32 @@ static exprt unpack_rec(
51133 auto element_width = pointer_offset_bits (subtype, ns);
52134 CHECK_RETURN (element_width.has_value ());
53135
54- // this probably doesn't really matter
55- #if 0
56- INVARIANT(
57- element_width > 0,
58- "element width of array should be constant",
59- irep_pretty_diagnosticst(src.type()));
60-
61- INVARIANT(
62- element_width % 8 == 0,
63- "elements in array should be byte-aligned",
64- irep_pretty_diagnosticst(src.type()));
65- #endif
66-
67136 if (!unpack_byte_array && *element_width == 8 )
68137 return src;
69138
70- auto num_elements = numeric_cast<mp_integer>(max_bytes);
71- if (!num_elements.has_value ())
72- num_elements = numeric_cast<mp_integer>(array_type.size ());
73- if (!num_elements.has_value ())
74- throw non_const_array_sizet (array_type, max_bytes);
139+ return unpack_array_vector (
140+ src, array_type.size (), *element_width, little_endian, max_bytes, ns);
141+ }
142+ else if (src.type ().id () == ID_vector)
143+ {
144+ const vector_typet &vector_type = to_vector_type (src.type ());
145+ const typet &subtype = vector_type.subtype ();
75146
76- // all array members will have the same structure; do this just
77- // once and then replace the dummy symbol by a suitable index
78- // expression in the loop below
79- symbol_exprt dummy (ID_C_incomplete, subtype);
80- exprt sub=unpack_rec (dummy, little_endian, max_bytes, ns, true );
147+ auto element_width = pointer_offset_bits (subtype, ns);
148+ CHECK_RETURN (element_width.has_value ());
81149
82- for (mp_integer i=0 ; i<*num_elements; ++i)
83- {
84- index_exprt index (src, from_integer (i, index_type ()));
85- replace_symbolt replace;
86- replace.insert (dummy, index);
150+ if (!unpack_byte_array && *element_width == 8 )
151+ return src;
87152
88- for (const auto &op : sub.operands ())
89- {
90- exprt new_op=op;
91- replace (new_op);
92- simplify (new_op, ns);
93- array.copy_to_operands (new_op);
94- }
95- }
153+ return unpack_array_vector (
154+ src, vector_type.size (), *element_width, little_endian, max_bytes, ns);
96155 }
97156 else if (ns.follow (src.type ()).id ()==ID_struct)
98157 {
99158 const struct_typet &struct_type=to_struct_type (ns.follow (src.type ()));
100159 const struct_typet::componentst &components=struct_type.components ();
101160
161+ exprt::operandst byte_operands;
102162 for (const auto &comp : components)
103163 {
104164 auto element_width = pointer_offset_bits (comp.type (), ns);
@@ -114,9 +174,14 @@ static exprt unpack_rec(
114174 member_exprt member (src, comp.get_name (), comp.type ());
115175 exprt sub=unpack_rec (member, little_endian, max_bytes, ns, true );
116176
117- for ( const auto & op : sub. operands ())
118- array. copy_to_operands (op );
177+ byte_operands. insert (
178+ byte_operands. end (), sub. operands (). begin (), sub. operands (). end () );
119179 }
180+
181+ const std::size_t size = byte_operands.size ();
182+ return array_exprt (
183+ std::move (byte_operands),
184+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
120185 }
121186 else if (src.type ().id ()!=ID_empty)
122187 {
@@ -138,6 +203,7 @@ static exprt unpack_rec(
138203 bits = *bits_opt * 8 ;
139204 }
140205
206+ exprt::operandst byte_operands;
141207 for (mp_integer i=0 ; i<bits; i+=8 )
142208 {
143209 extractbits_exprt extractbits (
@@ -146,17 +212,23 @@ static exprt unpack_rec(
146212 from_integer (i, index_type ()),
147213 unsignedbv_typet (8 ));
148214
215+ // endianness_mapt should be the point of reference for mapping out
216+ // endianness, but we need to work on elements here instead of
217+ // individual bits
149218 if (little_endian)
150- array. copy_to_operands (extractbits);
219+ byte_operands. push_back (extractbits);
151220 else
152- array. operands (). insert (array. operands () .begin (), extractbits);
221+ byte_operands. insert (byte_operands .begin (), extractbits);
153222 }
154- }
155223
156- to_array_type (array.type ()).size ()=
157- from_integer (array.operands ().size (), size_type ());
224+ const std::size_t size = byte_operands.size ();
225+ return array_exprt (
226+ std::move (byte_operands),
227+ array_typet (unsignedbv_typet (8 ), from_integer (size, size_type ())));
228+ }
158229
159- return std::move (array);
230+ return array_exprt (
231+ {}, array_typet (unsignedbv_typet (8 ), from_integer (0 , size_type ())));
160232}
161233
162234// / rewrite byte extraction from an array to byte extraction from a
@@ -233,12 +305,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
233305 const typet &subtype=array_type.subtype ();
234306
235307 auto element_width = pointer_offset_bits (subtype, ns);
236- const auto num_elements = numeric_cast<mp_integer>(array_type.size ());
237- // TODO: consider ways of dealing with arrays of unknown subtype
238- // size or with a subtype size that does not fit byte boundaries
308+ auto num_elements = numeric_cast<mp_integer>(array_type.size ());
309+ if (!num_elements.has_value ())
310+ num_elements = mp_integer (unpacked.op ().operands ().size ());
311+
312+ // consider ways of dealing with arrays of unknown subtype size or with a
313+ // subtype size that does not fit byte boundaries; currently we fall back to
314+ // stitching together consecutive elements down below
239315 if (
240316 element_width.has_value () && *element_width >= 1 &&
241- *element_width % 8 == 0 && num_elements. has_value () )
317+ *element_width % 8 == 0 )
242318 {
243319 array_exprt array ({}, array_type);
244320
@@ -258,6 +334,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
258334 return simplify_expr (array, ns);
259335 }
260336 }
337+ else if (src.type ().id () == ID_vector)
338+ {
339+ const vector_typet &vector_type = to_vector_type (src.type ());
340+ const typet &subtype = vector_type.subtype ();
341+
342+ mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size ());
343+
344+ auto element_width = pointer_offset_bits (subtype, ns);
345+ CHECK_RETURN (element_width.has_value ());
346+
347+ // consider ways of dealing with vectors of unknown subtype size or with a
348+ // subtype size that does not fit byte boundaries; currently we fall back to
349+ // stitching together consecutive elements down below
350+ if (
351+ element_width.has_value () && *element_width >= 1 &&
352+ *element_width % 8 == 0 )
353+ {
354+ vector_exprt vector (vector_type);
355+
356+ for (mp_integer i = 0 ; i < num_elements; ++i)
357+ {
358+ plus_exprt new_offset (
359+ unpacked.offset (),
360+ from_integer (i * (*element_width) / 8 , unpacked.offset ().type ()));
361+
362+ byte_extract_exprt tmp (unpacked);
363+ tmp.type () = subtype;
364+ tmp.offset () = simplify_expr (new_offset, ns);
365+
366+ vector.copy_to_operands (lower_byte_extract (tmp, ns));
367+ }
368+
369+ return simplify_expr (vector, ns);
370+ }
371+ }
261372 else if (ns.follow (src.type ()).id ()==ID_struct)
262373 {
263374 const struct_typet &struct_type=to_struct_type (ns.follow (src.type ()));
@@ -299,10 +410,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
299410 const exprt &root=unpacked.op ();
300411 const exprt &offset=unpacked.offset ();
301412
302- const array_typet &array_type=to_array_type (root.type ());
303- const typet &subtype=array_type.subtype ();
413+ optionalt<typet> subtype;
414+ if (root.type ().id () == ID_vector)
415+ subtype = to_vector_type (root.type ()).subtype ();
416+ else
417+ subtype = to_array_type (root.type ()).subtype ();
304418
305- auto subtype_bits = pointer_offset_bits (subtype, ns);
419+ auto subtype_bits = pointer_offset_bits (* subtype, ns);
306420
307421 DATA_INVARIANT (
308422 subtype_bits.has_value () && *subtype_bits == 8 ,
@@ -344,7 +458,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
344458 else // width_bytes>=2
345459 {
346460 concatenation_exprt concatenation (
347- std::move (op), bitvector_typet (subtype. id (), width_bytes * 8 ));
461+ std::move (op), bitvector_typet (subtype-> id (), width_bytes * 8 ));
348462 return simplify_expr (
349463 typecast_exprt (std::move (concatenation), src.type ()), ns);
350464 }
@@ -649,4 +763,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
649763 else
650764 return tmp;
651765}
652- // clang-format on
0 commit comments