Skip to content

Commit ab1ad7f

Browse files
committed
byte_extract lowering of vectors and array cleanup
Refactor the code used for arrays to make it re-usable for vectors and arrays.
1 parent 5e69625 commit ab1ad7f

File tree

3 files changed

+184
-71
lines changed

3 files changed

+184
-71
lines changed

src/solvers/lowering/byte_operators.cpp

Lines changed: 180 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,102 @@ Author: Daniel Kroening, kroening@kroening.com
1818

1919
#include "flatten_byte_extract_exceptions.h"
2020

21-
// clang-format off
21+
static exprt unpack_rec(
22+
const exprt &src,
23+
bool little_endian,
24+
const exprt &max_bytes,
25+
const namespacet &ns,
26+
bool unpack_byte_array = false);
27+
28+
/// Rewrite an array or vector into its individual bytes.
29+
/// \param src: array/vector to unpack
30+
/// \param src_size: array/vector size; if not a constant, \p max_bytes must be
31+
/// a constant value, otherwise we fail with an exception
32+
/// \param element_width: bit width of array/vector elements
33+
/// \param little_endian: true, iff assumed endianness is little-endian
34+
/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
35+
/// unpack
36+
/// \param ns: namespace for type lookups
37+
/// \return array_exprt holding unpacked elements
38+
static array_exprt unpack_array_vector(
39+
const exprt &src,
40+
const exprt &src_size,
41+
const mp_integer &element_width,
42+
bool little_endian,
43+
const exprt &max_bytes,
44+
const namespacet &ns)
45+
{
46+
auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
47+
auto num_elements = numeric_cast<mp_integer>(src_size);
48+
49+
if(!max_bytes_int && !num_elements)
50+
{
51+
throw non_const_array_sizet(src.type(), max_bytes);
52+
}
53+
54+
mp_integer first_element = 0;
55+
56+
// refine the number of elements to extract in case the element width is known
57+
// and a multiple of bytes; otherwise we will expand the entire array/vector
58+
optionalt<mp_integer> max_elements;
59+
if(element_width > 0 && element_width % 8 == 0)
60+
{
61+
mp_integer el_bytes = element_width / 8;
62+
63+
// turn bytes into elements
64+
if(!num_elements)
65+
{
66+
// round up to nearest multiple of el_bytes
67+
max_elements = (*max_bytes_int + el_bytes - 1) / el_bytes;
68+
}
69+
}
2270

23-
/// rewrite an object into its individual bytes
24-
/// \par parameters: src object to unpack
25-
/// little_endian true, iff assumed endianness is little-endian
26-
/// max_bytes if not nil, use as upper bound of the number of bytes to unpack
27-
/// ns namespace for type lookups
71+
// the maximum number of bytes is an upper bound in case the size of the
72+
// array/vector is unknown; if the element_width was usable above this will
73+
// have been turned into a number of elements already
74+
if(!num_elements)
75+
num_elements = *max_elements;
76+
77+
const exprt src_simp = simplify_expr(src, ns);
78+
79+
exprt::operandst byte_operands;
80+
for(mp_integer i = first_element; i < *num_elements; ++i)
81+
{
82+
exprt element;
83+
84+
if(
85+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
86+
i < src_simp.operands().size())
87+
{
88+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
89+
element = src_simp.operands()[index_int];
90+
}
91+
else
92+
{
93+
element = index_exprt(src_simp, from_integer(i, index_type()));
94+
}
95+
96+
// recursively unpack each element until so that we eventually just have an
97+
// array of bytes left
98+
exprt sub = unpack_rec(element, little_endian, max_bytes, ns, true);
99+
byte_operands.insert(
100+
byte_operands.end(), sub.operands().begin(), sub.operands().end());
101+
}
102+
103+
const std::size_t size = byte_operands.size();
104+
return array_exprt(
105+
std::move(byte_operands),
106+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
107+
}
108+
109+
/// Rewrite an object into its individual bytes.
110+
/// \param src: object to unpack
111+
/// \param little_endian: true, iff assumed endianness is little-endian
112+
/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
113+
/// unpack
114+
/// \param ns: namespace for type lookups
115+
/// \param unpack_byte_array: if true, return unmodified \p src iff it is an
116+
// array of bytes
28117
/// \return array of bytes in the sequence found in memory
29118
/// \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
30119
/// object because of either non constant size, byte misalignment or
@@ -34,15 +123,8 @@ static exprt unpack_rec(
34123
bool little_endian,
35124
const exprt &max_bytes,
36125
const namespacet &ns,
37-
bool unpack_byte_array=false)
126+
bool unpack_byte_array)
38127
{
39-
array_exprt array({},
40-
array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
41-
42-
// endianness_mapt should be the point of reference for mapping out
43-
// endianness, but we need to work on elements here instead of
44-
// individual bits
45-
46128
if(src.type().id()==ID_array)
47129
{
48130
const array_typet &array_type=to_array_type(src.type());
@@ -51,54 +133,32 @@ static exprt unpack_rec(
51133
auto element_width = pointer_offset_bits(subtype, ns);
52134
CHECK_RETURN(element_width.has_value());
53135

54-
// this probably doesn't really matter
55-
#if 0
56-
INVARIANT(
57-
element_width > 0,
58-
"element width of array should be constant",
59-
irep_pretty_diagnosticst(src.type()));
60-
61-
INVARIANT(
62-
element_width % 8 == 0,
63-
"elements in array should be byte-aligned",
64-
irep_pretty_diagnosticst(src.type()));
65-
#endif
66-
67136
if(!unpack_byte_array && *element_width == 8)
68137
return src;
69138

70-
auto num_elements = numeric_cast<mp_integer>(max_bytes);
71-
if(!num_elements.has_value())
72-
num_elements = numeric_cast<mp_integer>(array_type.size());
73-
if(!num_elements.has_value())
74-
throw non_const_array_sizet(array_type, max_bytes);
139+
return unpack_array_vector(
140+
src, array_type.size(), *element_width, little_endian, max_bytes, ns);
141+
}
142+
else if(src.type().id() == ID_vector)
143+
{
144+
const vector_typet &vector_type = to_vector_type(src.type());
145+
const typet &subtype = vector_type.subtype();
75146

76-
// all array members will have the same structure; do this just
77-
// once and then replace the dummy symbol by a suitable index
78-
// expression in the loop below
79-
symbol_exprt dummy(ID_C_incomplete, subtype);
80-
exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true);
147+
auto element_width = pointer_offset_bits(subtype, ns);
148+
CHECK_RETURN(element_width.has_value());
81149

82-
for(mp_integer i=0; i<*num_elements; ++i)
83-
{
84-
index_exprt index(src, from_integer(i, index_type()));
85-
replace_symbolt replace;
86-
replace.insert(dummy, index);
150+
if(!unpack_byte_array && *element_width == 8)
151+
return src;
87152

88-
for(const auto &op : sub.operands())
89-
{
90-
exprt new_op=op;
91-
replace(new_op);
92-
simplify(new_op, ns);
93-
array.copy_to_operands(new_op);
94-
}
95-
}
153+
return unpack_array_vector(
154+
src, vector_type.size(), *element_width, little_endian, max_bytes, ns);
96155
}
97156
else if(ns.follow(src.type()).id()==ID_struct)
98157
{
99158
const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
100159
const struct_typet::componentst &components=struct_type.components();
101160

161+
exprt::operandst byte_operands;
102162
for(const auto &comp : components)
103163
{
104164
auto element_width = pointer_offset_bits(comp.type(), ns);
@@ -114,9 +174,14 @@ static exprt unpack_rec(
114174
member_exprt member(src, comp.get_name(), comp.type());
115175
exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true);
116176

117-
for(const auto& op : sub.operands())
118-
array.copy_to_operands(op);
177+
byte_operands.insert(
178+
byte_operands.end(), sub.operands().begin(), sub.operands().end());
119179
}
180+
181+
const std::size_t size = byte_operands.size();
182+
return array_exprt(
183+
std::move(byte_operands),
184+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
120185
}
121186
else if(src.type().id()!=ID_empty)
122187
{
@@ -138,6 +203,7 @@ static exprt unpack_rec(
138203
bits = *bits_opt * 8;
139204
}
140205

206+
exprt::operandst byte_operands;
141207
for(mp_integer i=0; i<bits; i+=8)
142208
{
143209
extractbits_exprt extractbits(
@@ -146,17 +212,23 @@ static exprt unpack_rec(
146212
from_integer(i, index_type()),
147213
unsignedbv_typet(8));
148214

215+
// endianness_mapt should be the point of reference for mapping out
216+
// endianness, but we need to work on elements here instead of
217+
// individual bits
149218
if(little_endian)
150-
array.copy_to_operands(extractbits);
219+
byte_operands.push_back(extractbits);
151220
else
152-
array.operands().insert(array.operands().begin(), extractbits);
221+
byte_operands.insert(byte_operands.begin(), extractbits);
153222
}
154-
}
155223

156-
to_array_type(array.type()).size()=
157-
from_integer(array.operands().size(), size_type());
224+
const std::size_t size = byte_operands.size();
225+
return array_exprt(
226+
std::move(byte_operands),
227+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
228+
}
158229

159-
return std::move(array);
230+
return array_exprt(
231+
{}, array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
160232
}
161233

162234
/// rewrite byte extraction from an array to byte extraction from a
@@ -233,12 +305,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
233305
const typet &subtype=array_type.subtype();
234306

235307
auto element_width = pointer_offset_bits(subtype, ns);
236-
const auto num_elements = numeric_cast<mp_integer>(array_type.size());
237-
// TODO: consider ways of dealing with arrays of unknown subtype
238-
// size or with a subtype size that does not fit byte boundaries
308+
auto num_elements = numeric_cast<mp_integer>(array_type.size());
309+
if(!num_elements.has_value())
310+
num_elements = mp_integer(unpacked.op().operands().size());
311+
312+
// consider ways of dealing with arrays of unknown subtype size or with a
313+
// subtype size that does not fit byte boundaries; currently we fall back to
314+
// stitching together consecutive elements down below
239315
if(
240316
element_width.has_value() && *element_width >= 1 &&
241-
*element_width % 8 == 0 && num_elements.has_value())
317+
*element_width % 8 == 0)
242318
{
243319
array_exprt array({}, array_type);
244320

@@ -258,6 +334,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
258334
return simplify_expr(array, ns);
259335
}
260336
}
337+
else if(src.type().id() == ID_vector)
338+
{
339+
const vector_typet &vector_type = to_vector_type(src.type());
340+
const typet &subtype = vector_type.subtype();
341+
342+
mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size());
343+
344+
auto element_width = pointer_offset_bits(subtype, ns);
345+
CHECK_RETURN(element_width.has_value());
346+
347+
// consider ways of dealing with vectors of unknown subtype size or with a
348+
// subtype size that does not fit byte boundaries; currently we fall back to
349+
// stitching together consecutive elements down below
350+
if(
351+
element_width.has_value() && *element_width >= 1 &&
352+
*element_width % 8 == 0)
353+
{
354+
vector_exprt vector(vector_type);
355+
356+
for(mp_integer i = 0; i < num_elements; ++i)
357+
{
358+
plus_exprt new_offset(
359+
unpacked.offset(),
360+
from_integer(i * (*element_width) / 8, unpacked.offset().type()));
361+
362+
byte_extract_exprt tmp(unpacked);
363+
tmp.type() = subtype;
364+
tmp.offset() = simplify_expr(new_offset, ns);
365+
366+
vector.copy_to_operands(lower_byte_extract(tmp, ns));
367+
}
368+
369+
return simplify_expr(vector, ns);
370+
}
371+
}
261372
else if(ns.follow(src.type()).id()==ID_struct)
262373
{
263374
const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
@@ -299,10 +410,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
299410
const exprt &root=unpacked.op();
300411
const exprt &offset=unpacked.offset();
301412

302-
const array_typet &array_type=to_array_type(root.type());
303-
const typet &subtype=array_type.subtype();
413+
optionalt<typet> subtype;
414+
if(root.type().id() == ID_vector)
415+
subtype = to_vector_type(root.type()).subtype();
416+
else
417+
subtype = to_array_type(root.type()).subtype();
304418

305-
auto subtype_bits = pointer_offset_bits(subtype, ns);
419+
auto subtype_bits = pointer_offset_bits(*subtype, ns);
306420

307421
DATA_INVARIANT(
308422
subtype_bits.has_value() && *subtype_bits == 8,
@@ -344,7 +458,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
344458
else // width_bytes>=2
345459
{
346460
concatenation_exprt concatenation(
347-
std::move(op), bitvector_typet(subtype.id(), width_bytes * 8));
461+
std::move(op), bitvector_typet(subtype->id(), width_bytes * 8));
348462
return simplify_expr(
349463
typecast_exprt(std::move(concatenation), src.type()), ns);
350464
}
@@ -649,4 +763,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
649763
else
650764
return tmp;
651765
}
652-
// clang-format on

src/solvers/lowering/flatten_byte_extract_exceptions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class flatten_byte_extract_exceptiont : public std::runtime_error
2828
class non_const_array_sizet : public flatten_byte_extract_exceptiont
2929
{
3030
public:
31-
non_const_array_sizet(const array_typet &array_type, const exprt &max_bytes)
31+
non_const_array_sizet(const typet &array_type, const exprt &max_bytes)
3232
: flatten_byte_extract_exceptiont("cannot unpack array of non-const size"),
3333
max_bytes(max_bytes),
3434
array_type(array_type)
@@ -47,7 +47,7 @@ class non_const_array_sizet : public flatten_byte_extract_exceptiont
4747

4848
private:
4949
exprt max_bytes;
50-
array_typet array_type;
50+
typet array_type;
5151

5252
std::string computed_error_message;
5353
};

unit/solvers/lowering/byte_operators.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
125125
signedbv_typet(128),
126126
// ieee_float_spect::single_precision().to_type(),
127127
// pointer_typet(u64, 64),
128-
// vector_typet(u8, size),
129-
// vector_typet(u64, size),
128+
vector_typet(u8, size),
129+
vector_typet(u64, size),
130130
// complex_typet(s16),
131131
// complex_typet(u64)
132132
};

0 commit comments

Comments
 (0)