Skip to content

Commit 5c38606

Browse files
committed
byte_extract lowering of vectors and array cleanup
Refactor the code used for arrays to make it re-usable for vectors and arrays.
1 parent 5e69625 commit 5c38606

File tree

3 files changed

+183
-71
lines changed

3 files changed

+183
-71
lines changed

src/solvers/lowering/byte_operators.cpp

Lines changed: 179 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,101 @@ Author: Daniel Kroening, kroening@kroening.com
1818

1919
#include "flatten_byte_extract_exceptions.h"
2020

21-
// clang-format off
21+
static exprt unpack_rec(
22+
const exprt &src,
23+
bool little_endian,
24+
const exprt &max_bytes,
25+
const namespacet &ns,
26+
bool unpack_byte_array = false);
27+
28+
/// Rewrite an array or vector into its individual bytes.
29+
/// \param src: array/vector to unpack
30+
/// \param src_size: array/vector size; if not a constant, \p max_bytes must be
31+
/// a constant value, otherwise we fail with an exception
32+
/// \param element_width: bit width of array/vector elements
33+
/// \param little_endian: true, iff assumed endianness is little-endian
34+
/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
35+
/// unpack
36+
/// \param ns: namespace for type lookups
37+
/// \return array_exprt holding unpacked elements
38+
static array_exprt unpack_array_vector(
39+
const exprt &src,
40+
const exprt &src_size,
41+
const mp_integer &element_width,
42+
bool little_endian,
43+
const exprt &max_bytes,
44+
const namespacet &ns)
45+
{
46+
auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
47+
auto num_elements = numeric_cast<mp_integer>(src_size);
48+
49+
if(!max_bytes_int && !num_elements)
50+
{
51+
throw non_const_array_sizet(src.type(), max_bytes);
52+
}
53+
54+
mp_integer first_element = 0;
55+
56+
// refine the number of elements to extract in case the element width is known
57+
// and a multiple of bytes; otherwise we will expand the entire array/vector
58+
optionalt<mp_integer> max_elements;
59+
if(element_width > 0 && element_width % 8 == 0)
60+
{
61+
mp_integer el_bytes = element_width / 8;
62+
63+
if(!num_elements)
64+
{
65+
// turn bytes into elements, rounding up
66+
max_elements = (*max_bytes_int + el_bytes - 1) / el_bytes;
67+
}
68+
}
2269

23-
/// rewrite an object into its individual bytes
24-
/// \par parameters: src object to unpack
25-
/// little_endian true, iff assumed endianness is little-endian
26-
/// max_bytes if not nil, use as upper bound of the number of bytes to unpack
27-
/// ns namespace for type lookups
70+
// the maximum number of bytes is an upper bound in case the size of the
71+
// array/vector is unknown; if the element_width was usable above this will
72+
// have been turned into a number of elements already
73+
if(!num_elements)
74+
num_elements = *max_elements;
75+
76+
const exprt src_simp = simplify_expr(src, ns);
77+
78+
exprt::operandst byte_operands;
79+
for(mp_integer i = first_element; i < *num_elements; ++i)
80+
{
81+
exprt element;
82+
83+
if(
84+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
85+
i < src_simp.operands().size())
86+
{
87+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
88+
element = src_simp.operands()[index_int];
89+
}
90+
else
91+
{
92+
element = index_exprt(src_simp, from_integer(i, index_type()));
93+
}
94+
95+
// recursively unpack each element until so that we eventually just have an
96+
// array of bytes left
97+
exprt sub = unpack_rec(element, little_endian, max_bytes, ns, true);
98+
byte_operands.insert(
99+
byte_operands.end(), sub.operands().begin(), sub.operands().end());
100+
}
101+
102+
const std::size_t size = byte_operands.size();
103+
return array_exprt(
104+
std::move(byte_operands),
105+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
106+
}
107+
108+
/// Rewrite an object into its individual bytes.
109+
/// \param src: object to unpack
110+
/// \param little_endian: true, iff assumed endianness is little-endian
111+
/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
112+
/// unpack
113+
/// \param ns: namespace for type lookups
114+
/// \param unpack_byte_array: if true, return unmodified \p src iff it is an
115+
// array of bytes
28116
/// \return array of bytes in the sequence found in memory
29117
/// \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
30118
/// object because of either non constant size, byte misalignment or
@@ -34,15 +122,8 @@ static exprt unpack_rec(
34122
bool little_endian,
35123
const exprt &max_bytes,
36124
const namespacet &ns,
37-
bool unpack_byte_array=false)
125+
bool unpack_byte_array)
38126
{
39-
array_exprt array({},
40-
array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
41-
42-
// endianness_mapt should be the point of reference for mapping out
43-
// endianness, but we need to work on elements here instead of
44-
// individual bits
45-
46127
if(src.type().id()==ID_array)
47128
{
48129
const array_typet &array_type=to_array_type(src.type());
@@ -51,54 +132,32 @@ static exprt unpack_rec(
51132
auto element_width = pointer_offset_bits(subtype, ns);
52133
CHECK_RETURN(element_width.has_value());
53134

54-
// this probably doesn't really matter
55-
#if 0
56-
INVARIANT(
57-
element_width > 0,
58-
"element width of array should be constant",
59-
irep_pretty_diagnosticst(src.type()));
60-
61-
INVARIANT(
62-
element_width % 8 == 0,
63-
"elements in array should be byte-aligned",
64-
irep_pretty_diagnosticst(src.type()));
65-
#endif
66-
67135
if(!unpack_byte_array && *element_width == 8)
68136
return src;
69137

70-
auto num_elements = numeric_cast<mp_integer>(max_bytes);
71-
if(!num_elements.has_value())
72-
num_elements = numeric_cast<mp_integer>(array_type.size());
73-
if(!num_elements.has_value())
74-
throw non_const_array_sizet(array_type, max_bytes);
138+
return unpack_array_vector(
139+
src, array_type.size(), *element_width, little_endian, max_bytes, ns);
140+
}
141+
else if(src.type().id() == ID_vector)
142+
{
143+
const vector_typet &vector_type = to_vector_type(src.type());
144+
const typet &subtype = vector_type.subtype();
75145

76-
// all array members will have the same structure; do this just
77-
// once and then replace the dummy symbol by a suitable index
78-
// expression in the loop below
79-
symbol_exprt dummy(ID_C_incomplete, subtype);
80-
exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true);
146+
auto element_width = pointer_offset_bits(subtype, ns);
147+
CHECK_RETURN(element_width.has_value());
81148

82-
for(mp_integer i=0; i<*num_elements; ++i)
83-
{
84-
index_exprt index(src, from_integer(i, index_type()));
85-
replace_symbolt replace;
86-
replace.insert(dummy, index);
149+
if(!unpack_byte_array && *element_width == 8)
150+
return src;
87151

88-
for(const auto &op : sub.operands())
89-
{
90-
exprt new_op=op;
91-
replace(new_op);
92-
simplify(new_op, ns);
93-
array.copy_to_operands(new_op);
94-
}
95-
}
152+
return unpack_array_vector(
153+
src, vector_type.size(), *element_width, little_endian, max_bytes, ns);
96154
}
97155
else if(ns.follow(src.type()).id()==ID_struct)
98156
{
99157
const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
100158
const struct_typet::componentst &components=struct_type.components();
101159

160+
exprt::operandst byte_operands;
102161
for(const auto &comp : components)
103162
{
104163
auto element_width = pointer_offset_bits(comp.type(), ns);
@@ -114,9 +173,14 @@ static exprt unpack_rec(
114173
member_exprt member(src, comp.get_name(), comp.type());
115174
exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true);
116175

117-
for(const auto& op : sub.operands())
118-
array.copy_to_operands(op);
176+
byte_operands.insert(
177+
byte_operands.end(), sub.operands().begin(), sub.operands().end());
119178
}
179+
180+
const std::size_t size = byte_operands.size();
181+
return array_exprt(
182+
std::move(byte_operands),
183+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
120184
}
121185
else if(src.type().id()!=ID_empty)
122186
{
@@ -138,6 +202,7 @@ static exprt unpack_rec(
138202
bits = *bits_opt * 8;
139203
}
140204

205+
exprt::operandst byte_operands;
141206
for(mp_integer i=0; i<bits; i+=8)
142207
{
143208
extractbits_exprt extractbits(
@@ -146,17 +211,23 @@ static exprt unpack_rec(
146211
from_integer(i, index_type()),
147212
unsignedbv_typet(8));
148213

214+
// endianness_mapt should be the point of reference for mapping out
215+
// endianness, but we need to work on elements here instead of
216+
// individual bits
149217
if(little_endian)
150-
array.copy_to_operands(extractbits);
218+
byte_operands.push_back(extractbits);
151219
else
152-
array.operands().insert(array.operands().begin(), extractbits);
220+
byte_operands.insert(byte_operands.begin(), extractbits);
153221
}
154-
}
155222

156-
to_array_type(array.type()).size()=
157-
from_integer(array.operands().size(), size_type());
223+
const std::size_t size = byte_operands.size();
224+
return array_exprt(
225+
std::move(byte_operands),
226+
array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
227+
}
158228

159-
return std::move(array);
229+
return array_exprt(
230+
{}, array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
160231
}
161232

162233
/// rewrite byte extraction from an array to byte extraction from a
@@ -233,12 +304,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
233304
const typet &subtype=array_type.subtype();
234305

235306
auto element_width = pointer_offset_bits(subtype, ns);
236-
const auto num_elements = numeric_cast<mp_integer>(array_type.size());
237-
// TODO: consider ways of dealing with arrays of unknown subtype
238-
// size or with a subtype size that does not fit byte boundaries
307+
auto num_elements = numeric_cast<mp_integer>(array_type.size());
308+
if(!num_elements.has_value())
309+
num_elements = mp_integer(unpacked.op().operands().size());
310+
311+
// consider ways of dealing with arrays of unknown subtype size or with a
312+
// subtype size that does not fit byte boundaries; currently we fall back to
313+
// stitching together consecutive elements down below
239314
if(
240315
element_width.has_value() && *element_width >= 1 &&
241-
*element_width % 8 == 0 && num_elements.has_value())
316+
*element_width % 8 == 0)
242317
{
243318
array_exprt array({}, array_type);
244319

@@ -258,6 +333,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
258333
return simplify_expr(array, ns);
259334
}
260335
}
336+
else if(src.type().id() == ID_vector)
337+
{
338+
const vector_typet &vector_type = to_vector_type(src.type());
339+
const typet &subtype = vector_type.subtype();
340+
341+
mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size());
342+
343+
auto element_width = pointer_offset_bits(subtype, ns);
344+
CHECK_RETURN(element_width.has_value());
345+
346+
// consider ways of dealing with vectors of unknown subtype size or with a
347+
// subtype size that does not fit byte boundaries; currently we fall back to
348+
// stitching together consecutive elements down below
349+
if(
350+
element_width.has_value() && *element_width >= 1 &&
351+
*element_width % 8 == 0)
352+
{
353+
vector_exprt vector(vector_type);
354+
355+
for(mp_integer i = 0; i < num_elements; ++i)
356+
{
357+
plus_exprt new_offset(
358+
unpacked.offset(),
359+
from_integer(i * (*element_width) / 8, unpacked.offset().type()));
360+
361+
byte_extract_exprt tmp(unpacked);
362+
tmp.type() = subtype;
363+
tmp.offset() = simplify_expr(new_offset, ns);
364+
365+
vector.copy_to_operands(lower_byte_extract(tmp, ns));
366+
}
367+
368+
return simplify_expr(vector, ns);
369+
}
370+
}
261371
else if(ns.follow(src.type()).id()==ID_struct)
262372
{
263373
const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
@@ -299,10 +409,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
299409
const exprt &root=unpacked.op();
300410
const exprt &offset=unpacked.offset();
301411

302-
const array_typet &array_type=to_array_type(root.type());
303-
const typet &subtype=array_type.subtype();
412+
optionalt<typet> subtype;
413+
if(root.type().id() == ID_vector)
414+
subtype = to_vector_type(root.type()).subtype();
415+
else
416+
subtype = to_array_type(root.type()).subtype();
304417

305-
auto subtype_bits = pointer_offset_bits(subtype, ns);
418+
auto subtype_bits = pointer_offset_bits(*subtype, ns);
306419

307420
DATA_INVARIANT(
308421
subtype_bits.has_value() && *subtype_bits == 8,
@@ -344,7 +457,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
344457
else // width_bytes>=2
345458
{
346459
concatenation_exprt concatenation(
347-
std::move(op), bitvector_typet(subtype.id(), width_bytes * 8));
460+
std::move(op), bitvector_typet(subtype->id(), width_bytes * 8));
348461
return simplify_expr(
349462
typecast_exprt(std::move(concatenation), src.type()), ns);
350463
}
@@ -649,4 +762,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
649762
else
650763
return tmp;
651764
}
652-
// clang-format on

src/solvers/lowering/flatten_byte_extract_exceptions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class flatten_byte_extract_exceptiont : public std::runtime_error
2828
class non_const_array_sizet : public flatten_byte_extract_exceptiont
2929
{
3030
public:
31-
non_const_array_sizet(const array_typet &array_type, const exprt &max_bytes)
31+
non_const_array_sizet(const typet &array_type, const exprt &max_bytes)
3232
: flatten_byte_extract_exceptiont("cannot unpack array of non-const size"),
3333
max_bytes(max_bytes),
3434
array_type(array_type)
@@ -47,7 +47,7 @@ class non_const_array_sizet : public flatten_byte_extract_exceptiont
4747

4848
private:
4949
exprt max_bytes;
50-
array_typet array_type;
50+
typet array_type;
5151

5252
std::string computed_error_message;
5353
};

unit/solvers/lowering/byte_operators.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
125125
signedbv_typet(128),
126126
// ieee_float_spect::single_precision().to_type(),
127127
// pointer_typet(u64, 64),
128-
// vector_typet(u8, size),
129-
// vector_typet(u64, size),
128+
vector_typet(u8, size),
129+
vector_typet(u64, size),
130130
// complex_typet(s16),
131131
// complex_typet(u64)
132132
};

0 commit comments

Comments
 (0)