From e63dc7efc2f14e04522c430b3da52afcc4a4d0be Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Wed, 17 Feb 2021 17:31:46 -0800 Subject: [PATCH] more rewrite rules --- src/ast/rewriter/seq_axioms.cpp | 49 ++++++++++-- src/ast/rewriter/seq_axioms.h | 2 + src/ast/rewriter/seq_rewriter.cpp | 126 +++++++++++++++++++++++------- src/ast/rewriter/seq_rewriter.h | 1 + src/util/rational.h | 11 ++- 5 files changed, 150 insertions(+), 39 deletions(-) diff --git a/src/ast/rewriter/seq_axioms.cpp b/src/ast/rewriter/seq_axioms.cpp index 5798df1105..e7a4da35cd 100644 --- a/src/ast/rewriter/seq_axioms.cpp +++ b/src/ast/rewriter/seq_axioms.cpp @@ -127,31 +127,34 @@ namespace seq { auto s = purify(_s); auto i = purify(_i); auto l = purify(_l); - if (is_tail(s, i, l)) { + if (is_tail(s, _i, _l)) { tail_axiom(e, s); return; } - if (is_drop_last(s, i, l)) { + if (is_drop_last(s, _i, _l)) { drop_last_axiom(e, s); return; } - if (is_extract_prefix0(s, i, l)) { + if (is_extract_prefix0(s, _i, _l)) { extract_prefix_axiom(e, s, l); return; } + if (is_extract_suffix(s, _i, _l)) { + return; + } expr_ref x = m_sk.mk_pre(s, i); - expr_ref ls = mk_len(s); + expr_ref ls = mk_len(_s); expr_ref lx = mk_len(x); expr_ref le = mk_len(e); - expr_ref ls_minus_i_l(mk_sub(mk_sub(ls, i), l), m); + expr_ref ls_minus_i_l(mk_sub(mk_sub(ls, _i), _l), m); expr_ref y = m_sk.mk_post(s, a.mk_add(i, l)); expr_ref xe = mk_concat(x, e); expr_ref xey = mk_concat(x, e, y); expr_ref zero(a.mk_int(0), m); - expr_ref i_ge_0 = mk_ge(i, 0); - expr_ref i_le_ls = mk_le(mk_sub(i, ls), 0); - expr_ref ls_le_i = mk_le(mk_sub(ls, i), 0); + expr_ref i_ge_0 = mk_ge(_i, 0); + expr_ref i_le_ls = mk_le(mk_sub(_i, ls), 0); + expr_ref ls_le_i = mk_le(mk_sub(ls, _i), 0); expr_ref ls_ge_li = mk_ge(ls_minus_i_l, 0); expr_ref l_ge_0 = mk_ge(l, 0); expr_ref l_le_0 = mk_le(l, 0); @@ -224,6 +227,13 @@ namespace seq { return a.is_numeral(i, i1) && i1.is_zero(); } + bool axioms::is_extract_suffix(expr* s, expr* i, expr* l) { + expr_ref len(a.mk_add(l, i), m); + m_rewrite(len); + return seq.str.is_length(len, l) && l == s; + } + + /* s = ey l <= 0 => e = empty @@ -231,6 +241,7 @@ namespace seq { len(s) < l => e = s */ void axioms::extract_prefix_axiom(expr* e, expr* s, expr* l) { + TRACE("seq", tout << "prefix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << " " << mk_bounded_pp(l, m, 2) << "\n";); expr_ref le = mk_len(e); expr_ref ls = mk_len(s); @@ -244,6 +255,28 @@ namespace seq { add_clause(l_le_s, mk_eq(e, s)); } + /* + s = xe + 0 <= i <= len(s) => i = len(x) + i < 0 => e = empty + i > len(s) => e = empty + */ + void axioms::extract_suffix_axiom(expr* e, expr* s, expr* i) { + TRACE("seq", tout << "suffix " << mk_bounded_pp(e, m, 2) << " " << mk_bounded_pp(s, m, 2) << "\n";); + expr_ref x = m_sk.mk_pre(s, i); + expr_ref lx = mk_len(x); + expr_ref ls = mk_len(s); + expr_ref xe = mk_concat(x, e); + expr_ref emp = mk_eq_empty(e); + expr_ref i_ge_0 = mk_ge(i, 0); + expr_ref i_le_s = mk_le(mk_sub(i, ls), 0); + add_clause(mk_eq(s, xe)); + add_clause(~i_ge_0, ~i_le_s, mk_eq(i, lx)); + add_clause(i_ge_0, emp); + add_clause(i_le_s, emp); + } + + /* encode that s is not contained in of xs1 where s1 is all of s, except the last element. diff --git a/src/ast/rewriter/seq_axioms.h b/src/ast/rewriter/seq_axioms.h index be15a81669..16b166c62e 100644 --- a/src/ast/rewriter/seq_axioms.h +++ b/src/ast/rewriter/seq_axioms.h @@ -62,10 +62,12 @@ namespace seq { bool is_drop_last(expr* s, expr* i, expr* l); bool is_tail(expr* s, expr* i, expr* l); bool is_extract_prefix0(expr* s, expr* i, expr* l); + bool is_extract_suffix(expr* s, expr* i, expr* l); void tail_axiom(expr* e, expr* s); void drop_last_axiom(expr* e, expr* s); void extract_prefix_axiom(expr* e, expr* s, expr* l); + void extract_suffix_axiom(expr* e, expr* s, expr* l); void tightest_prefix(expr* s, expr* x); public: diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 362ee43e2a..f2b22f41bc 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -966,7 +966,6 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu bool constantBase = str().is_string(a, s); bool constantPos = m_autil.is_numeral(b, pos); bool constantLen = m_autil.is_numeral(c, len); - bool lengthPos = str().is_length(b) || m_autil.is_add(b); sort* a_sort = a->get_sort(); sign sg; @@ -988,6 +987,12 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu return BR_DONE; } + unsigned len_a; + if (constantPos && max_length(a, len_a) && rational(len_a) <= pos) { + result = str().mk_empty(a_sort); + return BR_DONE; + } + constantPos &= pos.is_unsigned(); constantLen &= len.is_unsigned(); @@ -1013,20 +1018,21 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu return BR_DONE; } + auto reassemble = [&](rational const& p, expr_ref_vector const& xs) { + expr_ref r(m_autil.mk_int(p), m()); + for (expr* e : xs) + r = m_autil.mk_add(r, str().mk_length(e)); + return r; + }; + // extract(a + b + c, len(a + b), s) -> extract(c, 0, s) // extract(a + b + c, len(a) + len(b), s) -> extract(c, 0, s) - if (lengthPos) { - - m_lhs.reset(); - expr_ref_vector lens(m()); - str().get_concat(a, m_lhs); - TRACE("seq", tout << m_lhs << " " << pos << " " << lens << "\n";); - if (!get_lengths(b, lens, pos) || pos.is_neg()) { - return BR_FAILED; - } + + expr_ref_vector lens(m()); + if (get_lengths(b, lens, pos) && pos >= 0) { unsigned i = 0; - for (; i < m_lhs.size(); ++i) { - expr* lhs = m_lhs.get(i); + for (; i < as.size(); ++i) { + expr* lhs = as.get(i); if (lens.contains(lhs)) { lens.erase(lhs); } @@ -1037,32 +1043,54 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu break; } } - if (i == 0) return BR_FAILED; - expr_ref t1(m()), t2(m()); - t1 = str().mk_concat(m_lhs.size() - i, m_lhs.c_ptr() + i, a->get_sort()); - t2 = m_autil.mk_int(pos); - for (expr* rhs : lens) { - t2 = m_autil.mk_add(t2, str().mk_length(rhs)); + if (i != 0) { + expr_ref t1(m()); + t1 = str().mk_concat(as.size() - i, as.c_ptr() + i, a->get_sort()); + expr_ref t2 = reassemble(pos, lens); + result = str().mk_substr(t1, t2, c); + TRACE("seq", tout << result << "\n";); + return BR_REWRITE2; } - result = str().mk_substr(t1, t2, c); - TRACE("seq", tout << result << "\n";); - return BR_REWRITE2; } + if (!constantPos) { return BR_FAILED; } unsigned _pos = pos.get_unsigned(); - // (extract s 0 (len s)) = s - expr* a2 = nullptr; - if (_pos == 0 && str().is_length(c, a2)) { - m_lhs.reset(); - str().get_concat(a, m_lhs); - if (!m_lhs.empty() && m_lhs.get(0) == a2) { - result = a2; + // extract(a + b + c, 0, len(a) + len(b)) -> c + if (pos.is_zero()) { + lens.reset(); + if (!get_lengths(c, lens, pos) || pos.is_neg()) + return BR_FAILED; + unsigned i = 0; + for (; i < as.size(); ++i) { + expr* lhs = as.get(i); + if (lens.contains(lhs)) { + lens.erase(lhs); + } + else if (str().is_unit(lhs) && pos.is_pos()) { + pos -= rational(1); + } + else { + break; + } + } + if (i == as.size()) { + result = a; return BR_DONE; } + else if (i != 0) { + expr_ref t1(m()), t2(m()); + t1 = str().mk_concat(as.size() - i, as.c_ptr() + i, a->get_sort()); + t2 = reassemble(pos, lens); + result = str().mk_substr(t1, b, t2); + as[i] = result; + result = str().mk_concat(i + 1, as.c_ptr(), a->get_sort()); + TRACE("seq", tout << result << "\n";); + return BR_REWRITE2; + } } expr* a1 = nullptr, *b1 = nullptr, *c1 = nullptr; @@ -1123,16 +1151,23 @@ br_status seq_rewriter::mk_seq_extract(expr* a, expr* b, expr* c, expr_ref& resu } bool seq_rewriter::get_lengths(expr* e, expr_ref_vector& lens, rational& pos) { - expr* arg = nullptr; + expr* arg = nullptr, *e1 = nullptr, *e2 = nullptr; rational pos1; if (m_autil.is_add(e)) { for (expr* arg1 : *to_app(e)) { - if (!get_lengths(arg1, lens, pos)) return false; + if (!get_lengths(arg1, lens, pos)) + return false; } } else if (str().is_length(e, arg)) { lens.push_back(arg); } + else if (m_autil.is_mul(e, e1, e2) && m_autil.is_numeral(e1, pos1) && str().is_length(e2, arg) && pos1 <= 10) { + while (pos1 > 0) { + lens.push_back(arg); + pos1 -= rational(1); + } + } else if (m_autil.is_numeral(e, pos1)) { pos += pos1; } @@ -4496,6 +4531,37 @@ bool seq_rewriter::min_length(expr_ref_vector const& es, unsigned& len) { return bounded; } +bool seq_rewriter::max_length(expr* e, unsigned& len) { + if (str().is_unit(e)) { + len = 1; + return true; + } + if (str().is_at(e)) { + len = 1; + return true; + } + zstring s; + if (str().is_string(e, s)) { + len = s.length(); + return true; + } + if (str().is_empty(e)) { + len = 0; + return true; + } + if (str().is_concat(e)) { + unsigned l = 0; + len = 0; + for (expr* arg : *to_app(e)) { + if (!max_length(arg, l)) + return false; + len += l; + } + return true; + } + return false; +} + bool seq_rewriter::is_string(unsigned n, expr* const* es, zstring& s) const { zstring s1; expr* e; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index d503ba0229..314a83d4f4 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -282,6 +282,7 @@ class seq_rewriter { bool reduce_itos(expr_ref_vector& ls, expr_ref_vector& rs, expr_ref_pair_vector& eqs); bool reduce_eq_empty(expr* l, expr* r, expr_ref& result); bool min_length(expr_ref_vector const& es, unsigned& len); + bool max_length(expr* e, unsigned& len); expr* concat_non_empty(expr_ref_vector& es); bool is_string(unsigned n, expr* const* es, zstring& s) const; diff --git a/src/util/rational.h b/src/util/rational.h index e0a76d9eed..7bf8002949 100644 --- a/src/util/rational.h +++ b/src/util/rational.h @@ -500,7 +500,16 @@ inline bool operator>(int a, rational const & b) { } -inline bool operator!=(rational const & a, int b) { +inline bool operator>=(rational const& a, int b) { + return a >= rational(b); +} + +inline bool operator>=(int a, rational const& b) { + return rational(a) >= b; +} + + +inline bool operator!=(rational const& a, int b) { return !(a == rational(b)); }