Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified regression/strings-smoke-tests/java_case/test_case.class
Binary file not shown.
2 changes: 1 addition & 1 deletion regression/strings-smoke-tests/java_contains/test.desc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CORE
test_contains.class
--refine-strings
--refine-strings --string-max-length 100
^EXIT=10$
^SIGNAL=0$
^\[.*assertion.1\].* line 8.* SUCCESS$
Expand Down
2 changes: 1 addition & 1 deletion src/doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,7 @@ FORMULA_TRANSPARENT = YES
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

USE_MATHJAX = NO
USE_MATHJAX = YES

# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
Expand Down
33 changes: 33 additions & 0 deletions src/solvers/refinement/string_constraint.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,30 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com
#include <solvers/refinement/bv_refinement.h>
#include <solvers/refinement/string_refinement_invariant.h>
#include <util/refined_string_type.h>
#include <langapi/language_util.h>

/*! \brief Universally quantified string constraint

This represents a universally quantified string constraint as laid out in
DOI: 10.1007/978-3-319-03077-7. The paper seems to specify a universal
constraint as follows.

A universal constraint is of the form \f$\forall n. L(n) \rightarrow
P(n, s_0,\ldots, s_k)\f$ where

1. \f$L(n)\f$ does not contain string indices [possibly not required, but
implied by examples]
2. \f$\forall n. L(n) \rightarrow P'\left(s_0[f_0(n)],\ldots, s_k[f_k(n)]
\right)\f$, i.e. when focusing on one specific string, all indices are
the same [stated in a roundabout manner]
3. Each \f$f\f$ is linear and therefore has an inverse [explicitly stated]
4. \f$L(n)\f$ and \f$P(n, s_0,\ldots, s_k)\f$ may contain other (free)
variables, but in \f$P\f$, \f$n\f$ can only occur as an argument to an
\f$f\f$ [explicitly stated, implied]

We extend this slightly by restricting n to be in a specific range, but this
is another implication which can be pushed in to \f$L(n)\f$.
*/

class string_constraintt: public exprt
{
Expand Down Expand Up @@ -114,6 +138,15 @@ extern inline string_constraintt &to_string_constraint(exprt &expr)
return static_cast<string_constraintt &>(expr);
}

inline static std::string from_expr(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see that this function is used anywhere. Do you wish to keep it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it's used in about 4 or 5 places, mostly in string_refinementt::is_valid_string_constraint.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh alright then!

const namespacet &ns,
const irep_idt &identifier,
const string_constraintt &expr)
{
return from_expr(ns, identifier, expr.premise())+" => "+
from_expr(ns, identifier, expr.body());
}

class string_not_contains_constraintt: public exprt
{
public:
Expand Down
168 changes: 48 additions & 120 deletions src/solvers/refinement/string_constraint_generator_indexof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com
#include <solvers/refinement/string_constraint_generator.h>

/// Add axioms stating that the returned value is the index within str of the
/// first occurence of c starting the search at from_index, or -1 if no such
/// first occurrence of c starting the search at from_index, or -1 if no such
/// character occurs at or after position from_index.
/// \param str: a string expression
/// \param c: an expression representing a character
Expand Down Expand Up @@ -69,7 +69,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of(
}

/// Add axioms stating that the returned value is the index within haystack of
/// the first occurence of needle starting the search at from_index, or -1 if
/// the first occurrence of needle starting the search at from_index, or -1 if
/// needle does not occur at or after position from_index.
/// \param haystack: a string expression
/// \param needle: a string expression
Expand All @@ -92,7 +92,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string(
// contains ==> haystack[n+offset]=needle[n]
// a4 : forall n:[from_index,offset[.
// contains ==> (exists m:[0,|needle|[. haystack[m+n] != needle[m]])
// a5: forall n:[from_index,|haystack|-|needle|[.
// a5: forall n:[from_index,|haystack|-|needle|].
// !contains ==> (exists m:[0,|needle|[. haystack[m+n] != needle[m])

implies_exprt a1(
Expand All @@ -116,70 +116,35 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string(
equal_exprt(haystack[plus_exprt(qvar, offset)], needle[qvar]));
axioms.push_back(a3);

if(!is_constant_string(needle))
{
// string_not contains_constraintt are formulas of the form:
// forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y]
string_not_contains_constraintt a4(
from_index,
offset,
contains,
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a4);

string_not_contains_constraintt a5(
from_index,
// string_not contains_constraintt are formulas of the form:
// forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y]
string_not_contains_constraintt a4(
from_index,
offset,
contains,
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a4);

string_not_contains_constraintt a5(
from_index,
plus_exprt( // Add 1 for inclusive upper bound.
minus_exprt(haystack.length(), needle.length()),
not_exprt(contains),
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a5);
}
else
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think we could optimise for the constant case (by complete unfolding). Could we put a TODO there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After we talked IRL, we decided against a TODO.

{
// Unfold the existential quantifier as a disjunction in case of a constant
// a4 && a5 <=> a6:
// forall n:[from_index,|haystack|-|needle|].
// !contains || n < offset ==>
// haystack[n] != needle[0] || ... ||
// haystack[n+|needle|-1] != needle[|needle|-1]
symbol_exprt qvar2=fresh_univ_index("QA_index_of_string_2", index_type);
mp_integer sub_length;
INVARIANT(
!to_integer(needle.length(), sub_length),
string_refinement_invariantt("a constant string must have constant "
"length"));
exprt::operandst disjuncts;
for(mp_integer offset=0; offset<sub_length; ++offset)
{
exprt expr_offset=from_integer(offset, index_type);
plus_exprt shifted(expr_offset, qvar2);
disjuncts.push_back(
not_exprt(equal_exprt(haystack[shifted], needle[expr_offset])));
}

or_exprt premise(
not_exprt(contains), binary_relation_exprt(qvar2, ID_lt, offset));
minus_exprt length_diff(haystack.length(), needle.length());
string_constraintt a6(
qvar2,
from_index,
plus_exprt(from_integer(1, index_type), length_diff),
premise,
disjunction(disjuncts));
axioms.push_back(a6);
}
from_integer(1, index_type)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably this is to create an exclusive upper-bound? If so put a quick end-of-line comment saying so

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bounds are [lower, upper), so to create [lower, upper], I did [lower, upper+1). I'll add a comment.

not_exprt(contains),
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a5);

return offset;
}

/// Add axioms stating that the returned value is the index within haystack of
/// the last occurence of needle starting the search backward at from_index (ie
/// the last occurrence of needle starting the search backward at from_index (ie
/// the index is smaller or equal to from_index), or -1 if needle does not occur
/// before from_index.
/// \param haystack: a string expression
Expand Down Expand Up @@ -235,62 +200,25 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string(
from_index,
length_diff);

if(!is_constant_string(needle))
{
string_not_contains_constraintt a4(
plus_exprt(offset, from_integer(1, index_type)),
plus_exprt(end_index, from_integer(1, index_type)),
contains,
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a4);

string_not_contains_constraintt a5(
from_integer(0, index_type),
plus_exprt(end_index, from_integer(1, index_type)),
not_exprt(contains),
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a5);
}
else
{
// Unfold the existential quantifier as a disjunction in case of a constant
// a4 && a5 <=> a6:
// forall n:[0, min(from_index, |haystack| - |needle|)].
// !contains || (n > offset) ==>
// haystack[n] != needle[0] || ... ||
// haystack[n+|needle|-1] != needle[|needle|-1]
symbol_exprt qvar2=fresh_univ_index("QA_index_of_string_2", index_type);
mp_integer sub_length;
INVARIANT(
!to_integer(needle.length(), sub_length),
string_refinement_invariantt("a constant string must have constant "
"length"));
exprt::operandst disjuncts;
for(mp_integer offset=0; offset<sub_length; ++offset)
{
exprt expr_offset=from_integer(offset, index_type);
plus_exprt shifted(expr_offset, qvar2);
disjuncts.push_back(
not_exprt(equal_exprt(haystack[shifted], needle[expr_offset])));
}

or_exprt premise(
not_exprt(contains), binary_relation_exprt(qvar2, ID_gt, offset));

string_constraintt a6(
qvar2,
from_integer(0, index_type),
plus_exprt(from_integer(1, index_type), end_index),
premise,
disjunction(disjuncts));
axioms.push_back(a6);
}
string_not_contains_constraintt a4(
plus_exprt(offset, from_integer(1, index_type)),
plus_exprt(end_index, from_integer(1, index_type)),
contains,
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a4);

string_not_contains_constraintt a5(
from_integer(0, index_type),
plus_exprt(end_index, from_integer(1, index_type)),
not_exprt(contains),
from_integer(0, index_type),
needle.length(),
haystack,
needle);
axioms.push_back(a5);

return offset;
}
Expand Down Expand Up @@ -333,7 +261,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of(
}

/// Add axioms stating that the returned value is the index within str of the
/// last occurence of c starting the search backward at from_index, or -1 if no
/// last occurrence of c starting the search backward at from_index, or -1 if no
/// such character occurs at or before position from_index.
/// \param str: a string expression
/// \param c: an expression representing a character
Expand Down Expand Up @@ -373,7 +301,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of(
equal_exprt(str[index], c)));
axioms.push_back(a3);

symbol_exprt n=fresh_univ_index("QA_last_index_of", index_type);
symbol_exprt n=fresh_univ_index("QA_last_index_of1", index_type);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double-check whether the fresh... logic will add a numerical suffix that will be confusing when paired with the numerical suffixes specified here, and if so rename

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The final suffix is a number which is incremented globally for any symbol. I think it makes sense to put our own number here to figure out which axiom variable is being used and will not be confusing as there is a # separator.

string_constraintt a4(
n,
plus_exprt(index, index1),
Expand All @@ -382,7 +310,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of(
not_exprt(equal_exprt(str[n], c)));
axioms.push_back(a4);

symbol_exprt m=fresh_univ_index("QA_last_index_of", index_type);
symbol_exprt m=fresh_univ_index("QA_last_index_of2", index_type);
string_constraintt a5(
m,
from_index_plus_one,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ string_exprt string_constraint_generatort::convert_java_string_to_string_exprt(
{
java_content=dereference_exprt(java_content, java_content.type());
}

refined_string_typet type(java_int_type(), java_char_type());

return string_exprt(length, java_content, type);
Expand Down
81 changes: 19 additions & 62 deletions src/solvers/refinement/string_constraint_generator_testing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ exprt string_constraint_generatort::add_axioms_for_contains(
PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
string_exprt s0=get_string_expr(args(f, 2)[0]);
string_exprt s1=get_string_expr(args(f, 2)[1]);
bool constant=is_constant_string(s1);

symbol_exprt contains=fresh_boolean("contains");
const refined_string_typet ref_type=to_refined_string_type(s0.type());
Expand Down Expand Up @@ -218,66 +217,24 @@ exprt string_constraint_generatort::add_axioms_for_contains(
equal_exprt(startpos, from_integer(-1, index_type)));
axioms.push_back(a3);

if(constant)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd leave a TODO here too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

{
// If the string is constant, we can use a more efficient axiom for a4:
// contains ==> AND_{i < |s1|} s1[i] = s0[startpos + i]
mp_integer s1_length;
INVARIANT(
!to_integer(s1.length(), s1_length),
string_refinement_invariantt("a constant string expression must have a "
"constant length"));
exprt::operandst conjuncts;
for(mp_integer i=0; i<s1_length; ++i)
{
exprt expr_i=from_integer(i, index_type);
plus_exprt shifted_i(expr_i, startpos);
conjuncts.push_back(equal_exprt(s1[expr_i], s0[shifted_i]));
}
implies_exprt a4(contains, conjunction(conjuncts));
axioms.push_back(a4);

// The a5 constraint for constant strings translates to:
// !contains ==> |s1| > |s0| ||
// (forall qvar <= |s0| - |s1|.
// !(AND_{i < |s1|} s1[i] == s0[i + qvar])
//
// which we implement as:
// forall qvar <= |s0| - |s1|. (!contains && |s0| >= |s1|)
// ==> !(AND_{i < |s1|} (s1[i] == s0[qvar+i]))
symbol_exprt qvar=fresh_univ_index("QA_contains_constant", index_type);
exprt::operandst conjuncts1;
for(mp_integer i=0; i<s1_length; ++i)
{
exprt expr_i=from_integer(i, index_type);
plus_exprt shifted_i(expr_i, qvar);
conjuncts1.push_back(equal_exprt(s1[expr_i], s0[shifted_i]));
}

string_constraintt a5(
qvar,
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
not_exprt(conjunction(conjuncts1)));
axioms.push_back(a5);
}
else
{
symbol_exprt qvar=fresh_univ_index("QA_contains", index_type);
exprt qvar_shifted=plus_exprt(qvar, startpos);
string_constraintt a4(
qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted]));
axioms.push_back(a4);

// We rewrite axiom a4 as:
// forall startpos <= |s0|-|s1|. (!contains && |s0| >= |s1|)
// ==> exists witness < |s1|. s1[witness] != s0[startpos+witness]
string_not_contains_constraintt a5(
from_integer(0, index_type),
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
from_integer(0, index_type), s1.length(), s0, s1);
axioms.push_back(a5);
}
symbol_exprt qvar=fresh_univ_index("QA_contains", index_type);
exprt qvar_shifted=plus_exprt(qvar, startpos);
string_constraintt a4(
qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted]));
axioms.push_back(a4);

// We rewrite axiom a4 as:
// forall startpos <= |s0|-|s1|. (!contains && |s0| >= |s1|)
// ==> exists witness < |s1|. s1[witness] != s0[startpos+witness]
string_not_contains_constraintt a5(
from_integer(0, index_type),
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
from_integer(0, index_type),
s1.length(),
s0,
s1);
axioms.push_back(a5);

return typecast_exprt(contains, f.type());
}
Loading