Skip to content

Commit

Permalink
#2946. Addition of regex_matches/match_regex
Browse files Browse the repository at this point in the history
- 'regex_matches': an operator that returns the list of subsequences of
a string that match a given pattern.
- 'match_regex': a new branch in the switch statement that verifies if
portions of a string match a regular expression.
  • Loading branch information
AlexisDrogoul committed Aug 17, 2021
1 parent 5f1dd5f commit a31ae4d
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 68 deletions.
116 changes: 71 additions & 45 deletions msi.gama.core/src/msi/gaml/operators/Strings.java
Expand Up @@ -11,6 +11,10 @@
package msi.gaml.operators;

import java.util.StringTokenizer;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

import msi.gama.common.interfaces.IKeyword;
import msi.gama.precompiler.GamlAnnotations.doc;
Expand All @@ -26,7 +30,6 @@
import msi.gama.util.IList;
import msi.gaml.types.IType;
import msi.gaml.types.Types;
import ummisco.gama.dev.utils.DEBUG;

/**
* Written by drogoul Modified on 10 déc. 2010
Expand All @@ -37,11 +40,11 @@
@SuppressWarnings ({ "rawtypes" })
public class Strings {

// static {
// DEBUG.ON();
// }
// static {
// DEBUG.ON();
// }

public static final String LN = java.lang.System.getProperty("line.separator");
public static final String LN = java.lang.System.lineSeparator();
public static final String TAB = "\t";

@operator (
Expand Down Expand Up @@ -138,7 +141,7 @@ public static Boolean opContainsAny(final String target, final IList l) {
equals = "false")))
/**
 * Tells whether every element of {@code l} is a string contained in {@code target}.
 *
 * @param target
 *            the string that is searched
 * @param l
 *            the candidates; an element that is not a {@code String} makes the whole test fail
 * @return {@code false} as soon as one element is not a string or is not found in {@code target} by
 *         {@code opContains}, {@code true} otherwise (including when {@code l} is empty)
 */
public static Boolean opContainsAll(final String target, final IList l) {
	for (final Object element : l) {
		final boolean contained = element instanceof String && opContains(target, (String) element);
		if (!contained) return false;
	}
	return true;
}
Expand Down Expand Up @@ -235,7 +238,7 @@ public static IList opTokenize(final IScope scope, final String target, final St
category = { IOperatorCategory.STRING },
concept = { IConcept.STRING })
@doc (
value = "Returns the String resulting by replacing for the first operand all the sub-strings corresponding the second operand by the third operand",
value = "Returns the string obtained by replacing by the third operand, in the first operand, all the sub-strings equal to the second operand",
examples = @example (
value = "replace('to be or not to be,that is the question','to', 'do')",
equals = "'do be or not do be,that is the question'"),
Expand All @@ -250,16 +253,40 @@ public static String opReplace(final String target, final String pattern, final
category = { IOperatorCategory.STRING },
concept = { IConcept.STRING })
@doc (
value = "Returns the String resulting by replacing for the first operand all the sub-strings corresponding to the regular expression given in the second operand by the third operand",
value = "Returns the string obtained by replacing by the third operand, in the first operand, all the sub-strings that match the regular expression of the second operand",
examples = @example (
value = "replace_regex(\"colour, color\", \"colou?r\", \"col\")",
equals = "'col, col'"),
see = { "replace" })
/**
 * Replaces, in {@code target}, every sub-string matching the regular expression {@code pattern} by
 * {@code replacement}.
 *
 * @param target
 *            the string in which the replacements take place
 * @param pattern
 *            the regular expression, in {@link java.util.regex.Pattern} syntax
 * @param replacement
 *            the replacement text, interpreted as by {@link String#replaceAll(String, String)}
 * @return the string resulting from the replacements
 */
public static String opReplaceRegex(final String target, final String pattern, final String replacement) {
	final String replaced = target.replaceAll(pattern, replacement);
	return replaced;
}

/**
 * Collects the sub-strings of {@code target} that match the regular expression {@code pattern}, in the
 * order in which the matcher finds them.
 *
 * @param target
 *            the string to scan; a {@code null} target yields an empty list
 * @param pattern
 *            the regular expression, in {@link Pattern} syntax; a {@code null} or empty pattern yields an
 *            empty list
 * @return the list of matched sub-strings. When {@code pattern} is not a valid regular expression it is
 *         treated as plain text: the result then holds {@code pattern} itself once if {@code target}
 *         contains it, and is empty otherwise
 */
@operator (
	value = { "regex_matches" },
	can_be_const = true,
	category = { IOperatorCategory.STRING },
	concept = { IConcept.STRING })
@doc (
	value = "Returns the list of sub-strings of the first operand that match the regular expression provided in the second operand",
	examples = @example (
		value = "regex_matches(\"colour, color\", \"colou?r\")",
		equals = "['colour','color']"),
	see = { "replace_regex" })
public static IList<String> opRegexMatches(final String target, final String pattern) {
	// Without the check on target, both the matcher and the plain-text fallback below would throw a
	// NullPointerException on a null first operand.
	if (target == null || pattern == null || pattern.isEmpty()) return GamaListFactory.create();
	final Pattern p;
	try {
		p = Pattern.compile(pattern);
	} catch (final PatternSyntaxException ignored) {
		// Deliberately not reported: an invalid regular expression is searched for as a literal string.
		return target.contains(pattern) ? GamaListFactory.createWithoutCasting(Types.STRING, pattern)
				: GamaListFactory.create();
	}
	return GamaListFactory.wrap(Types.STRING,
			p.matcher(target).results().map(MatchResult::group).collect(Collectors.toList()));
}

@operator (
value = "is_number",
can_be_const = true,
Expand Down Expand Up @@ -300,18 +327,16 @@ public static Boolean isGamaNumber(final String s) {

// deal with any possible sign up front
final int start = s.charAt(0) == '-' ? 1 : 0;
if (sz > start + 1) {
if (s.charAt(start) == '#') {
int i = start + 1;
if (i == sz) return false; // str == "#"
// Checking hex (it can't be anything else)
for (; i < length; i++) {
final char c = s.charAt(i);
if ((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) return false;
}

return true;
if (sz > start + 1 && s.charAt(start) == '#') {
int i = start + 1;
if (i == sz) return false; // str == "#"
// Checking hex (it can't be anything else)
for (; i < length; i++) {
final char c = s.charAt(i);
if ((c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F')) return false;
}

return true;
}

sz--; // Don't want to loop to the last char, check it afterwords for
Expand All @@ -327,36 +352,37 @@ public static Boolean isGamaNumber(final String s) {
if (c >= '0' && c <= '9') {
foundDigit = true;
allowSigns = false;
} else if (c == '.') {
if (hasDecPoint || hasExp) // Two decimal points or dec in exponent
return false;
hasDecPoint = true;
} else if (c == 'e' || c == 'E') {
// We've already taken care of hex.
if (hasExp) // Two E's
return false;
if (foundDigit) {
hasExp = true;
allowSigns = true;
} else
return false;
} else if (c == '-') {
if (allowSigns) {
allowSigns = false;
foundDigit = false; // We need a digit after the E
} else
return false;
} else
return false;
} else {
switch (c) {
case '.':
if (hasDecPoint || hasExp) // Two decimal points or dec in exponent
return false;
hasDecPoint = true;
break;
case 'e':
case 'E':
// We've already taken care of hex.
if (hasExp || !foundDigit) return false;
hasExp = true;
allowSigns = true;
break;
case '-':
if (!allowSigns) return false;
allowSigns = false;
foundDigit = false; // We need a digit after the E
break;
default:
return false;
}
}

i++;
}

if (i < length) {
final char c = s.charAt(i);
if (c >= '0' && c <= '9')
return true; // No type qualifier, OK
else if (c == 'e' || c == 'E') return false; // can't have an E at the last byte
if (c >= '0' && c <= '9') return true; // No type qualifier, OK
if (c == 'e' || c == 'E') return false; // can't have an E at the last byte
}

// allowSigns is true iff the val ends in 'E'
Expand Down Expand Up @@ -471,7 +497,7 @@ public static String get(final String lv, final int rv) {
equals = "'\"'")))
/**
 * Converts an integer character code into the one-character string it denotes.
 *
 * @param s
 *            the character code; may be {@code null}
 * @return an empty string when {@code s} is {@code null}, otherwise the string made of the single
 *         {@code char} obtained by narrowing the integer value
 */
public static String asChar(final Integer s) {
	if (s == null) return "";
	// Narrow from the int value, never from byteValue(): a byte is signed and 8 bits wide, so any code
	// above 127 would be sign-extended into an unrelated character (e.g. 233 would not give 'é').
	return Character.toString((char) s.intValue());
}

@operator (
Expand Down
52 changes: 43 additions & 9 deletions msi.gama.core/src/msi/gaml/statements/MatchStatement.java
Expand Up @@ -10,6 +10,9 @@
********************************************************************************************************/
package msi.gaml.statements;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import msi.gama.common.interfaces.IKeyword;
import msi.gama.metamodel.shape.GamaPoint;
import msi.gama.precompiler.GamlAnnotations.doc;
Expand All @@ -36,7 +39,7 @@
* @author drogoul 14 nov. 07
*/
@symbol (
name = { IKeyword.MATCH, IKeyword.MATCH_BETWEEN, IKeyword.MATCH_ONE },
name = { IKeyword.MATCH, IKeyword.MATCH_BETWEEN, IKeyword.MATCH_ONE, IKeyword.MATCH_REGEX },
kind = ISymbolKind.SEQUENCE_STATEMENT,
concept = { IConcept.CONDITION },
with_sequence = true)
Expand All @@ -50,7 +53,7 @@
doc = @doc ("The value or values this statement tries to match")) },
omissible = IKeyword.VALUE)
@doc (
value = "In a switch...match structure, the value of each match block is compared to the value in the switch. If they match, the embedded statement set is executed. Three kinds of match can be used",
value = "In a switch...match structure, the value of each match block is compared to the value in the switch. If they match, the embedded statement set is executed. Four kinds of match can be used, equality, containment, betweenness and regex matching",
usages = { @usage (
value = IKeyword.MATCH + " block is executed if the switch value is equals to the value of the match:",
examples = { @example (
Expand Down Expand Up @@ -110,8 +113,9 @@ public MatchStatement(final IDescription desc) {
value = getFacet(IKeyword.VALUE);
final String keyword = desc.getKeyword();
setName(keyword + " " + (value == null ? "" : value.serialize(false)));
executer = IKeyword.MATCH.equals(keyword) ? new SimpleMatch() : IKeyword.MATCH_ONE.equals(keyword)
? new MatchOne() : IKeyword.MATCH_BETWEEN.equals(keyword) ? new MatchBetween() : null;
executer = IKeyword.MATCH.equals(keyword) ? new SimpleMatch()
: IKeyword.MATCH_ONE.equals(keyword) ? new MatchOne() : IKeyword.MATCH_BETWEEN.equals(keyword)
? new MatchBetween() : IKeyword.MATCH_REGEX.equals(keyword) ? new MatchRegex() : null;
if (executer != null) { executer.acceptValue(); }
}

Expand Down Expand Up @@ -155,10 +159,40 @@ public boolean matches(final IScope scope, final Object switchValue) throws Gama
@Override
public void acceptValue() {
super.acceptValue();
if ((constantValue != null) && !(constantValue instanceof IContainer)) {
if (!(constantValue instanceof GamaPoint)) {
constantValue = Types.LIST.cast(null, constantValue, null, false);
}
if (constantValue != null && !(constantValue instanceof IContainer)
&& !(constantValue instanceof GamaPoint)) {
constantValue = Types.LIST.cast(null, constantValue, null, false);
}
}
}

class MatchRegex extends MatchExecuter {

@Override
public boolean matches(final IScope scope, final Object switchValue) throws GamaRuntimeException {
final Object val = getValue(scope);
if (!(switchValue instanceof String)) throw GamaRuntimeException.error(
"Can only match strings against a regular expression. " + switchValue + " is not a string", scope);
if (!(val instanceof String)) throw GamaRuntimeException
.error("Can only match strings against a regular expression. " + val + " is not a string", scope);
String target = (String) switchValue;
String pattern = (String) val;
if (pattern.isEmpty()) return false;
try {
Pattern p = Pattern.compile(pattern);
return p.matcher(target).find();
} catch (PatternSyntaxException e) {
// throw GamaRuntimeException.error(
// "The syntax of " + pattern + " is not a correct regular expression", scope);
return target.contains(pattern);
}
}

@Override
public void acceptValue() {
super.acceptValue();
if (constantValue != null && !(constantValue instanceof String)) {
constantValue = Types.STRING.cast(null, constantValue, null, false);
}
}
}
Expand All @@ -183,7 +217,7 @@ public boolean matches(final IScope scope, final Object switchValue) throws Gama
@Override
public void acceptValue() {
super.acceptValue();
if ((constantValue != null) && !(constantValue instanceof GamaPoint)) {
if (constantValue != null && !(constantValue instanceof GamaPoint)) {
constantValue = Types.POINT.cast(null, constantValue, null, false);
}

Expand Down
26 changes: 12 additions & 14 deletions msi.gama.core/src/msi/gaml/statements/SwitchStatement.java
Expand Up @@ -26,8 +26,8 @@
import msi.gama.precompiler.GamlAnnotations.usage;
import msi.gama.precompiler.IConcept;
import msi.gama.precompiler.ISymbolKind;
import msi.gama.runtime.IScope;
import msi.gama.runtime.ExecutionResult;
import msi.gama.runtime.IScope;
import msi.gama.runtime.exceptions.GamaRuntimeException;
import msi.gaml.compilation.IDescriptionValidator;
import msi.gaml.compilation.ISymbol;
Expand Down Expand Up @@ -59,7 +59,9 @@
doc = @doc ("an expression")) },
omissible = IKeyword.VALUE)
@doc (
value = "The \"switch... match\" statement is a powerful replacement for imbricated \"if ... else ...\" constructs. All the blocks that match are executed in the order they are defined. The block prefixed by default is executed only if none have matched (otherwise it is not).",
value = "The \"switch... match\" statement is a powerful replacement for imbricated \"if ... else ...\" constructs. "
+ "All the blocks that match are executed in the order they are defined, unless one invokes 'break', in which case "
+ "the switch statement is exited. The block prefixed by default is executed only if none have matched (otherwise it is not).",
usages = { @usage (
value = "The prototypical syntax is as follows:",
examples = { @example (
Expand Down Expand Up @@ -233,14 +235,12 @@ public void validate(final IDescription desc) {
// match_one or match_between)
final Iterable<IDescription> matches = desc.getChildrenWithKeyword(MATCH);
final IExpression switchValue = desc.getFacetExpr(VALUE);
if (switchValue == null) { return; }
if (switchValue == null) return;
final IType switchType = switchValue.getGamlType();
if (switchType.equals(Types.NO_TYPE)) { return; }
if (switchType.equals(Types.NO_TYPE)) return;
for (final IDescription match : matches) {
final IExpression value = match.getFacetExpr(VALUE);
if (value == null) {
continue;
}
if (value == null) { continue; }
final IType<?> matchType = value.getGamlType();
// AD : special case introduced for ints and floats (a warning
// is emitted)
Expand All @@ -252,9 +252,7 @@ public void validate(final IDescription desc) {
continue;
}

if (matchType.isTranslatableInto(switchType)) {
continue;
}
if (matchType.isTranslatableInto(switchType)) { continue; }
match.warning(
"The value " + value.serialize(false) + " of type " + matchType
+ " is compared to a value of type " + switchType + ", which will never match ",
Expand Down Expand Up @@ -287,7 +285,7 @@ public void setChildren(final Iterable<? extends ISymbol> commands) {
final List<MatchStatement> cases = new ArrayList<>();
for (final ISymbol c : commands) {
if (c instanceof MatchStatement) {
if (((MatchStatement) c).getKeyword().equals(IKeyword.DEFAULT)) {
if (IKeyword.DEFAULT.equals(((MatchStatement) c).getKeyword())) {
defaultMatch = (MatchStatement) c;
} else {
cases.add((MatchStatement) c);
Expand All @@ -304,15 +302,15 @@ public Object privateExecuteIn(final IScope scope) throws GamaRuntimeException {
final Object switchValue = value.value(scope);
Object lastResult = null;
for (final MatchStatement matche : matches) {
if (scope.interrupted()) { return lastResult; }
if (scope.interrupted()) return lastResult;
if (matche.matches(scope, switchValue)) {
final ExecutionResult er = scope.execute(matche);
if (!er.passed()) { return lastResult; }
if (!er.passed()) return lastResult;
lastResult = er.getValue();
hasMatched = true;
}
}
if (!hasMatched && defaultMatch != null) { return scope.execute(defaultMatch).getValue(); }
if (!hasMatched && defaultMatch != null) return scope.execute(defaultMatch).getValue();
return lastResult;
}

Expand Down
Expand Up @@ -261,6 +261,7 @@ public interface IKeyword {
String MATCH = "match";
String MATCH_BETWEEN = "match_between";
String MATCH_ONE = "match_one";
String MATCH_REGEX = "match_regex";
String MASK = "mask";
String MAX = "max";
String MAXIMIZE = "maximize";
Expand Down

0 comments on commit a31ae4d

Please sign in to comment.