Skip to content

Commit

Permalink
Exponential memory improvement by re-using NameState across multiple …
Browse files Browse the repository at this point in the history
…patterns (#88)

* Exponential memory improvement by re-using NameState across multiple patterns

* Fixing errant change to ByteMatch

* Adding some not-null checks

* Fixing several bugs to make this change work

* Fixing build after pulling in latest changes

* Adding missing Test annotation

* Fixing infinite loop bug in ACFinder

* Fixing package name

* Fixing anythingButs to use pattern as well as namestate

* Optimizing NameState further by only storing IDs for non-terminal sub-rules

* Keeping duplicate rules out of NameState

* Version bump

* Additional NameState re-use for case when sequence of keys has already been encountered

* Fixing bug in determining newness of rules. Addressing some minor feedback from Long.

* Revert "Additional NameState re-use for case when sequence of keys has already been encountered"

This reverts commit d881815.

* Performance improvements to rulesFor(JSON)Event

* Fixing bug in tracking NameStates when adding steps
  • Loading branch information
jonessha committed May 23, 2023
1 parent 90dcc86 commit 1928acc
Show file tree
Hide file tree
Showing 24 changed files with 2,371 additions and 261 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<groupId>software.amazon.event.ruler</groupId>
<artifactId>event-ruler</artifactId>
<name>Event Ruler</name>
<version>1.2.2</version>
<version>1.3.0</version>
<description>Event Ruler is a Java library that allows matching Rules to Events. An event is a list of fields,
which may be given as name/value pairs or as a JSON object. A rule associates event field names with lists of
possible values. There are two reasons to use Ruler: 1/ It's fast; the time it takes to match Events doesn't
Expand Down
94 changes: 80 additions & 14 deletions src/main/software/amazon/event/ruler/ACFinder.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package software.amazon.event.ruler;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static software.amazon.event.ruler.SetOperations.intersection;

/**
* Matches rules to events as does Finder, but in an array-consistent fashion, thus the AC prefix on the class name.
*/
class ACFinder {

private static final Patterns ABSENCE_PATTERN = Patterns.absencePatterns();

private ACFinder() { }

/**
Expand All @@ -29,14 +35,19 @@ private static List<Object> find(final ACTask task) {
if (startState == null) {
return Collections.emptyList();
}
moveFrom(null, startState, 0, task, new ArrayMembership());

moveFrom(startState, 0, task, new ArrayMembership());
// each iteration removes a Step and adds zero or more new ones
while (task.stepsRemain()) {
tryStep(task);
}

return task.getMatchedRules();
}

// remove a step from the work queue and see if there's a transition
private static void tryStep(final ACTask task, final ACStep step) {
private static void tryStep(final ACTask task) {
final ACStep step = task.nextStep();
final Field field = task.event.fields.get(step.fieldIndex);

// if we can step from where we are to the new field without violating array consistency
Expand All @@ -49,33 +60,37 @@ private static void tryStep(final ACTask task, final ACStep step) {

// loop through the value pattern matches, if any
final int nextFieldIndex = step.fieldIndex + 1;
for (NameState nextNameState : valueMatcher.transitionOn(field.val)) {
for (NameStateWithPattern nextNameStateWithPattern : valueMatcher.transitionOn(field.val)) {

// we have moved to a new NameState
// this NameState might imply a rule match
task.collectRules(nextNameState);
task.collectRules(step.candidateSubRuleIds, nextNameStateWithPattern.getNameState(),
nextNameStateWithPattern.getPattern());

// set up for attempting to move on from the new state
moveFrom(nextNameState, nextFieldIndex, task, newMembership);
moveFromWithPriorCandidates(step.candidateSubRuleIds, nextNameStateWithPattern.getNameState(),
nextNameStateWithPattern.getPattern(), nextFieldIndex, task, newMembership);
}
}
}
}

// Follows the name transitions that nameState reports for this event and array membership.
// Returns immediately when nameState has no key transitions; otherwise each non-null state returned by
// getNameTransitions is handed to addNameState. Judging by the method name and ABSENCE_PATTERN, this is the
// "key must not exist" path.
private static void tryMustNotExistMatch(final NameState nameState, final ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
private static void tryMustNotExistMatch(final Set<Double> candidateSubRuleIds, final NameState nameState,
final ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
if (!nameState.hasKeyTransitions()) {
// nothing to follow from this state
return;
}

for (NameState nextNameState : nameState.getNameTransitions(task.event, arrayMembership)) {
if (nextNameState != null) {
addNameState(nextNameState, task, nextKeyIndex, arrayMembership);
// the transition is recorded as having been taken on ABSENCE_PATTERN
addNameState(candidateSubRuleIds, nextNameState, ABSENCE_PATTERN, task, nextKeyIndex, arrayMembership);
}
}
}

// Move from a state. Give all the remaining event fields a chance to transition from it
private static void moveFrom(final NameState fromState, int fieldIndex, final ACTask task, final ArrayMembership arrayMembership) {
// Move from a state. Give all the remaining event fields a chance to transition from it.
private static void moveFrom(final Set<Double> candidateSubRuleIdsForNextStep, final NameState nameState,
int fieldIndex, final ACTask task, final ArrayMembership arrayMembership) {
/*
* The Name Matchers look for an [ { exists: false } ] match. They
* will match if a particular key is not present
Expand All @@ -91,17 +106,68 @@ private static void moveFrom(final NameState fromState, int fieldIndex, final AC
* the final state can still be evaluated to true if the particular event
* does not have the key configured for [ { exists: false } ].
*/
tryMustNotExistMatch(fromState, task, fieldIndex, arrayMembership);
tryMustNotExistMatch(candidateSubRuleIdsForNextStep, nameState, task, fieldIndex, arrayMembership);

while (fieldIndex < task.fieldCount) {
tryStep(task, new ACStep(fieldIndex++, fromState, arrayMembership));
task.addStep(fieldIndex++, nameState, candidateSubRuleIdsForNextStep, arrayMembership);
}
}

private static void addNameState(NameState nameState, ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
// Narrows the candidate sub-rules by the transition just taken (fromState via fromPattern) and, only when
// some candidates survive, hands off to moveFrom so the remaining event fields get queued as steps.
private static void moveFromWithPriorCandidates(final Set<Double> candidateSubRuleIds,
                                                final NameState fromState, final Patterns fromPattern,
                                                final int fieldIndex, final ACTask task,
                                                final ArrayMembership arrayMembership) {
    final Set<Double> survivors =
            calculateCandidateSubRuleIdsForNextStep(candidateSubRuleIds, fromState, fromPattern);

    // With no surviving candidate sub-rules, nothing further along this path can match, so stop here.
    if (survivors == null || survivors.isEmpty()) {
        return;
    }

    moveFrom(survivors, fromState, fieldIndex, task, arrayMembership);
}

/**
 * Works out which sub-rule IDs are still candidates once a transition has been taken.
 *
 * @param currentCandidateSubRuleIds The candidates carried by the current step. Null (or an empty set) is
 *                                   treated as "first step": no candidates have been established yet.
 * @param fromState The NameState the transition is taken from.
 * @param fromPattern The pattern that was matched to make the transition out of fromState.
 * @return The candidates for the next step. Null is returned when fromState has no non-terminal sub-rules
 *         for fromPattern, in which case evaluating later steps is pointless. The result may also be an
 *         empty set when the intersection with the current candidates leaves nothing.
 */
private static Set<Double> calculateCandidateSubRuleIdsForNextStep(final Set<Double> currentCandidateSubRuleIds,
                                                                   final NameState fromState,
                                                                   final Patterns fromPattern) {
    // Every sub-rule that relies on the matched pattern to move on to the next NameState. Not all of them are
    // necessarily candidates: some may have demanded different values for fields evaluated earlier.
    final Set<Double> usingPattern = fromState.getNonTerminalSubRuleIdsForPattern(fromPattern);
    if (usingPattern == null) {
        // No sub-rule continues past this pattern, so going further cannot find a match.
        return null;
    }

    // No established candidates means this is the first NameState: seed the candidates with every sub-rule
    // that used the matched pattern.
    final boolean onFirstStep = currentCandidateSubRuleIds == null || currentCandidateSubRuleIds.isEmpty();
    if (onFirstStep) {
        return usingPattern;
    }

    // Otherwise keep only the existing candidates that also used the matched pattern.
    final Set<Double> narrowed = new HashSet<>();
    intersection(usingPattern, currentCandidateSubRuleIds, narrowed);
    return narrowed;
}

// Records any rule matches implied by arriving at nameState via pattern, then tries to continue matching
// from nameState starting at nextKeyIndex.
private static void addNameState(Set<Double> candidateSubRuleIds, NameState nameState, Patterns pattern,
ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
// one of the matches might imply a rule match
task.collectRules(nameState);
task.collectRules(candidateSubRuleIds, nameState, pattern);

// moveFromWithPriorCandidates only proceeds when candidate sub-rules remain after this transition
moveFrom(nameState, nextKeyIndex, task, arrayMembership);
moveFromWithPriorCandidates(candidateSubRuleIds, nameState, pattern, nextKeyIndex, task, arrayMembership);
}
}

7 changes: 6 additions & 1 deletion src/main/software/amazon/event/ruler/ACStep.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
package software.amazon.event.ruler;

import java.util.Set;

/**
* Represents a suggestion of a state/token combo from which there might be a transition, in an array-consistent fashion.
*/
class ACStep {
// index of the event field to try; ACFinder looks it up with task.event.fields.get(fieldIndex)
final int fieldIndex;
// the state from which a transition on that field is attempted
final NameState nameState;
// sub-rule IDs still considered candidates at this step; null when no candidates have been established yet
final Set<Double> candidateSubRuleIds;
// the array membership supplied when this step was created
final ArrayMembership membershipSoFar;

// Plain value holder: the constructor only stores its arguments in the final fields above.
ACStep(final int fieldIndex, final NameState nameState, final ArrayMembership arrayMembership) {
ACStep(final int fieldIndex, final NameState nameState, final Set<Double> candidateSubRuleIds,
final ArrayMembership arrayMembership) {
this.fieldIndex = fieldIndex;
this.nameState = nameState;
this.candidateSubRuleIds = candidateSubRuleIds;
this.membershipSoFar = arrayMembership;
}
}
46 changes: 41 additions & 5 deletions src/main/software/amazon/event/ruler/ACTask.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package software.amazon.event.ruler;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import static software.amazon.event.ruler.SetOperations.intersection;

/**
* Represents the state of an Array-Consistent rule-finding project.
Expand All @@ -13,8 +18,11 @@ class ACTask {
public final Event event;
final int fieldCount;

// the rules, if we find any
private final HashSet<Object> rules = new HashSet<>();
// the rules that matched the event, if we find any
private final Set<Object> matchingRules = new HashSet<>();

// Steps queued up for processing
private final Queue<ACStep> stepQueue = new ArrayDeque<>();

// the state machine
private final GenericMachine<?> machine;
Expand All @@ -29,11 +37,39 @@ NameState startState() {
return machine.getStartState();
}

// Removes and returns the step at the head of the queue. Queue.remove() throws NoSuchElementException on an
// empty queue, so callers are expected to check stepsRemain() first.
ACStep nextStep() {
    final ACStep head = stepQueue.remove();
    return head;
}

/*
 * Wrap the given field index, state, candidate sub-rule IDs and array membership in an ACStep and
 * append it to the queue so it can be considered later
 */
void addStep(final int fieldIndex, final NameState nameState, final Set<Double> candidateSubRuleIds,
             final ArrayMembership membershipSoFar) {
    final ACStep queued = new ACStep(fieldIndex, nameState, candidateSubRuleIds, membershipSoFar);
    stepQueue.add(queued);
}

// Reports whether at least one queued step is still waiting to be processed.
boolean stepsRemain() {
    final boolean drained = stepQueue.isEmpty();
    return !drained;
}

// Returns the rules matched so far, copied into a new ArrayList so the internal set is not handed out.
List<Object> getMatchedRules() {
return new ArrayList<>(rules);
return new ArrayList<>(matchingRules);
}

/*
 * Add to the matched rules those rules whose sub-rules terminate at nameState via pattern.
 * Does nothing when nameState has no terminal sub-rule IDs for pattern. When candidateSubRuleIds is
 * null or empty, every terminal sub-rule counts; otherwise only IDs that are also in candidateSubRuleIds.
 */
void collectRules(final NameState nameState) {
rules.addAll(nameState.getRules());
void collectRules(final Set<Double> candidateSubRuleIds, final NameState nameState, final Patterns pattern) {
Set<Double> terminalSubRuleIds = nameState.getTerminalSubRuleIdsForPattern(pattern);
if (terminalSubRuleIds == null) {
// no sub-rule ends here on this pattern, so there is nothing to collect
return;
}

// If no candidates, that means we're on the first step, so all sub-rules are candidates.
if (candidateSubRuleIds == null || candidateSubRuleIds.isEmpty()) {
for (Double terminalSubRuleId : terminalSubRuleIds) {
// translate each terminal sub-rule ID into its rule before recording it
matchingRules.add(nameState.getRule(terminalSubRuleId));
}
} else {
// presumably adds nameState.getRule(id) to matchingRules for each ID common to both sets;
// confirm against SetOperations.intersection
intersection(candidateSubRuleIds, terminalSubRuleIds, matchingRules, id -> nameState.getRule(id));
}
}
}
Loading

0 comments on commit 1928acc

Please sign in to comment.