Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

some additional performance improvements #39

Merged
merged 7 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<groupId>software.amazon.event.ruler</groupId>
<artifactId>event-ruler</artifactId>
<name>Event Ruler</name>
<version>1.0.0</version>
<version>1.0.1</version>
<description>Event Ruler is a Java library that allows matching Rules to Events. An event is a list of fields,
which may be given as name/value pairs or as a JSON object. A rule associates event field names with lists of
possible values. There are two reasons to use Ruler: 1/ It's fast; the time it takes to match Events doesn't
Expand Down
11 changes: 3 additions & 8 deletions src/main/software/amazon/event/ruler/ACFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,14 @@ private static List<Object> find(final ACTask task) {
if (startState == null) {
return Collections.emptyList();
}
moveFrom(startState, 0, task, new ArrayMembership());

// each iteration removes a Step and adds zero or more new ones
while (task.stepsRemain()) {
tryStep(task);
}
moveFrom(startState, 0, task, new ArrayMembership());

return task.getMatchedRules();
}

// remove a step from the work queue and see if there's a transition
private static void tryStep(final ACTask task) {
final ACStep step = task.nextStep();
private static void tryStep(final ACTask task, final ACStep step) {
final Field field = task.event.fields.get(step.fieldIndex);

// if we can step from where we are to the new field without violating array consistency
Expand Down Expand Up @@ -99,7 +94,7 @@ private static void moveFrom(final NameState fromState, int fieldIndex, final AC
tryMustNotExistMatch(fromState, task, fieldIndex, arrayMembership);

while (fieldIndex < task.fieldCount) {
task.addStep(fieldIndex++, fromState, arrayMembership);
tryStep(task, new ACStep(fieldIndex++, fromState, arrayMembership));
}
}

Expand Down
20 changes: 0 additions & 20 deletions src/main/software/amazon/event/ruler/ACTask.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package software.amazon.event.ruler;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;

/**
* Represents the state of an Array-Consistent rule-finding project.
Expand All @@ -18,9 +16,6 @@ class ACTask {
// the rules, if we find any
private final HashSet<Object> rules = new HashSet<>();

// Steps queued up for processing
private final Queue<ACStep> stepQueue = new ArrayDeque<>();

// the state machine
private final GenericMachine<?> machine;

Expand All @@ -34,21 +29,6 @@ NameState startState() {
return machine.getStartState();
}

ACStep nextStep() {
return stepQueue.remove();
}

/*
* Add a step to the queue for later consideration
*/
void addStep(final int fieldIndex, final NameState nameState, final ArrayMembership membershipSoFar) {
stepQueue.add(new ACStep(fieldIndex, nameState, membershipSoFar));
}

boolean stepsRemain() {
return !stepQueue.isEmpty();
}

List<Object> getMatchedRules() {
return new ArrayList<>(rules);
}
Expand Down
11 changes: 7 additions & 4 deletions src/main/software/amazon/event/ruler/ByteMachine.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
Expand Down Expand Up @@ -810,9 +809,13 @@ NameState findPattern(final Patterns pattern) {

private NameState findAnythingButPattern(AnythingBut pattern) {

Set<NameState> nextNameStates = pattern.getValues().stream().
map(value -> findMatchPattern(getParser().parse(pattern.type(), value), pattern)).
filter(Objects::nonNull).collect(Collectors.toSet());
Set<NameState> nextNameStates = new HashSet<>(pattern.getValues().size());
for (String value : pattern.getValues()) {
NameState matchPattern = findMatchPattern(getParser().parse(pattern.type(), value), pattern);
if (matchPattern != null) {
nextNameStates.add(matchPattern);
}
}
Comment on lines +813 to +818
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super-duper nitpicking, but I think you can have a consistent style here and in ByteMap.getTransitions() if you go for x.forEach(elem -> addWhenNotNull(...)). I'm not sure about the performance implications yet, but it may be slightly more readable. Maybe...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did update this one to the new style that I believe is easier to understand.

if (!nextNameStates.isEmpty()) {
assert nextNameStates.size() == 1 : "nextNameStates.size() == 1";
return nextNameStates.iterator().next();
Expand Down
10 changes: 7 additions & 3 deletions src/main/software/amazon/event/ruler/ByteMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;

import static software.amazon.event.ruler.CompoundByteTransition.coalesce;

Expand Down Expand Up @@ -193,7 +191,13 @@ ByteTransition getTransitionForAllBytes() {
* @return All transitions contained in this map.
*/
Set<ByteTransition> getTransitions() {
return map.values().stream().filter(Objects::nonNull).collect(Collectors.toSet());
Set<ByteTransition> result = new HashSet<>(map.values().size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note that the Set may end up containing fewer elements than map.values().size(), because 1) nulls are removed, and 2) the values in the map are not unique. I suppose it's probably more efficient to oversize the Set initially, though, than to undersize it by going with the default constructor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, good observation. I was aware of this; I just took the size of the values as the upper bound of the set size. Performance-wise, it's cheaper to over-allocate up front than to let the set grow.

for (ByteTransition transition : map.values()) {
if (transition != null) {
result.add(transition);
}
}
return result;
}

/**
Expand Down
16 changes: 9 additions & 7 deletions src/main/software/amazon/event/ruler/Patterns.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package software.amazon.event.ruler;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* The Patterns deal with pre-processing of rules for the eventual matching against events.
Expand Down Expand Up @@ -50,21 +50,23 @@ public static ValuePatterns suffixMatch(final String suffix) {
}

public static AnythingBut anythingButMatch(final String anythingBut) {
return new AnythingBut(Stream.of(anythingBut).collect(Collectors.toSet()), false);
return new AnythingBut(Collections.singleton(anythingBut), false);
}

public static AnythingBut anythingButMatch(final double anythingBut) {
return new AnythingBut(Stream.of(anythingBut).map(ComparableNumber::generate).collect(Collectors.toSet()),
true);
return new AnythingBut(Collections.singleton(ComparableNumber.generate(anythingBut)), true);
}

public static AnythingBut anythingButMatch(final Set<String> anythingButs) {
return new AnythingBut(anythingButs, false);
}

public static AnythingBut anythingButNumberMatch(final Set<Double> anythingButs) {
return new AnythingBut(anythingButs.stream().map(ComparableNumber::generate).collect(Collectors.toSet()),
true);
Set<String> normalizedNumbers = new HashSet<>(anythingButs.size());
for (Double d : anythingButs) {
normalizedNumbers.add(ComparableNumber.generate(d));
}
return new AnythingBut(normalizedNumbers, true);
}

public static ValuePatterns anythingButPrefix(final String prefix) {
Expand Down
44 changes: 28 additions & 16 deletions src/main/software/amazon/event/ruler/Range.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package software.amazon.event.ruler;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
* Represents a range of numeric values to match against.
Expand Down Expand Up @@ -56,53 +54,67 @@ private Range(Range range) {
/**
 * Creates a Range whose bottom is -Constants.FIVE_BILLION and whose top is val.
 * The flags passed are openBottom=false and openTop=true (argument order as in between()).
 * NOTE(review): presumably "open" means the endpoint is excluded, so val itself does not match - confirm.
 *
 * @param val the top of the range
 * @return the new Range
 */
public static Range lessThan(final double val) {
return new Range(-Constants.FIVE_BILLION, false, val, true);
}

/**
 * Creates a Range whose bottom is -Constants.FIVE_BILLION and whose top is val.
 * Both flags are false (openBottom=false, openTop=false; argument order as in between()).
 * NOTE(review): presumably a false "open" flag means the endpoint is included, so val itself matches - confirm.
 *
 * @param val the top of the range
 * @return the new Range
 */
public static Range lessThanOrEqualTo(final double val) {
return new Range(-Constants.FIVE_BILLION, false, val, false);
}

/**
 * Creates a Range whose bottom is val and whose top is Constants.FIVE_BILLION.
 * The flags passed are openBottom=true and openTop=false (argument order as in between()).
 * NOTE(review): presumably "open" means the endpoint is excluded, so val itself does not match - confirm.
 *
 * @param val the bottom of the range
 * @return the new Range
 */
public static Range greaterThan(final double val) {
return new Range(val, true, Constants.FIVE_BILLION, false);
}

/**
 * Creates a Range whose bottom is val and whose top is Constants.FIVE_BILLION.
 * Both flags are false (openBottom=false, openTop=false; argument order as in between()).
 * NOTE(review): presumably a false "open" flag means the endpoint is included, so val itself matches - confirm.
 *
 * @param val the bottom of the range
 * @return the new Range
 */
public static Range greaterThanOrEqualTo(final double val) {
return new Range(val, false, Constants.FIVE_BILLION, false);
}

/**
 * Creates a Range with explicitly supplied endpoints and open/closed flags, passing all four
 * arguments straight through to the Range constructor.
 *
 * @param bottom the bottom of the range
 * @param openBottom the "open" flag for the bottom endpoint
 * @param top the top of the range
 * @param openTop the "open" flag for the top endpoint
 * @return the new Range
 */
public static Range between(final double bottom, final boolean openBottom, final double top, final boolean openTop) {
return new Range(bottom, openBottom, top, openTop);
}

private static Range deepCopy(final Range range) { return new Range(range); }
private static Range deepCopy(final Range range) {
return new Range(range);
}

/**
* This is necessitated by the fact that we do range comparisons of numbers, fixed-length strings of digits, and
* in the case where the numbers represent IP addresses, they are hex digits. So we need to be able to say
* "for all digits between '3' and 'C'". This is for that.
*
* @param first Start one digit higher than this, for example '4'
* @param last Stop one digit lower than this 'B'
* @return The digit list, for example [ '4, '5', '6', '7', '8', '9', '9', 'A' ] (with 'B' for longDigitSequence)
* @param last Stop one digit lower than this, for example 'B'
* @return The digit list, for example [ '4', '5', '6', '7', '8', '9', 'A' ] (with 'B' for longDigitSequence)
*/
static List<Byte> digitSequence(byte first, byte last, boolean includeFirst, boolean includeLast) {
assert first <= last && first <= 'F'&& first >= '0'&& last <= 'F' && last >= '0';
static byte[] digitSequence(byte first, byte last, boolean includeFirst, boolean includeLast) {
assert first <= last && first <= 'F' && first >= '0' && last <= 'F';
assert !((first == last) && !includeFirst && !includeLast);

final List<Byte> bytes = new ArrayList<>();
int i = 0;
while (Constants.HEX_DIGITS[i] < first) {
i++;
}
int i = getHexByteIndex(first);
int j = getHexByteIndex(last);

if ((!includeFirst) && (i < (Constants.HEX_DIGITS.length - 1))) {
i++;
}
while (Constants.HEX_DIGITS[i] < last) {
bytes.add(Constants.HEX_DIGITS[i++]);
}

if (includeLast) {
bytes.add(Constants.HEX_DIGITS[i]);
j++;
}

byte[] bytes = new byte[j - i];

System.arraycopy(Constants.HEX_DIGITS, i, bytes, 0, j - i);

return bytes;
}

private static int getHexByteIndex(byte value) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonessha what do you think? I feel like using the chars helps with clarity, and then only one obscure constant is needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good! Thank you!

// ['0'-'9'] maps to [0-9] indexes
if (value >= 48 && value <= 57) {
return value - 48;
}
// ['A'-'F'] maps to [10-15] indexes
return (value - 65) + 10;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason not to just go with -55 here? If you're trying to lay out both steps here for understandability/readability, perhaps it would be better to go further and turn these numbers into private static final class variables?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly to keep it easy to understand; if we trust the compiler, this gets optimized away.

I think the variable may be the better option then

}

@Override
public Object clone() {
super.clone();
Expand Down
4 changes: 1 addition & 3 deletions src/main/software/amazon/event/ruler/ShortcutTransition.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

// Shortcut transition is designed mainly for exact matches, to reduce memory consumption: an exact match is always
// at the last byte of the value, and it would take a lot of memory to build a traversal path byte by byte.
Expand Down Expand Up @@ -81,7 +79,7 @@ SingleByteTransition setMatch(ByteMatch match) {

@Override
public Set<ShortcutTransition> getShortcuts() {
return Stream.of(this).collect(Collectors.toSet());
return Collections.singleton(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import java.util.Collections;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* This class represents a singular ByteTransition. This is in contrast to a compound ByteTransition that represents
Expand Down Expand Up @@ -43,7 +41,7 @@ Set<ByteMatch> getMatches() {
if (match == null) {
return Collections.emptySet();
}
return Stream.of(match).collect(Collectors.toSet());
return Collections.singleton(match);
}

/**
Expand All @@ -53,7 +51,7 @@ Set<ByteMatch> getMatches() {
*/
@Override
Set<SingleByteTransition> expand() {
return Stream.of(this).collect(Collectors.toSet());
return Collections.singleton(this);
}

@Override
Expand Down
62 changes: 62 additions & 0 deletions src/test/software/amazon/event/ruler/Benchmarks.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package software.amazon.event.ruler;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Test;

import java.io.BufferedReader;
Expand All @@ -9,6 +10,7 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -484,6 +486,66 @@ public void CL2Benchmark() throws Exception {
System.out.println("COMBO events/sec: " + String.format("%.1f", bm.getEPS()));
}

// make sure we can handle nasty deep events
//
// Builds a chain of nested maps, one extra level per loop iteration, and serializes the whole event
// after each level is filled in, so deepEvents ends up with maxLevel JSON events of increasing depth.
// Every ruleEveryNEvents levels a rule on "string<i>" at that depth is serialized too, together with
// the count stored in deepExpected. Rules and events are then run through a Benchmarker twice
// (first as a warm-up) and the events/sec of the second run is printed.
@Test
public void DeepEventBenchmark() throws Exception {

// how many levels deep we want to go
int maxLevel = 100;
// we create a rule every time the level index is a multiple of this number
int ruleEveryNEvents = 10;

// used to serialize the event and rule maps to JSON strings
ObjectMapper m = new ObjectMapper();

// outermost object of the event; deeper levels get attached beneath it
Map<String, Object> root = new HashMap<>();

// outermost object of the rule; mirrors the event's "level<i>" nesting
Map<String, Object> ruleRoot = new HashMap<>();

List<String> deepEvents = new ArrayList<>();
List<String> deepRules = new ArrayList<>();
// one entry per rule, passed to Benchmarker.addRules alongside deepRules
// NOTE(review): presumably the expected match count for each rule - confirm against Benchmarker
List<Integer> deepExpected = new ArrayList<>();

// cursors pointing at the innermost map built so far
Map<String, Object> currentLevel = root;
Map<String, Object> currentRule = ruleRoot;

for (int i = 0; i < maxLevel; i++) {
// give the current level one numeric and one string field
currentLevel.put("numeric" + i, i * i);
currentLevel.put("string" + i, "value" + i);


if (i % ruleEveryNEvents == 0) {
// temporarily add the leaf condition, snapshot the rule as JSON, then take it out again
// so that later rules only carry the nested "level" path down to their own depth
currentRule.put("string" + i, Collections.singletonList("value" + i));
deepRules.add(m.writeValueAsString(ruleRoot));
currentRule.remove("string" + i);
// all the events generated below this point will match this rule.
// (the event serialized in this iteration and every later one contain "string<i>",
// which is maxLevel - i events in total)
deepExpected.add(maxLevel - i);
}

// snapshot the event as it looks at the current depth
deepEvents.add(m.writeValueAsString(root));

// descend one level in the event
HashMap<String, Object> newLevel = new HashMap<>();
currentLevel.put("level" + i, newLevel);
currentLevel = newLevel;

// descend one level in the rule, using the same key as the event
HashMap<String, Object> newRuleLevel = new HashMap<>();
currentRule.put("level" + i, newRuleLevel);
currentRule = newRuleLevel;
}

// warm up
Benchmarker bm = new Benchmarker();
bm.addRules(deepRules.toArray(new String[0]), deepExpected.stream().mapToInt(Integer::intValue).toArray());
bm.run(deepEvents);

// exercise
// a fresh Benchmarker is used so the reported figure comes from this run only
bm = new Benchmarker();
bm.addRules(deepRules.toArray(new String[0]), deepExpected.stream().mapToInt(Integer::intValue).toArray());
bm.run(deepEvents);

System.out.println("DEEP EXACT events/sec: " + String.format("%.1f", bm.getEPS()));

}

private final List<String> citylots2 = new ArrayList<>();

private static class Benchmarker {
Expand Down
Loading