New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
reduce node lookups in event processing #173
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,10 +31,8 @@ | |
import java.util.Set; | ||
|
||
import javax.annotation.PostConstruct; | ||
import javax.annotation.PreDestroy; | ||
import javax.inject.Inject; | ||
import javax.jcr.Item; | ||
import javax.jcr.Node; | ||
import javax.jcr.PathNotFoundException; | ||
import javax.jcr.RepositoryException; | ||
import javax.jcr.Session; | ||
import javax.jcr.observation.Event; | ||
|
@@ -79,6 +77,7 @@ public class SimpleObserver implements EventListener { | |
@Inject | ||
private EventFilter eventFilter; | ||
|
||
// THIS SESSION SHOULD NOT BE USED TO LOOK UP NODES | ||
private Session session; | ||
|
||
/** | ||
|
@@ -93,6 +92,16 @@ public void buildListener() throws RepositoryException { | |
session.save(); | ||
} | ||
|
||
/** | ||
* logout of the session | ||
* @throws RepositoryException | ||
*/ | ||
@PreDestroy | ||
public void stopListening() throws RepositoryException { | ||
session.getWorkspace().getObservationManager().removeEventListener(this); | ||
session.logout(); | ||
} | ||
|
||
/** | ||
* Filter JCR events and transform them into our own FedoraEvents. | ||
* | ||
|
@@ -101,30 +110,42 @@ public void buildListener() throws RepositoryException { | |
@Override | ||
public void onEvent(final javax.jcr.observation.EventIterator events) { | ||
// keep track of nodes that trigger events to prevent duplicates | ||
final Set<Node> posted = new HashSet<Node>(); | ||
// size to minimize resizing. | ||
final Set<String> posted = new HashSet<String>((int)events.getSize() * 2 / 3); | ||
|
||
// post non-duplicate events approved by the filter | ||
for (final Event e : filter(new EventIterator(events), eventFilter)) { | ||
try { | ||
final Item item = session.getItem(e.getPath()); | ||
Node n = null; | ||
if ( item.isNode() ) { | ||
n = (Node)item; | ||
} else { | ||
n = item.getParent(); | ||
String nPath = e.getPath(); | ||
int nType = e.getType(); | ||
// is jump table faster than two bitwise comparisons? | ||
switch(nType) { | ||
case NODE_ADDED: | ||
break; | ||
case NODE_REMOVED: | ||
break; | ||
case PROPERTY_ADDED: | ||
nPath = nPath.substring(0, nPath.lastIndexOf('/')); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that true, nPath always ends with a trailing '/'? There is no risk of dropping the last path element? Side note: You probably chose not to, but falling-through on some of these "cases" could reduce some duplicate code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is that Event.getPath always returns an absolute path, which should be path segments joined by '/'. I don't think there will ever be a terminal '/', and the code here was previously getting the parent node of the property as the Set member, so dropping the property name should be the desired behavior. In the olden days, there were minor performance gains from sorting the cases in a switch block. This may no longer be the case, but if we were going to change for readability I would want to just use the bitwise comparisons against (NODE_ADDED + NODE_REMOVED + NODE_MOVED) and (PROPERTY_ADDED + ...) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the cases where you are trimming to the last '/' is only in the case of "properties", it makes perfect sense to drop the property name. Re: switch logic, clearly have have considered the options and landed on the current approach. Let's stick with it. |
||
break; | ||
case PROPERTY_REMOVED: | ||
nPath = nPath.substring(0, nPath.lastIndexOf('/')); | ||
break; | ||
case PROPERTY_CHANGED: | ||
nPath = nPath.substring(0, nPath.lastIndexOf('/')); | ||
break; | ||
case NODE_MOVED: | ||
break; | ||
default: | ||
nPath = null; | ||
} | ||
if ( n != null && !posted.contains(n) ) { | ||
if ( nPath != null && !posted.contains(nPath) ) { | ||
EVENT_COUNTER.inc(); | ||
LOGGER.debug("Putting event: " + e.toString() | ||
+ " on the bus."); | ||
LOGGER.debug("Putting event: {} ({}) on the bus", nPath, nType); | ||
eventBus.post(new FedoraEvent(e)); | ||
posted.add(n); | ||
posted.add(nPath); | ||
} else { | ||
LOGGER.debug("Skipping: " + e); | ||
LOGGER.debug("Skipping event: {} ({}) on the bus", nPath, nType); | ||
} | ||
} catch (final PathNotFoundException ex) { | ||
// we can ignore these | ||
LOGGER.trace("Not a node in the Fedora workspace: " + e); | ||
} catch ( RepositoryException ex ) { | ||
throw propagate(ex); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/** | ||
* Copyright 2013 DuraSpace, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.fcrepo.kernel.services.functions; | ||
|
||
/** | ||
* Predicate to match nodes with all of the given mixin types | ||
* @author armintor@gmail.com | ||
* | ||
*/ | ||
public class AllTypesPredicate extends BooleanTypesPredicate { | ||
|
||
private final int test; | ||
|
||
/** | ||
* True if all the types specified match. | ||
* @param types | ||
*/ | ||
public AllTypesPredicate(String...types) { | ||
super(types); | ||
this.test = types.length; | ||
} | ||
|
||
@Override | ||
protected boolean test(final int matched) { | ||
return matched == test; | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/** | ||
* Copyright 2013 DuraSpace, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.fcrepo.kernel.services.functions; | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add:
|
||
/** | ||
* Predicate to match nodes with any of the given mixin types | ||
* @author armintor@gmail.com | ||
* | ||
*/ | ||
public class AnyTypesPredicate extends BooleanTypesPredicate { | ||
/** | ||
* True if any of the types specified match. | ||
* @param types | ||
*/ | ||
public AnyTypesPredicate(String...types) { | ||
super(types); | ||
} | ||
|
||
@Override | ||
protected boolean test(final int matched) { | ||
return matched > 0; | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/** | ||
* Copyright 2013 DuraSpace, Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.fcrepo.kernel.services.functions; | ||
|
||
import static com.google.common.base.Throwables.propagate; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
|
||
import javax.jcr.Node; | ||
import javax.jcr.RepositoryException; | ||
import javax.jcr.nodetype.NodeType; | ||
|
||
import com.google.common.base.Predicate; | ||
|
||
/** | ||
* Base class for matching sets of node types | ||
* @author armintor@gmail.com | ||
* | ||
*/ | ||
public abstract class BooleanTypesPredicate implements Predicate<Node> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this be caught by Checkstyles (ditto the other examples in this PR)? If it isn't, I'll make a ticket for @awoods to see that it is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is apparently not caught :( by checkstyles. |
||
|
||
protected final Collection<String> nodeTypes; | ||
|
||
/** | ||
* Base constructor for function peforming boolean ops on matched node types. | ||
* @param types | ||
*/ | ||
public BooleanTypesPredicate(String... types) { | ||
nodeTypes = Arrays.asList(types); | ||
} | ||
|
||
@Override | ||
public boolean apply(Node input) { | ||
if (input == null) { | ||
throw new IllegalArgumentException( | ||
"null node passed to" + getClass().getName() | ||
); | ||
} | ||
int matched = 0; | ||
try { | ||
for (NodeType nodeType: input.getMixinNodeTypes()) { | ||
if (nodeTypes.contains(nodeType.getName())) { | ||
matched++; | ||
} | ||
} | ||
} catch (RepositoryException e) { | ||
propagate(e); | ||
} | ||
return test(matched); | ||
} | ||
|
||
protected abstract boolean test(int matched); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,7 @@ | |
import javax.jcr.version.VersionHistory; | ||
|
||
import org.fcrepo.jcr.FedoraJcrTypes; | ||
import org.fcrepo.kernel.services.functions.AnyTypesPredicate; | ||
import org.joda.time.DateTime; | ||
import org.joda.time.DateTimeZone; | ||
import org.joda.time.format.DateTimeFormatter; | ||
|
@@ -142,37 +143,22 @@ public boolean apply(final Node node) { | |
/** | ||
* Predicate for determining whether this {@link Node} is a Fedora object. | ||
*/ | ||
public static Predicate<Node> isFedoraObject = new Predicate<Node>() { | ||
|
||
@Override | ||
public boolean apply(final Node node) { | ||
checkArgument(node != null, "null cannot be a Fedora object!"); | ||
try { | ||
return map(node.getMixinNodeTypes(), nodetype2name).contains( | ||
FEDORA_OBJECT); | ||
} catch (final RepositoryException e) { | ||
throw propagate(e); | ||
} | ||
} | ||
}; | ||
public static Predicate<Node> isFedoraObject = | ||
new AnyTypesPredicate(FEDORA_OBJECT); | ||
|
||
/** | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This null check did not make the migration to BooleanTypesPredicate. Was it deemed superfluous? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I deem null to be superfluous! Now for Thanksgiving, let the option type reign supreme, and let us all give thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no horse in this race; happy to add that checkArg back in. It was inadvertently dropped. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Guava has a more precise check: Preconditions.checkNotNull(reference, errorMessage). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could also just throw an IllegalArgumentException if it's null. On Wed, Nov 27, 2013 at 11:51 AM, ajs6f notifications@github.com wrote:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is probably better to have a clear exception message versus a plain NPE in this case. Let's add it in. |
||
* Predicate for determining whether this {@link Node} is a Fedora | ||
* datastream. | ||
*/ | ||
public static Predicate<Node> isFedoraDatastream = new Predicate<Node>() { | ||
public static Predicate<Node> isFedoraDatastream = | ||
new AnyTypesPredicate(FEDORA_DATASTREAM); | ||
|
||
@Override | ||
public boolean apply(final Node node) { | ||
checkArgument(node != null, "null cannot be a Fedora datastream!"); | ||
try { | ||
return map(node.getMixinNodeTypes(), nodetype2name).contains( | ||
FEDORA_DATASTREAM); | ||
} catch (final RepositoryException e) { | ||
throw propagate(e); | ||
} | ||
} | ||
}; | ||
/** | ||
* Predicate for objects, datastreams, whatever! | ||
*/ | ||
|
||
public static Predicate<Node> isFedoraObjectOrDatastream = | ||
new AnyTypesPredicate(FEDORA_OBJECT, FEDORA_DATASTREAM); | ||
|
||
/** | ||
* Translates a {@link NodeType} to its {@link String} name. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the rationale for sizing the Set to: (events.size * 2 / 3)? versus some other value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
at an initial capacity of 2/3 max, and a load factor of 0.75, this should mean at most one resize of the set. We could size it accurately and set the load to 1, but my understanding is that performance is worse, and if we ignore some nodes we'l have slack anyway. Still, maybe not worth doing anything but setting the size to Math.min(16, (int)events.size())
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I trust your research and judgement in deciding between 2/3 or min(16). Thanks for providing the explanation.