Skip to content

Commit

Permalink
Fix the HierarchyConverter to produce a consistent result
Browse files Browse the repository at this point in the history
- Implement to convert/revert the path for all path segments
- Refactor the auto-hierarchy support for the content file

Resolves: https://www.pivotaltracker.com/story/show/71140570
  • Loading branch information
lsitu authored and Andrew Woods committed May 14, 2014
1 parent 3b2a723 commit 3b57136
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 53 deletions.
Expand Up @@ -17,30 +17,32 @@

import static com.google.common.base.Joiner.on;
import static com.google.common.base.Splitter.fixedLength;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.getLast;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.collect.Lists.transform;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.UUID.randomUUID;
import static org.fcrepo.jcr.FedoraJcrTypes.FCR_CONTENT;
import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
import static org.slf4j.LoggerFactory.getLogger;

import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;

import org.slf4j.Logger;

import com.google.common.base.Function;

/**
* Injects and extracts segments of hierarchy before the last segment of a
* multi-part identifier to ensure efficient performance of the JCR.
* Injects and extracts segments of hierarchy in a multi-part identifier
* to ensure efficient performance of the JCR.
*
* @author ajs6f
* @date Mar 26, 2014
* @author lsitu
* @date May 9, 2014
*/
public class HierarchyConverter extends InternalIdentifierConverter {

Expand Down Expand Up @@ -69,83 +71,136 @@ public String apply(final String input) {
private static final Logger log = getLogger(HierarchyConverter.class);

/*
* (non-Javadoc)
* Convert an incoming path to auto-hierarchy path for JCR backend storage.
* For example: /parent/child => /xx/xx/parent/xx/xx/child
* @see com.google.common.base.Converter#doBackward(java.lang.Object)
*/
@Override
protected String doBackward(final String flat) {
log.debug("Converting incoming identifier: {}", flat);
String nonContentSuffixed = flat;
// strip content-indicating suffix
if (flat.endsWith(FCR_CONTENT)) {
nonContentSuffixed = flat.substring(0, flat.length() - (FCR_CONTENT.length()));
final boolean isContentFile = flat.endsWith(FCR_CONTENT) || flat.endsWith(JCR_CONTENT);
if (nonContentSuffixed.startsWith(separator)) {
nonContentSuffixed = nonContentSuffixed.substring(1, nonContentSuffixed.length());
}
final List<String> hierarchySegments = createHierarchySegments();
final List<String> flatSegments = asList(nonContentSuffixed.split(separator));
List<String> firstSegments = emptyList();
List<String> lastSegment = emptyList();
if (flatSegments.size() == 0) {
// either empty identifier or separator identifier
return on(separator).join(hierarchySegments);
List<String> hierarchySegments = null;
final List<String> jcrPathSegments = new ArrayList<>();
int pathSize = flatSegments.size();
if (isContentFile) {
jcrPathSegments.add(JCR_CONTENT);
// Subtract the fcr:content namespace element for auto-hierarchy
pathSize -= 1;
}
if (flatSegments.size() > 1) {
lastSegment = singletonList(getLast(flatSegments));
firstSegments = flatSegments.subList(0, flatSegments.size() - 1);
} else {
// just one segment
lastSegment = singletonList(flatSegments.get(0));
if (pathSize == 0) {
// either empty identifier or separator identifier
return nonContentSuffixed;
}
final Iterable<String> allSegments = concat(firstSegments, hierarchySegments, lastSegment);
if (flat.endsWith(FCR_CONTENT)) {
return on(separator).join(concat(allSegments, singletonList(JCR_CONTENT)));
for (int i = pathSize - 1; i >= 0; i--) {
final String seg = flatSegments.get(i);
if (seg == null || seg.length() == 0) {
if (pathSize == 1) {
return flat;
}
} else {
// Create the auto-hierarchy path segments, add them to
// the list in front of the transparent path segment
hierarchySegments = createHierarchySegments(hashKey(singletonList(seg)));
jcrPathSegments.add(0, flatSegments.get(i));
jcrPathSegments.addAll(0, hierarchySegments);
}
}
return on(separator).join(allSegments);
final String pathConverted = on(separator).join(jcrPathSegments);
log.trace("Converted incoming identifier \"{}\" to \"{}\".", flat, pathConverted);
return "/" + pathConverted;
}

/*
* (non-Javadoc)
* Convert an outgoing JCR hierarchy path to transparent path.
* For example: /xx/xx/parent/xx/xx/child => /parent/child
* @see com.google.common.base.Converter#doForward(java.lang.Object)
*/
@Override
protected String doForward(final String hierarchical) {
log.debug("Converting outgoing identifier: {}", hierarchical);
String nonContentSuffixed = hierarchical;
// strip content-indicating suffix
if (hierarchical.endsWith(JCR_CONTENT)) {
nonContentSuffixed = hierarchical.substring(0, hierarchical.length() - (JCR_CONTENT.length()));
if (nonContentSuffixed.startsWith(separator)) {
nonContentSuffixed = nonContentSuffixed.substring(1, nonContentSuffixed.length());
}
final List<String> jcrPathSegments = asList(nonContentSuffixed.split(separator));
int jcrPathSize = jcrPathSegments.size();
final boolean isContentFile = hierarchical.endsWith(JCR_CONTENT);
if (isContentFile) {
jcrPathSize -= 1;
}
final List<String> segments = asList(nonContentSuffixed.split(separator));
if (segments.size() <= levels) {
if (jcrPathSize <= levels) {
// must be a root identifier
return "";
return hierarchical;
}
List<String> firstSegments = emptyList();
List<String> lastSegment = emptyList();
if (segments.size() > levels + 1) {
// we subtract one for the final segment, then levels for the
// inserted hierarchy segments we want to remove
firstSegments = segments.subList(0, segments.size() - 1 - levels);
lastSegment = singletonList(getLast(segments));
} else {
// just the trailing non-hierarchical segment
lastSegment = singletonList(getLast(segments));
// Convert the auto-hierarchy path to transparent path
final List<String> pathSegments = new ArrayList<>();
List<String> hierarchySegments = null;
for (int i = levels ; i < jcrPathSize; i += levels + 1) {
final String seg = jcrPathSegments.get(i);
if (seg == null || seg.length() == 0) {
// If the segment null or empty, must be double slash or something wrong.
// Add it as empty that will produce an slash for now?
pathSegments.add("");
} else {
pathSegments.add(seg);
hierarchySegments = jcrPathSegments.subList(i - levels, i);
final String hierarchyPath = on(separator).join(hierarchySegments);
final String hashPath = on(separator).join(createHierarchySegments(seg));
if (!hierarchyPath.equals(hashPath)) {
log.error("Outgoing sub path {} hierarchy {} (got {}) doesn't match the path segament {}.",
on(separator).join(jcrPathSegments.subList(0, i + 1)), hierarchyPath, hashPath, seg);
}
}
}
if (hierarchical.endsWith(JCR_CONTENT)) {
return on(separator).join(concat(firstSegments, lastSegment, singletonList(FCR_CONTENT)));

if (isContentFile) {
//Don't forget the fcr:content segment for content files
pathSegments.add(FCR_CONTENT);
}
return on(separator).join(concat(firstSegments, lastSegment));
final String pathConverted = on(separator).join(pathSegments);
log.trace("Converted outgoing identifier \"{}\" to \"{}\".", hierarchical, pathConverted);
return "/" + pathConverted;
}

private List<String> createHierarchySegments() {
private List<String> createHierarchySegments(final String path) {
// offers a list of segments sliced out of a UUID, each prefixed
if (levels == 0) {
return emptyList();
}
return transform(fixedLength(length).splitToList(createHierarchyCharacterBlock()), addPrefix);
return transform(fixedLength(length).splitToList(createHierarchyCharacterBlock(path)), addPrefix);
}

private String hashKey(final List<String> pathSegments) {
return on(separator).join(pathSegments);
}

private CharSequence createHierarchyCharacterBlock(final String path) {
String hierarchyPath = "";
MessageDigest md;
try {
md = MessageDigest.getInstance("SHA-1");
md.reset();
md.update(path.getBytes("utf8"));
hierarchyPath = byteToHex(md.digest()).replace("-", "").substring(0, length * levels);
} catch (final Exception e) {
log.error("Hierarchy conversion auto-hierarchy", e);
}
return hierarchyPath;
}

private CharSequence createHierarchyCharacterBlock() {
return randomUUID().toString().replace("-", "").substring(0, length * levels);
private static String byteToHex(final byte[] hash) {
final Formatter formatter = new Formatter();
for (final byte b : hash) {
formatter.format("%02x", b);
}
final String result = formatter.toString();
formatter.close();
return result.toLowerCase();
}

/**
Expand Down
Expand Up @@ -17,6 +17,7 @@

import static com.google.common.base.Joiner.on;
import static com.google.common.base.Strings.repeat;
import static java.util.Arrays.asList;
import static java.util.Collections.nCopies;
import static java.util.regex.Pattern.compile;
import static org.fcrepo.jcr.FedoraJcrTypes.FCR_CONTENT;
Expand All @@ -26,6 +27,7 @@
import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
import static org.slf4j.LoggerFactory.getLogger;

import java.util.Iterator;
import java.util.regex.Matcher;

import org.junit.Before;
Expand All @@ -34,6 +36,10 @@

public class HierarchyConverterTest {

private static final byte DEFAULT_LEVEL = 4;

private static final byte DEFAULT_LENGTH = 2;

private static final String separator = "/";

private HierarchyConverter testTranslator;
Expand All @@ -44,7 +50,7 @@ public class HierarchyConverterTest {

private static final String endingSegment = testIdSegments[2];

private final String testId = on(separator).join(testIdSegments);
private final String testId = "/" + on(separator).join(testIdSegments);

private static final Logger log = getLogger(HierarchyConverterTest.class);

Expand Down Expand Up @@ -85,8 +91,11 @@ public void testRoundTrip(final byte levels, final byte length) {
testTranslator.setLevels(levels);
testTranslator.setLength(length);
final String result = testTranslator.reverse().convert(testId);
final String testRegexp =
startingSegments + separator + hierarchyRegexpSection(levels, length) + separator + endingSegment;
String testRegexp = "/";
for( final Iterator<String> it = asList(startingSegments.split(separator)).iterator(); it.hasNext();) {
testRegexp += hierarchyRegexpSection(levels, length) + separator + it.next() + separator;
}
testRegexp += hierarchyRegexpSection(levels, length) + separator + endingSegment;
final Matcher matches = compile(testRegexp).matcher(result);
log.debug("Got result of translation: {}", result);
log.debug("Matching against test pattern: {}", testRegexp);
Expand All @@ -95,6 +104,34 @@ public void testRoundTrip(final byte levels, final byte length) {
assertEquals("Didn't get original back!", testId, shouldBeOriginal);
}

@Test
public void sameIncomingPathOutput() {
testTranslator.setLevels(DEFAULT_LEVEL);
testTranslator.setLength(DEFAULT_LENGTH);
final String result = testTranslator.reverse().convert(testId);
final String shouldBeTheSame = testTranslator.reverse().convert(testId);
log.debug("Converted path {} to {} from incoming path {}!", result, shouldBeTheSame, testId);
assertEquals("Didn't get the same backend path!", result, shouldBeTheSame);
}

@Test
public void consistencyConversion() {
testTranslator.setLevels(DEFAULT_LEVEL);
testTranslator.setLength(DEFAULT_LENGTH);
for(int i = 0; i < 5; i++) {
final String result = testTranslator.reverse().convert(testId);
final String shouldBeTheSame = testTranslator.reverse().convert(testId);
log.debug("Didn't get the same backend path: expected {} but got {} from incoming path {}!", result, shouldBeTheSame, testId);
assertEquals("Didn't get the same backend path!", result, shouldBeTheSame);
}
final String hierarchyPath = testTranslator.reverse().convert(testId);
for(int i = 5; i > 0; i--) {
String reverted = testTranslator.convert(hierarchyPath);
log.debug("Reverted path {} to {} from original path {}!", hierarchyPath, reverted, testId);
assertEquals("Didn't get the same transparent path!", testId, reverted);
}
}

private static String hierarchyRegexpSection(final byte levels, final byte length) {
return on(separator).join(nCopies(levels, repeat("\\w", length)));
}
Expand All @@ -121,10 +158,21 @@ public void testWeirdIds() {

result = testTranslator.reverse().convert(separator);
log.debug("Separator identifier translated into {}.", separator, result);
assertEquals("Should have altered separator identifier to empty identifier!", "", testTranslator
assertEquals("Should not have altered separator identifier!", separator, testTranslator
.convert(result));
}

@Test
public void testOutgoingDoubleSlashPattern() {
testTranslator.setLevels(3);
testTranslator.setLength(1);
String hirarchy = testTranslator.reverse().convert(testId);
String outgoingPath = "a/b/c/" + hirarchy;
String result = testTranslator.convert(outgoingPath);
log.debug("Outgoing path {} converted to transparent path {}.", outgoingPath, result);
assertEquals("Should not have altered the outgoing double slash!", "/" + testId, result);
}

@Test
public void testContentPaths() {
testTranslator.setLevels(0);
Expand Down

0 comments on commit 3b57136

Please sign in to comment.