Skip to content
Permalink
Browse files
Improve the madness in the GC (#2716)
* Add comments to makeRelative method and simplify
* Create new emptyPathsTest in GarbageCollectionTest

Co-authored-by: Christopher Tubbs <ctubbsii@apache.org>
  • Loading branch information
milleruntime and ctubbsii committed May 19, 2022
1 parent 6c5e568 commit 76aaba0ec4e086ab535a1e88ba6ecb0f62481b2c
Showing 2 changed files with 51 additions and 24 deletions.
@@ -18,8 +18,10 @@
*/
package org.apache.accumulo.gc;

import static java.util.Arrays.stream;
import static java.util.function.Predicate.not;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
@@ -52,59 +54,62 @@ public class GarbageCollectionAlgorithm {

private static final Logger log = LoggerFactory.getLogger(GarbageCollectionAlgorithm.class);

/**
* This method takes a file or directory path and returns a relative path in 1 of 2 forms:
*
* <pre>
* 1- For files: table-id/tablet-directory/filename.rf
* 2- For directories: table-id/tablet-directory
* </pre>
*
* For example, for full file path like hdfs://foo:6000/accumulo/tables/4/t0/F000.rf it will
* return 4/t0/F000.rf. For a directory that already is relative, like 4/t0, it will just return
* the original path. This method will also remove prefixed relative paths like ../4/t0/F000.rf
* and return 4/t0/F000.rf. It also strips out empty tokens from paths like
* hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf returning 4/t0/F001.rf.
*/
private String makeRelative(String path, int expectedLen) {
String relPath = path;

// remove prefixed old relative path
if (relPath.startsWith("../"))
relPath = relPath.substring(3);

// remove trailing slash
while (relPath.endsWith("/"))
relPath = relPath.substring(0, relPath.length() - 1);

// remove beginning slash
while (relPath.startsWith("/"))
relPath = relPath.substring(1);

String[] tokens = relPath.split("/");

// handle paths like a//b///c
boolean containsEmpty = false;
for (String token : tokens) {
if (token.equals("")) {
containsEmpty = true;
break;
}
}

if (containsEmpty) {
ArrayList<String> tmp = new ArrayList<>();
for (String token : tokens) {
if (!token.equals("")) {
tmp.add(token);
}
}

tokens = tmp.toArray(new String[tmp.size()]);
}
// Handle paths like a//b///c by dropping the empty tokens.
String[] tokens = stream(relPath.split("/")).filter(not(""::equals)).toArray(String[]::new);

if (tokens.length > 3 && path.contains(":")) {
// full file path like hdfs://foo:6000/accumulo/tables/4/t0/F000.rf
if (tokens[tokens.length - 4].equals(Constants.TABLE_DIR)
&& (expectedLen == 0 || expectedLen == 3)) {
// return the last 3 tokens after tables, like 4/t0/F000.rf
relPath = tokens[tokens.length - 3] + "/" + tokens[tokens.length - 2] + "/"
+ tokens[tokens.length - 1];
} else if (tokens[tokens.length - 3].equals(Constants.TABLE_DIR)
&& (expectedLen == 0 || expectedLen == 2)) {
// return the last 2 tokens after tables, like 4/t0
relPath = tokens[tokens.length - 2] + "/" + tokens[tokens.length - 1];
} else {
throw new IllegalArgumentException(path);
throw new IllegalArgumentException("Failed to make path relative. Bad reference: " + path);
}
} else if (tokens.length == 3 && (expectedLen == 0 || expectedLen == 3)
&& !path.contains(":")) {
// we already have a relative path so return it, like 4/t0/F000.rf
relPath = tokens[0] + "/" + tokens[1] + "/" + tokens[2];
} else if (tokens.length == 2 && (expectedLen == 0 || expectedLen == 2)
&& !path.contains(":")) {
// return the last 2 tokens of the relative path, like 4/t0
relPath = tokens[0] + "/" + tokens[1];
} else {
throw new IllegalArgumentException(path);
throw new IllegalArgumentException("Failed to make path relative. Bad reference: " + path);
}

log.trace("{} -> {} expectedLen = {}", path, relPath, expectedLen);
@@ -290,6 +290,28 @@ public void testBasic2() throws Exception {
"hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
}

/**
* Tests valid file paths that have empty tokens.
*/
@Test
public void emptyPathsTest() throws Exception {
TestGCE gce = new TestGCE();

gce.candidates.add("hdfs://foo:6000/accumulo/tables/4//t0//F000.rf");
gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf");
gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf");
gce.candidates.add("hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf");

gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4//t0//F000.rf");
gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4//t0//F001.rf");
gce.addFileReference("6", null, "hdfs://foo.com:6000/accumulo//tables//6/t0/F006.rf");

GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
gca.collect(gce);

assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5//t0//F005.rf");
}

@Test
public void testRelative() throws Exception {
TestGCE gce = new TestGCE();

0 comments on commit 76aaba0

Please sign in to comment.