Skip to content

Commit

Permalink
HBASE-26421 Use HFileLink file to replace entire file's reference when splitting (#3842)
Browse files Browse the repository at this point in the history

Signed-off-by: Duo Zhang <zhangduo@apache.org>
  • Loading branch information
sunhelly committed Nov 18, 2021
1 parent 1c47f80 commit b2571df
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ public class HFileLink extends FileLink {
RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

/** Define the HFile Link name parser in the form of: table=region-hfile */
//made package private for testing
static final Pattern LINK_NAME_PATTERN =
public static final Pattern LINK_NAME_PATTERN =
Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
Expand Down Expand Up @@ -400,15 +399,40 @@ public static boolean create(final Configuration conf, final FileSystem fs,
String tableName = CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent())
.getNameAsString();

return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable,
linkedRegion, hfileName, createBackRef);
}

/**
 * Create a new HFileLink.
 *
 * <p>It also adds a back-reference to the hfile back-reference directory
 * to simplify the reference-count and the cleaning process.
 * @param conf {@link Configuration} to read for the archive directory name
 * @param fs {@link FileSystem} on which to write the HFileLink
 * @param dstFamilyPath - Destination path (table/region/cf/)
 * @param familyName - Destination family name
 * @param dstTableName - Destination table name
 * @param dstRegionName - Destination region name
 * @param linkedTable - Linked Table Name
 * @param linkedRegion - Linked Region Name
 * @param hfileName - Linked HFile name
 * @param createBackRef - Whether back reference should be created. Defaults to true.
 * @return true if the file is created, otherwise the file exists.
 * @throws IOException on file or parent directory creation failure
 */
public static boolean create(final Configuration conf, final FileSystem fs,
final Path dstFamilyPath, final String familyName, final String dstTableName,
final String dstRegionName, final TableName linkedTable, final String linkedRegion,
final String hfileName, final boolean createBackRef) throws IOException {
String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
String refName = createBackReferenceName(tableName, regionName);
String refName = createBackReferenceName(dstTableName, dstRegionName);

// Make sure the destination directory exists
fs.mkdirs(dstFamilyPath);

// Make sure the FileLink reference directory exists
Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
linkedTable, linkedRegion, familyName);
linkedTable, linkedRegion, familyName);
Path backRefPath = null;
if (createBackRef) {
Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -142,13 +143,14 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env,
.setSplit(false)
.setRegionId(rid)
.build();
if(tableDescriptor.getRegionSplitPolicyClassName() != null) {

if (tableDescriptor.getRegionSplitPolicyClassName() != null) {
// Since we don't have region reference here, creating the split policy instance without it.
// This can be used to invoke methods which don't require Region reference. This instantiation
// of a class on Master-side though it only makes sense on the RegionServer-side is
// for Phoenix Local Indexing. Refer HBASE-12583 for more information.
Class<? extends RegionSplitPolicy> clazz =
RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
}
}
Expand Down Expand Up @@ -624,16 +626,16 @@ public void createDaughterRegions(final MasterProcedureEnv env) throws IOExcepti

Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);

assertReferenceFileCount(fs, expectedReferences.getFirst(),
assertSplitResultFilesCount(fs, expectedReferences.getFirst(),
regionFs.getSplitsDir(daughterOneRI));
regionFs.commitDaughterRegion(daughterOneRI);
assertReferenceFileCount(fs, expectedReferences.getFirst(),
assertSplitResultFilesCount(fs, expectedReferences.getFirst(),
new Path(tabledir, daughterOneRI.getEncodedName()));

assertReferenceFileCount(fs, expectedReferences.getSecond(),
assertSplitResultFilesCount(fs, expectedReferences.getSecond(),
regionFs.getSplitsDir(daughterTwoRI));
regionFs.commitDaughterRegion(daughterTwoRI);
assertReferenceFileCount(fs, expectedReferences.getSecond(),
assertSplitResultFilesCount(fs, expectedReferences.getSecond(),
new Path(tabledir, daughterTwoRI.getEncodedName()));
}

Expand Down Expand Up @@ -773,11 +775,15 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
return new Pair<Integer, Integer>(daughterA, daughterB);
}

private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount,
final Path dir) throws IOException {
if (expectedReferenceFileCount != 0 &&
expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) {
throw new IOException("Failing split. Expected reference file count isn't equal.");
/**
 * Checks that {@code dir} contains exactly the expected number of split result files
 * (Reference and HFileLink files). An expectation of zero disables the check entirely.
 * @param fs the filesystem to inspect
 * @param expectedSplitResultFileCount expected number of split result files; 0 skips the check
 * @param dir region directory whose family dirs are scanned for result files
 * @throws IOException if the actual count differs from the expected count
 */
private void assertSplitResultFilesCount(final FileSystem fs,
    final int expectedSplitResultFileCount, Path dir)
    throws IOException {
  if (expectedSplitResultFileCount == 0) {
    // Nothing expected; skip the scan altogether.
    return;
  }
  final int actualCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir);
  if (actualCount != expectedSplitResultFileCount) {
    throw new IOException("Failing split. Didn't have expected reference and HFileLink files"
      + ", expected=" + expectedSplitResultFileCount + ", actual=" + actualCount);
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1068,8 +1068,8 @@ private long initializeRegionInternals(final CancelableProgressable reporter,
}
}

LOG.info("Opened {}; next sequenceid={}; {}, {}",
this.getRegionInfo().getShortNameToLog(), nextSeqId, this.splitPolicy, this.flushPolicy);
LOG.info("Opened {}; next sequenceid={}; {}, {}", this.getRegionInfo().getShortNameToLog(),
nextSeqId, this.splitPolicy, this.flushPolicy);

// A region can be reopened if failed a split; reset flags
this.closing.set(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.HFileLink.LINK_NAME_PATTERN;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.FileNotFoundException;
import java.io.IOException;
Expand All @@ -27,6 +28,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.regex.Matcher;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
Expand All @@ -39,11 +41,13 @@
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
Expand All @@ -53,7 +57,6 @@
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
Expand Down Expand Up @@ -662,37 +665,75 @@ public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte
LOG.warn("Found an already existing split file for {}. Assuming this is a recovery.", p);
return p;
}
boolean createLinkFile = false;
if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
// Check whether the split row lies in the range of the store file
// If it is outside the range, return directly.
f.initReader();
try {
Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
Optional<Cell> lastKey = f.getLastKey();
Optional<Cell> firstKey = f.getFirstKey();
if (top) {
//check if larger than last key.
Optional<Cell> lastKey = f.getLastKey();
// If lastKey is null means storefile is empty.
if (!lastKey.isPresent()) {
return null;
}
if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
return null;
}
if (firstKey.isPresent() && f.getComparator().compare(splitKey, firstKey.get()) <= 0) {
LOG.debug("Will create HFileLink file for {}, top=true", f.getPath());
createLinkFile = true;
}
} else {
//check if smaller than first key
Optional<Cell> firstKey = f.getFirstKey();
// If firstKey is null means storefile is empty.
if (!firstKey.isPresent()) {
return null;
}
if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
return null;
}
if (lastKey.isPresent() && f.getComparator().compare(splitKey, lastKey.get()) >= 0) {
LOG.debug("Will create HFileLink file for {}, top=false", f.getPath());
createLinkFile = true;
}
}
} finally {
f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
}
}
if (createLinkFile) {
// create HFileLink file instead of Reference file for child
String hfileName = f.getPath().getName();
TableName linkedTable = regionInfoForFs.getTable();
String linkedRegion = regionInfoForFs.getEncodedName();
try {
if (HFileLink.isHFileLink(hfileName)) {
Matcher m = LINK_NAME_PATTERN.matcher(hfileName);
if (!m.matches()) {
throw new IllegalArgumentException(hfileName + " is not a valid HFileLink name!");
}
linkedTable = TableName.valueOf(m.group(1), m.group(2));
linkedRegion = m.group(3);
hfileName = m.group(4);
}
// must create back reference here
HFileLink.create(conf, fs, splitDir, familyName, hri.getTable().getNameAsString(),
hri.getEncodedName(), linkedTable, linkedRegion, hfileName, true);
Path path =
new Path(splitDir, HFileLink.createHFileLinkName(linkedTable, linkedRegion, hfileName));
LOG.info("Created linkFile:" + path.toString() + " for child: " + hri.getEncodedName()
+ ", parent: " + regionInfoForFs.getEncodedName());
return path;
} catch (IOException e) {
// if create HFileLink file failed, then just skip the error and create Reference file
LOG.error("Create link file for " + hfileName + " for child " + hri.getEncodedName()
+ "failed, will create Reference file", e);
}
}
// A reference to the bottom half of the hsf store file.
Reference r =
top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.io.IOException;
import java.util.List;
import java.util.Optional;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1056,28 +1056,65 @@ protected boolean accept(Path p, @CheckForNull Boolean isDir) {
* @return List of paths to valid family directories in region dir.
* @throws IOException
*/
public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir) throws IOException {
public static List<Path> getFamilyDirs(final FileSystem fs, final Path regionDir)
throws IOException {
// assumes we are in a region dir.
FileStatus[] fds = fs.listStatus(regionDir, new FamilyDirFilter(fs));
List<Path> familyDirs = new ArrayList<>(fds.length);
return getFilePaths(fs, regionDir, new FamilyDirFilter(fs));
}

/**
 * Returns the paths of all Reference files found directly under the given
 * column family directory.
 * @param fs filesystem to list with
 * @param familyDir column family directory to scan
 * @return paths of the Reference files found
 * @throws IOException if listing the directory fails
 */
public static List<Path> getReferenceFilePaths(final FileSystem fs, final Path familyDir)
    throws IOException {
  final PathFilter referencesOnly = new ReferenceFileFilter(fs);
  return getFilePaths(fs, familyDir, referencesOnly);
}

/**
 * Returns the paths of all Reference and HFileLink files found directly under
 * the given column family directory.
 * @param fs filesystem to list with
 * @param familyDir column family directory to scan
 * @return paths of the Reference and HFileLink files found
 * @throws IOException if listing the directory fails
 */
public static List<Path> getReferenceAndLinkFilePaths(final FileSystem fs, final Path familyDir)
    throws IOException {
  final PathFilter refsAndLinks = new ReferenceAndLinkFileFilter(fs);
  return getFilePaths(fs, familyDir, refsAndLinks);
}

/**
 * Lists the immediate children of {@code dir} accepted by {@code pathFilter}
 * and returns their paths.
 * @param fs filesystem to list with
 * @param dir directory whose children are listed
 * @param pathFilter filter applied to each child path
 * @return paths of the accepted children
 * @throws IOException if listing the directory fails
 */
private static List<Path> getFilePaths(final FileSystem fs, final Path dir,
    final PathFilter pathFilter) throws IOException {
  FileStatus[] fds = fs.listStatus(dir, pathFilter);
  List<Path> files = new ArrayList<>(fds.length);
  for (FileStatus fdfs : fds) {
    files.add(fdfs.getPath());
  }
  return files;
}

public static List<Path> getReferenceFilePaths(final FileSystem fs, final Path familyDir) throws IOException {
List<FileStatus> fds = listStatusWithStatusFilter(fs, familyDir, new ReferenceFileFilter(fs));
if (fds == null) {
return Collections.emptyList();
public static int getRegionReferenceAndLinkFileCount(final FileSystem fs, final Path p) {
int result = 0;
try {
for (Path familyDir : getFamilyDirs(fs, p)) {
result += getReferenceAndLinkFilePaths(fs, familyDir).size();
}
} catch (IOException e) {
LOG.warn("Error Counting reference files.", e);
}
List<Path> referenceFiles = new ArrayList<>(fds.size());
for (FileStatus fdfs: fds) {
Path fdPath = fdfs.getPath();
referenceFiles.add(fdPath);
return result;
}

public static class ReferenceAndLinkFileFilter implements PathFilter {

private final FileSystem fs;

public ReferenceAndLinkFileFilter(FileSystem fs) {
this.fs = fs;
}

@Override
public boolean accept(Path rd) {
try {
// only files can be references.
return !fs.getFileStatus(rd).isDirectory() && (StoreFileInfo.isReference(rd) ||
HFileLink.isHFileLink(rd));
} catch (IOException ioe) {
// Maybe the file was moved or the fs was disconnected.
LOG.warn("Skipping file " + rd +" due to IOException", ioe);
return false;
}
}
return referenceFiles;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,17 @@ public static void tearDownAfterClass() {

/**
* Write a file and then assert that we can read from top and bottom halves using two
* HalfMapFiles.
* HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
*/
@Test
public void testBasicHalfMapFile() throws Exception {
public void testBasicHalfAndHFileLinkMapFile() throws Exception {
final RegionInfo hri =
RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfMapFileTb")).build();
RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
// The locations of HFileLink refers hfiles only should be consistent with the table dir
// create by CommonFSUtils directory, so we should make the region directory under
// the mode of CommonFSUtils.getTableDir here.
HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
new Path(testDir, hri.getTable().getNameAsString()), hri);
CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
Expand Down Expand Up @@ -395,6 +398,8 @@ private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f
f.initReader();
Cell midkey = f.getReader().midKey().get();
KeyValue midKV = (KeyValue) midkey;
// 1. test using the midRow as the splitKey, this test will generate two Reference files
// in the children
byte[] midRow = CellUtil.cloneRow(midKV);
// Create top split.
RegionInfo topHri =
Expand Down Expand Up @@ -455,7 +460,7 @@ private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f
regionFs.cleanupDaughterRegion(topHri);
regionFs.cleanupDaughterRegion(bottomHri);

// Next test using a midkey that does not exist in the file.
// 2. test using a midkey which will generate one Reference file and one HFileLink file.
// First, do a key that is < than first key. Ensure splits behave
// properly.
byte[] badmidkey = Bytes.toBytes(" .");
Expand Down
Loading

0 comments on commit b2571df

Please sign in to comment.