Skip to content

Commit

Permalink
HBASE-28216 HDFS erasure coding support for table data dirs (#5591)
Browse files Browse the repository at this point in the history
Signed-off-by: Nihal Jain <nihaljain@apache.org>
Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
Signed-off-by: Duo Zhang <zhangduo@apache.org>
  • Loading branch information
bbeaudreault committed Feb 5, 2024
1 parent c28e285 commit 989bfea
Show file tree
Hide file tree
Showing 19 changed files with 892 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ public class HTableDescriptor implements TableDescriptor, Comparable<HTableDescr
TableDescriptorBuilder.DEFAULT_REGION_REPLICATION;
public static final boolean DEFAULT_REGION_MEMSTORE_REPLICATION =
TableDescriptorBuilder.DEFAULT_REGION_MEMSTORE_REPLICATION;

/**
* Table attribute name for the HDFS erasure coding policy. Alias of
* TableDescriptorBuilder.ERASURE_CODING_POLICY.
*/
public static final String ERASURE_CODING_POLICY = TableDescriptorBuilder.ERASURE_CODING_POLICY;
protected final ModifyableTableDescriptor delegatee;

/**
Expand Down Expand Up @@ -260,6 +262,28 @@ public HTableDescriptor setCompactionEnabled(final boolean isEnable) {
return this;
}

/**
 * Configures which HDFS erasure coding policy applies to this table. The value is handed to the
 * modifiable delegate and will be propagated to HDFS for the data dir of the table. Erasure
 * coding is an alternative to normal replication that takes less space at the cost of locality.
 * The policy must be available and enabled on the hdfs cluster before being set.
 * @param policy the policy to set, or null to disable erasure coding
 * @return this descriptor, so calls can be chained
 */
public HTableDescriptor setErasureCodingPolicy(final String policy) {
  final ModifyableTableDescriptor modifiable = getDelegateeForModification();
  modifiable.setErasureCodingPolicy(policy);
  return this;
}

/**
 * Reads the HDFS erasure coding policy of this table from the wrapped descriptor. The policy is
 * set on the data dir of the table and is an alternative to normal replication which takes less
 * space at the cost of locality.
 * @return the current policy, or null if undefined
 */
@Override
public String getErasureCodingPolicy() {
  final String policy = delegatee.getErasureCodingPolicy();
  return policy;
}

/**
* Check if the region split enable flag of the table is true. If flag is false then no split will
* be done.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,15 @@ default Collection<String> getCoprocessors() {
*/
boolean isReadOnly();

/**
* The HDFS erasure coding policy for a table. This will be set on the data dir of the table, and
* is an alternative to normal replication which takes less space at the cost of locality.
* <p>
* This default implementation always returns null, i.e. it reports that no policy is defined;
* descriptor implementations that store a policy override it.
* @return the current policy, or null if undefined
*/
default String getErasureCodingPolicy() {
return null;
}

/**
* Returns Name of this table and then a map of all of the column family descriptors (with only
* the non-default column family attributes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ public class TableDescriptorBuilder {
private static final Bytes REGION_MEMSTORE_REPLICATION_KEY =
new Bytes(Bytes.toBytes(REGION_MEMSTORE_REPLICATION));

/**
* If non-null, the HDFS erasure coding policy to set on the data dir of the table
*/
public static final String ERASURE_CODING_POLICY = "ERASURE_CODING_POLICY";
// Byte-encoded form of ERASURE_CODING_POLICY, built once so it can be reused as a lookup key.
private static final Bytes ERASURE_CODING_POLICY_KEY =
new Bytes(Bytes.toBytes(ERASURE_CODING_POLICY));

// Default is null: no erasure coding policy is defined unless one is set explicitly.
// NOTE(review): the static initializer stores String.valueOf(DEFAULT_ERASURE_CODING_POLICY) in
// DEFAULT_VALUES, which is the literal text "null" rather than a null reference - confirm that
// this is the intended default representation.
private static final String DEFAULT_ERASURE_CODING_POLICY = null;
/**
* Used by shell/rest interface to access this metadata attribute which denotes if the table
* should be treated by region normalizer.
Expand Down Expand Up @@ -234,6 +242,7 @@ public class TableDescriptorBuilder {
DEFAULT_VALUES.put(DURABILITY, DEFAULT_DURABLITY.name()); // use the enum name
DEFAULT_VALUES.put(REGION_REPLICATION, String.valueOf(DEFAULT_REGION_REPLICATION));
DEFAULT_VALUES.put(PRIORITY, String.valueOf(DEFAULT_PRIORITY));
DEFAULT_VALUES.put(ERASURE_CODING_POLICY, String.valueOf(DEFAULT_ERASURE_CODING_POLICY));
DEFAULT_VALUES.keySet().stream().map(s -> new Bytes(Bytes.toBytes(s)))
.forEach(RESERVED_KEYWORDS::add);
RESERVED_KEYWORDS.add(IS_META_KEY);
Expand Down Expand Up @@ -532,6 +541,11 @@ public TableDescriptorBuilder setReadOnly(final boolean readOnly) {
return this;
}

/**
* Sets the HDFS erasure coding policy on the table descriptor being built, by forwarding the
* value to the descriptor's own setErasureCodingPolicy.
* @param policy the policy to set, or null to disable erasure coding
* @return this builder, so calls can be chained
*/
public TableDescriptorBuilder setErasureCodingPolicy(String policy) {
desc.setErasureCodingPolicy(policy);
return this;
}

public TableDescriptorBuilder setRegionMemStoreReplication(boolean memstoreReplication) {
desc.setRegionMemStoreReplication(memstoreReplication);
return this;
Expand Down Expand Up @@ -802,6 +816,28 @@ public ModifyableTableDescriptor setReadOnly(final boolean readOnly) {
return setValue(READONLY_KEY, Boolean.toString(readOnly));
}

/**
 * Looks up the HDFS erasure coding policy stored under the ERASURE_CODING_POLICY attribute. The
 * policy is set on the data dir of the table and is an alternative to normal replication which
 * takes less space at the cost of locality.
 * @return the current policy, or null if undefined
 */
@Override
public String getErasureCodingPolicy() {
  final String policy = getValue(ERASURE_CODING_POLICY);
  return policy;
}

/**
 * Stores the HDFS erasure coding policy for the table under the erasure coding policy key. The
 * policy will be propagated to HDFS for the data dir of the table. Erasure coding is an
 * alternative to normal replication which takes less space at the cost of locality. The policy
 * must be available and enabled on the hdfs cluster before being set.
 * @param policy the policy to set, or null to disable erasure coding
 * @return the modifyable TD
 */
public ModifyableTableDescriptor setErasureCodingPolicy(String policy) {
  final ModifyableTableDescriptor updated = setValue(ERASURE_CODING_POLICY_KEY, policy);
  return updated;
}

/**
* Check if the compaction enable flag of the table is true. If flag is false then no
* minor/major compactions will be done in real.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,10 +365,10 @@ public void testStringCustomizedValues() throws HBaseException {
htd.toStringCustomizedValues());

htd = TableDescriptorBuilder.newBuilder(htd).setMaxFileSize("10737942528")
.setMemStoreFlushSize("256MB").build();
.setMemStoreFlushSize("256MB").setErasureCodingPolicy("RS-6-3-1024k").build();
assertEquals(
"'testStringCustomizedValues', " + "{TABLE_ATTRIBUTES => {DURABILITY => 'ASYNC_WAL', "
+ "MAX_FILESIZE => '10737942528 B (10GB 512KB)', "
+ "ERASURE_CODING_POLICY => 'RS-6-3-1024k', MAX_FILESIZE => '10737942528 B (10GB 512KB)', "
+ "MEMSTORE_FLUSHSIZE => '268435456 B (256MB)'}}, "
+ "{NAME => 'cf', BLOCKSIZE => '131072 B (128KB)'}",
htd.toStringCustomizedValues());
Expand Down
5 changes: 5 additions & 0 deletions hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ enum CreateTableState {
CREATE_TABLE_ASSIGN_REGIONS = 4;
CREATE_TABLE_UPDATE_DESC_CACHE = 5;
CREATE_TABLE_POST_OPERATION = 6;
CREATE_TABLE_SET_ERASURE_CODING_POLICY = 7;
}

message CreateTableStateData {
Expand All @@ -74,6 +75,7 @@ enum ModifyTableState {
MODIFY_TABLE_REOPEN_ALL_REGIONS = 7;
MODIFY_TABLE_CLOSE_EXCESS_REPLICAS = 8;
MODIFY_TABLE_ASSIGN_NEW_REPLICAS = 9;
MODIFY_TABLE_SYNC_ERASURE_CODING_POLICY = 10;
}

message ModifyTableStateData {
Expand Down Expand Up @@ -267,6 +269,7 @@ enum CloneSnapshotState {
CLONE_SNAPSHOT_UPDATE_DESC_CACHE = 5;
CLONE_SNAPSHOT_POST_OPERATION = 6;
CLONE_SNAPHOST_RESTORE_ACL = 7;
CLONE_SNAPSHOT_SET_ERASURE_CODING_POLICY = 8;
}

message CloneSnapshotStateData {
Expand All @@ -285,6 +288,7 @@ enum RestoreSnapshotState {
RESTORE_SNAPSHOT_WRITE_FS_LAYOUT = 3;
RESTORE_SNAPSHOT_UPDATE_META = 4;
RESTORE_SNAPSHOT_RESTORE_ACL = 5;
RESTORE_SNAPSHOT_SYNC_ERASURE_CODING_POLICY = 6;
}

message RestoreSnapshotStateData {
Expand All @@ -296,6 +300,7 @@ message RestoreSnapshotStateData {
repeated RegionInfo region_info_for_add = 6;
repeated RestoreParentToChildRegionsPair parent_to_child_regions_pair_list = 7;
optional bool restore_acl = 8;
// Declared optional rather than required: state data serialized before this field existed does
// not carry it, and protobuf refuses to parse a message that lacks a required field. Readers
// must check for presence before using the value.
optional TableSchema old_table_schema = 9;
}

enum DispatchMergingRegionsState {
Expand Down
Loading

0 comments on commit 989bfea

Please sign in to comment.