Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ public void delete(KEY key) throws IOException {
table.delete(key);
}

/**
 * {@inheritDoc}
 *
 * Delegates the range delete to the underlying table. The end key is
 * exclusive, i.e. it is not deleted (RocksDB DeleteRange semantics).
 */
@Override
public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
table.deleteRange(beginKey, endKey);
}

@Override
public void deleteWithBatch(BatchOperation batch, KEY key)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ public void deleteWithBatch(BatchOperation batch, String key)
super.deleteWithBatch(batch, prefix(key));
}

/**
 * {@inheritDoc}
 *
 * Applies this table's key prefix to both range boundaries before
 * delegating, so the range is interpreted within this table's
 * prefixed key space.
 */
@Override
public void deleteRange(String beginKey, String endKey) throws IOException {
super.deleteRange(prefix(beginKey), prefix(endKey));
}

@Override
public boolean isExist(String key) throws IOException {
return super.isExist(prefix(key));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ public void delete(byte[] key) throws IOException {
db.delete(family, key);
}

/**
 * {@inheritDoc}
 *
 * Issues the range delete against this table's RocksDB column family.
 */
@Override
public void deleteRange(byte[] beginKey, byte[] endKey) throws IOException {
db.deleteRange(family, beginKey, endKey);
}

@Override
public void deleteWithBatch(BatchOperation batch, byte[] key)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,22 @@ public void delete(ColumnFamily family, byte[] key) throws IOException {
}
}

/**
 * Deletes all keys in the range [beginKey, endKey) from the given column
 * family. The end key itself is NOT deleted (RocksDB DeleteRange contract).
 *
 * @param family column family to delete from
 * @param beginKey first key of the range (inclusive)
 * @param endKey end of the range (exclusive)
 * @throws IOException if RocksDB reports an error performing the delete
 */
public void deleteRange(ColumnFamily family, byte[] beginKey, byte[] endKey)
throws IOException {
assertClose();
try {
// NOTE(review): counter appears to track in-flight operations on the
// DB handle (incremented around every access) — confirm against close().
counter.incrementAndGet();
db.get().deleteRange(family.getHandle(), beginKey, endKey);
} catch (RocksDBException e) {
// Presumably marks the DB for closing on fatal errors before the
// exception is translated — same pattern as delete() above.
closeOnError(e, true);
final String message = "delete range " + bytes2String(beginKey) +
" to " + bytes2String(endKey) + " from " + family;
throw toIOException(this, message, e);
} finally {
counter.decrementAndGet();
}
}

@Override
public String toString() {
return name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,15 @@ default VALUE getReadCopy(KEY key) throws IOException {
*/
void deleteWithBatch(BatchOperation batch, KEY key) throws IOException;

/**
 * Deletes the contiguous range of keys [beginKey, endKey) from the
 * metadata store. The end key is exclusive: it is NOT deleted.
 *
 * @param beginKey first metadata key of the range (inclusive)
 * @param endKey end of the range (exclusive)
 * @throws IOException on failure
 */
void deleteRange(KEY beginKey, KEY endKey) throws IOException;

/**
* Returns the iterator for this metadata store.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,12 @@ public void delete(KEY key) throws IOException {
/**
 * Stages a single-key delete on the supplied batch; the key is first
 * encoded to its raw byte form via the codec registry.
 */
public void deleteWithBatch(BatchOperation batch, KEY key)
throws IOException {
rawTable.deleteWithBatch(batch, codecRegistry.asRawData(key));
}

/**
 * {@inheritDoc}
 *
 * Both range boundaries are codec-encoded to raw bytes before the
 * range delete is forwarded to the untyped table.
 */
@Override
public void deleteRange(KEY beginKey, KEY endKey) throws IOException {
rawTable.deleteRange(codecRegistry.asRawData(beginKey),
codecRegistry.asRawData(endKey));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class TestRDBTableStore {
"First", "Second", "Third",
"Fourth", "Fifth",
"Sixth", "Seventh",
"Eighth");
"Eighth", "Ninth");
private final List<String> prefixedFamilies = Arrays.asList(
"PrefixFirst",
"PrefixTwo", "PrefixThree",
Expand Down Expand Up @@ -165,7 +165,7 @@ public void delete() throws Exception {
}

// Write all the keys and delete the keys scheduled for delete.
//Assert we find only expected keys in the Table.
// Assert we find only expected keys in the Table.
try (Table testTable = rdbStore.getTable("Fourth")) {
for (int x = 0; x < deletedKeys.size(); x++) {
testTable.put(deletedKeys.get(x), value);
Expand All @@ -177,15 +177,78 @@ public void delete() throws Exception {
}

for (int x = 0; x < validKeys.size(); x++) {
Assertions.assertNotNull(testTable.get(validKeys.get(0)));
Assertions.assertNotNull(testTable.get(validKeys.get(x)));
}

for (int x = 0; x < deletedKeys.size(); x++) {
Assertions.assertNull(testTable.get(deletedKeys.get(0)));
Assertions.assertNull(testTable.get(deletedKeys.get(x)));
}
}
}

@Test
public void deleteRange() throws Exception {
  final int keyCount = 100;

  // Build zero-padded DB keys so their lexicographic order matches the
  // numeric order of the index prefix.
  List<byte[]> keys = new ArrayList<>();
  for (int i = 0; i < keyCount; i++) {
    keys.add(String.format("%03d-%s", i, RandomStringUtils.random(6))
        .getBytes(StandardCharsets.UTF_8));
  }
  // Arbitrary value shared by all keys
  byte[] value = RandomStringUtils.random(10).getBytes(StandardCharsets.UTF_8);

  try (Table testTable = rdbStore.getTable("Ninth")) {

    // Insert every key, then sanity-check that all of them are readable
    for (byte[] key : keys) {
      testTable.put(key, value);
    }
    for (byte[] key : keys) {
      Assertions.assertNotNull(testTable.get(key));
    }

    // Remove the zero-indexed range [10, 20); deleteRange treats the
    // end key as exclusive.
    final int rangeBegin = 10;
    final int rangeEnd = 20;
    testTable.deleteRange(keys.get(rangeBegin), keys.get(rangeEnd));

    // Keys inside [10, 20) are gone; everything else is untouched.
    for (int i = 0; i < keyCount; i++) {
      if (i >= rangeBegin && i < rangeEnd) {
        Assertions.assertNull(testTable.get(keys.get(i)));
      } else {
        Assertions.assertNotNull(testTable.get(keys.get(i)));
      }
    }

    // Wipe everything up to (but not including) the last key.
    testTable.deleteRange(keys.get(0), keys.get(keyCount - 1));
    for (int i = 0; i < keyCount - 1; i++) {
      Assertions.assertNull(testTable.get(keys.get(i)));
    }
    // The exclusive end key survives the range delete by design ...
    Assertions.assertNotNull(testTable.get(keys.get(keyCount - 1)));

    // ... and needs a point delete to go away.
    testTable.delete(keys.get(keyCount - 1));
    Assertions.assertNull(testTable.get(keys.get(keyCount - 1)));
  }

}

@Test
public void batchPut() throws Exception {
try (Table testTable = rdbStore.getTable("Fifth");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.utils.DBStoreHAManager;
Expand Down Expand Up @@ -282,6 +283,13 @@ List<OmVolumeArgs> listVolumes(String userName, String prefix,
ExpiredOpenKeys getExpiredOpenKeys(Duration expireThreshold, int count,
BucketLayout bucketLayout) throws IOException;

/**
 * Retrieves the table-level read/write lock for the given table.
 *
 * @param tableName name of the table whose lock is requested
 * @return the ReentrantReadWriteLock guarding that table; may be null
 *         if no lock has been registered for the table name
 */
ReentrantReadWriteLock getTableLock(String tableName);

/**
* Returns the user Table.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ public List<RepeatedOmKeyInfo> listTrash(String volumeName,
/**
 * Returns up to {@code count} keys pending deletion.
 *
 * @param count maximum number of keys to return
 * @return list of block groups for keys scheduled for deletion
 * @throws IOException on failure reading the metadata store
 */
@Override
public List<BlockGroup> getPendingDeletionKeys(final int count)
    throws IOException {
  // Single delegation: the metadata manager owns the deleted-key scan.
  // (The pasted source carried a duplicated return statement, which is an
  // unreachable statement in Java — only one return is kept.)
  return metadataManager.getPendingDeletionKeys(count);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -281,6 +282,16 @@ public class OmMetadataManagerImpl implements OMMetadataManager,
private boolean ignorePipelineinKey;
private Table deletedDirTable;

// Table-level locks that protects table read/write access. Note:
// Don't use this lock for tables other than deletedTable and deletedDirTable.
// This is a stopgap solution. Will remove when HDDS-5905 (HDDS-6483) is done.
private Map<String, ReentrantReadWriteLock> tableLockMap = new HashMap<>();

@Override
public ReentrantReadWriteLock getTableLock(String tableName) {
// Only tables registered during initialization (currently deletedTable)
// have a lock in the map; any other table name yields null.
return tableLockMap.get(tableName);
}

// Epoch is used to generate the objectIDs. The most significant 2 bits of
// objectIDs is set to this epoch. For clusters before HDDS-4315 there is
// no epoch as such. But it can be safely assumed that the most significant
Expand Down Expand Up @@ -561,6 +572,8 @@ protected void initializeOmTables(boolean addCacheMetrics)
deletedTable = this.store.getTable(DELETED_TABLE, String.class,
RepeatedOmKeyInfo.class);
checkTableStatus(deletedTable, DELETED_TABLE, addCacheMetrics);
// Currently, deletedTable is the only table that will need the table lock
tableLockMap.put(DELETED_TABLE, new ReentrantReadWriteLock(true));

openKeyTable =
this.store.getTable(OPEN_KEY_TABLE, String.class,
Expand Down Expand Up @@ -1385,8 +1398,24 @@ public List<BlockGroup> getPendingDeletionKeys(final int keyCount)
while (keyIter.hasNext() && currentCount < keyCount) {
KeyValue<String, RepeatedOmKeyInfo> kv = keyIter.next();
if (kv != null) {
// Multiple keys with the same path can be queued in one DB entry
RepeatedOmKeyInfo infoList = kv.getValue();
for (OmKeyInfo info : infoList.cloneOmKeyInfoList()) {
// Skip the key if it exists in the previous snapshot (of the same
// scope) as in this case its blocks should not be reclaimed

// TODO: [SNAPSHOT] HDDS-7968
// 1. If previous snapshot keyTable has key info.getObjectID(),
// skip it. Pending HDDS-7740 merge to reuse the util methods to
// check previousSnapshot.
// 2. For efficient lookup, the addition in design doc 4.b)1.b
// is critical.
// 3. With snapshot it is possible that only some of the keys in
// the DB key's RepeatedOmKeyInfo list can be reclaimed,
// make sure to update deletedTable accordingly in this case.
// 4. Further optimization: Skip all snapshotted keys altogether
// e.g. by prefixing all unreclaimable keys, then calling seek

// Add all blocks from all versions of the key to the deletion list
for (OmKeyLocationInfoGroup keyLocations :
info.getKeyLocationVersions()) {
Expand Down
Loading