From 84e815786f78c9391962ffd788265946abe546d2 Mon Sep 17 00:00:00 2001 From: matthpeterson Date: Fri, 16 Jun 2017 15:51:38 -0400 Subject: [PATCH 1/4] Update MetadataTableUtil.java Outputting every "loaded" entry in the table is excessive, especially for tables with multiple simultaneous bulk imports and multiple references to the same file. This has been seen to cause performance problems. Even when the log level was reduced, there was blocking within log4j. By checking whether trace logging is enabled once, outside the loop, and only logging at trace level, we have seen bulk import performance improvements. --- .../accumulo/server/util/MetadataTableUtil.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index c07114ba353..5882774a250 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -894,12 +894,19 @@ public static void removeBulkLoadEntries(Connector conn, String tableId, long ti BatchWriter bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig())) { mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); + boolean shouldTrace = log.isTraceEnabled(); + String tidString = Long.toString(tid); for (Entry entry : mscanner) { - log.debug("Looking at entry " + entry + " with tid " + tid); - if (Long.parseLong(entry.getValue().toString()) == tid) { - log.debug("deleting entry " + entry); - Mutation m = new Mutation(entry.getKey().getRow()); - m.putDelete(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier()); + if (shouldTrace) { + log.trace("Looking at entry " + entry + " with tid " + tidString); + } + if 
(entry.getValue().toString().equals(tidString)) { + if (shouldTrace) { + log.trace("deleting entry " + entry); + } + Key key = entry.getKey(); + Mutation m = new Mutation(key.getRow()); + m.putDelete(key.getColumnFamily(), key.getColumnQualifier()); bw.addMutation(m); } } From f6762179c4d71bbf76c54a079f155c4a7e7660d8 Mon Sep 17 00:00:00 2001 From: matthpeterson Date: Tue, 20 Jun 2017 08:22:17 -0400 Subject: [PATCH 2/4] Updated String to byte array for faster comparison --- .../org/apache/accumulo/server/util/MetadataTableUtil.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index 5882774a250..7b453384b83 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -895,12 +896,12 @@ public static void removeBulkLoadEntries(Connector conn, String tableId, long ti mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); boolean shouldTrace = log.isTraceEnabled(); - String tidString = Long.toString(tid); + byte[] tidAsBytes = Long.toString(tid).getBytes(); for (Entry entry : mscanner) { if (shouldTrace) { - log.trace("Looking at entry " + entry + " with tid " + tidString); + log.trace("Looking at entry " + entry + " with tid " + tid); } - if (entry.getValue().toString().equals(tidString)) { + if (Arrays.equals(entry.getValue().get(), tidAsBytes)) { if (shouldTrace) { log.trace("deleting entry " + entry); } From df64d6cc8a598d8ee0b4b2503b64ebf425dd22b6 Mon Sep 17 00:00:00 2001 From: matthpeterson 
Date: Tue, 20 Jun 2017 08:30:19 -0400 Subject: [PATCH 3/4] Update MetadataTableUtil.java --- .../java/org/apache/accumulo/server/util/MetadataTableUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index 7b453384b83..4a3266ddd26 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -896,7 +896,7 @@ public static void removeBulkLoadEntries(Connector conn, String tableId, long ti mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); boolean shouldTrace = log.isTraceEnabled(); - byte[] tidAsBytes = Long.toString(tid).getBytes(); + byte[] tidAsBytes = Long.toString(tid).getBytes(UTF_8); for (Entry entry : mscanner) { if (shouldTrace) { log.trace("Looking at entry " + entry + " with tid " + tid); From 01654c0a94c29a99fd1c6abed44d04fffd17b2f8 Mon Sep 17 00:00:00 2001 From: matthpeterson Date: Tue, 20 Jun 2017 11:32:45 -0400 Subject: [PATCH 4/4] Update MetadataTableUtil.java --- .../org/apache/accumulo/server/util/MetadataTableUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java index 4a3266ddd26..54ca37cebc7 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java @@ -899,11 +899,11 @@ public static void removeBulkLoadEntries(Connector conn, String tableId, long ti byte[] tidAsBytes = Long.toString(tid).getBytes(UTF_8); for (Entry entry : mscanner) { if 
(shouldTrace) { - log.trace("Looking at entry " + entry + " with tid " + tid); + log.trace("Looking at entry {} with tid {}", entry, tid); } if (Arrays.equals(entry.getValue().get(), tidAsBytes)) { if (shouldTrace) { - log.trace("deleting entry " + entry); + log.trace("deleting entry {}", entry); } Key key = entry.getKey(); Mutation m = new Mutation(key.getRow());