From 5bd0faf76d560e83ddda55b0efa8157ddc5ff59c Mon Sep 17 00:00:00 2001 From: milleruntime Date: Wed, 27 Jul 2016 14:46:29 -0400 Subject: [PATCH 1/8] ACCUMULO-1787: created TwoTierCompactionStrategy and its Test --- .../compaction/TwoTierCompactionStrategy.java | 94 +++++++++++++++ .../TwoTierCompactionStrategyTest.java | 111 ++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java create mode 100644 server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java new file mode 100644 index 00000000000..792d32d5a21 --- /dev/null +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.accumulo.tserver.compaction; + +import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.conf.Property; +import org.apache.accumulo.core.metadata.schema.DataFileValue; +import org.apache.accumulo.server.fs.FileRef; + +/** + * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is less than + * table.custom.file.small.compress.threshold than the faster compression type will be used. The faster compression type is specified in + * table.custom.file.small.compress.type. Otherwise, the normal table compression will be used. + * + */ +public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { + + /** + * Threshold memory in bytes. Files smaller than this threshold will use table.custom.file.small.compress.type for compression + */ + public static final String TABLE_SMALL_FILE_COMPRESSION_THRESHOLD = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.small.compress.threshold"; + /** + * Type of compression to use if small threshold is surpassed. One of "gz","lzo","snappy", or "none" + */ + public static final String TABLE_SMALL_FILE_COMPRESSION_TYPE = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.small.compress.type"; + + /** + * Helper method to check for required table properties. + * + * @param objectsToVerify + * any objects that shouldn't be null + * @throws IllegalArgumentException + * if any object in {@code objectsToVerify} is null + * + */ + public void verifyRequiredProperties(Object... 
objectsToVerify) throws IllegalArgumentException { + for (Object obj : objectsToVerify) { + if (obj == null) { + throw new IllegalArgumentException("Missing required Table properties for " + this.getClass().getName()); + } + } + } + + @Override + public boolean shouldCompact(MajorCompactionRequest request) { + return super.shouldCompact(request); + } + + @Override + public void gatherInformation(MajorCompactionRequest request) throws IOException { + super.gatherInformation(request); + } + + @Override + public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { + CompactionPlan plan = super.getCompactionPlan(request); + plan.writeParameters = new WriteParameters(); + Map tableProperties = request.getTableProperties(); + verifyRequiredProperties(tableProperties); + + String smallFileCompressionType = tableProperties.get(TABLE_SMALL_FILE_COMPRESSION_TYPE); + String threshold = tableProperties.get(TABLE_SMALL_FILE_COMPRESSION_THRESHOLD); + verifyRequiredProperties(smallFileCompressionType, threshold); + Long smallFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); + + long totalSize = 0; + for (Entry entry : request.getFiles().entrySet()) { + totalSize += entry.getValue().getSize(); + } + if (totalSize < smallFileCompressionThreshold) { + plan.writeParameters.setCompressType(smallFileCompressionType); + } + return plan; + } + +} diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java new file mode 100644 index 00000000000..c91e8c0cf1f --- /dev/null +++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.tserver.compaction; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.conf.ConfigurationCopy; +import org.apache.accumulo.core.data.impl.KeyExtent; +import org.apache.accumulo.core.metadata.schema.DataFileValue; +import org.apache.accumulo.server.fs.FileRef; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy + */ +public class TwoTierCompactionStrategyTest { + private String smallCompressionType = "snappy"; + private TwoTierCompactionStrategy ttcs = null; + private MajorCompactionRequest mcr = null; + private AccumuloConfiguration conf = null; + + private Map createFileMap(String... 
sa) { + + HashMap ret = new HashMap<>(); + for (int i = 0; i < sa.length; i += 2) { + ret.put(new FileRef("hdfs://nn1/accumulo/tables/5/t-0001/" + sa[i]), new DataFileValue(AccumuloConfiguration.getMemoryInBytes(sa[i + 1]), 1)); + } + + return ret; + } + + private AccumuloConfiguration createProperTableConfiguration() { + ConfigurationCopy result = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration()); + result.set(TwoTierCompactionStrategy.TABLE_SMALL_FILE_COMPRESSION_TYPE, smallCompressionType); + result.set(TwoTierCompactionStrategy.TABLE_SMALL_FILE_COMPRESSION_THRESHOLD, "500M"); + return result; + } + + @Before + public void setup() { + ttcs = new TwoTierCompactionStrategy(); + + } + + @Test + public void testDefaultCompaction() throws IOException { + conf = createProperTableConfiguration(); + KeyExtent ke = new KeyExtent("0", null, null); + mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); + Map fileMap = createFileMap("f1", "2G", "f2", "2G", "f3", "2G", "f4", "2G"); + mcr.setFiles(fileMap); + + Assert.assertTrue(ttcs.shouldCompact(mcr)); + Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); + Assert.assertEquals(4, mcr.getFiles().size()); + Assert.assertEquals(null, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); + } + + @Test + public void testSmallCompaction() throws IOException { + conf = createProperTableConfiguration(); + KeyExtent ke = new KeyExtent("0", null, null); + mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); + Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", "f8", + "100M"); + mcr.setFiles(fileMap); + + Assert.assertTrue(ttcs.shouldCompact(mcr)); + Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); + Assert.assertEquals(8, mcr.getFiles().size()); + Assert.assertEquals(smallCompressionType, 
ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); + } + + @Test + public void testMissingConfigProperties() { + conf = AccumuloConfiguration.getDefaultConfiguration(); + KeyExtent ke = new KeyExtent("0", null, null); + mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); + Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", "f8", + "100M"); + mcr.setFiles(fileMap); + + try { + ttcs.getCompactionPlan(mcr); + Assert.assertTrue("IllegalArgumentException should have been thrown.", false); + } catch (IllegalArgumentException iae) {} catch (Throwable t) { + Assert.assertTrue("IllegalArgumentException should have been thrown.", false); + } + } + +} From 7cb6c662ece62aabe021dbf9ecf741f8ff909768 Mon Sep 17 00:00:00 2001 From: milleruntime Date: Fri, 29 Jul 2016 09:05:30 -0400 Subject: [PATCH 2/8] ACCUMULO-1787: flipped TwoTierCompactionStrategy to use large compression when file size greater than threshold --- .../compaction/TwoTierCompactionStrategy.java | 28 ++++++++++--------- .../TwoTierCompactionStrategyTest.java | 22 +++++++-------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java index 792d32d5a21..901709607c0 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -26,21 +26,23 @@ import org.apache.accumulo.server.fs.FileRef; /** - * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is less than - * table.custom.file.small.compress.threshold than the faster compression type will be used. 
The faster compression type is specified in - * table.custom.file.small.compress.type. Otherwise, the normal table compression will be used. + * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is larger than + * table.custom.file.large.compress.threshold than the larger compression type will be used. The larger compression type is specified in + * table.custom.file.large.compress.type. Otherwise, the configured table compression will be used. * + * NOTE: To use this strategy with Minor Compactions set table.file.compress.type=snappy and set a different compress type in + * table.custom.file.large.compress.type for larger files. */ public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { /** - * Threshold memory in bytes. Files smaller than this threshold will use table.custom.file.small.compress.type for compression + * Threshold memory in bytes. Files larger than this threshold will use table.custom.file.large.compress.type for compression */ - public static final String TABLE_SMALL_FILE_COMPRESSION_THRESHOLD = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.small.compress.threshold"; + public static final String TABLE_LARGE_FILE_COMPRESSION_THRESHOLD = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.large.compress.threshold"; /** - * Type of compression to use if small threshold is surpassed. One of "gz","lzo","snappy", or "none" + * Type of compression to use if large threshold is surpassed. One of "gz","lzo","snappy", or "none" */ - public static final String TABLE_SMALL_FILE_COMPRESSION_TYPE = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.small.compress.type"; + public static final String TABLE_LARGE_FILE_COMPRESSION_TYPE = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.large.compress.type"; /** * Helper method to check for required table properties. 
@@ -76,17 +78,17 @@ public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { Map tableProperties = request.getTableProperties(); verifyRequiredProperties(tableProperties); - String smallFileCompressionType = tableProperties.get(TABLE_SMALL_FILE_COMPRESSION_TYPE); - String threshold = tableProperties.get(TABLE_SMALL_FILE_COMPRESSION_THRESHOLD); - verifyRequiredProperties(smallFileCompressionType, threshold); - Long smallFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); + String largeFileCompressionType = tableProperties.get(TABLE_LARGE_FILE_COMPRESSION_TYPE); + String threshold = tableProperties.get(TABLE_LARGE_FILE_COMPRESSION_THRESHOLD); + verifyRequiredProperties(largeFileCompressionType, threshold); + Long largeFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); long totalSize = 0; for (Entry entry : request.getFiles().entrySet()) { totalSize += entry.getValue().getSize(); } - if (totalSize < smallFileCompressionThreshold) { - plan.writeParameters.setCompressType(smallFileCompressionType); + if (totalSize > largeFileCompressionThreshold) { + plan.writeParameters.setCompressType(largeFileCompressionType); } return plan; } diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java index c91e8c0cf1f..8d1de2fffd7 100644 --- a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java +++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java @@ -34,7 +34,7 @@ * Tests org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy */ public class TwoTierCompactionStrategyTest { - private String smallCompressionType = "snappy"; + private String largeCompressionType = "gz"; private TwoTierCompactionStrategy ttcs = null; private 
MajorCompactionRequest mcr = null; private AccumuloConfiguration conf = null; @@ -51,15 +51,14 @@ private Map createFileMap(String... sa) { private AccumuloConfiguration createProperTableConfiguration() { ConfigurationCopy result = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration()); - result.set(TwoTierCompactionStrategy.TABLE_SMALL_FILE_COMPRESSION_TYPE, smallCompressionType); - result.set(TwoTierCompactionStrategy.TABLE_SMALL_FILE_COMPRESSION_THRESHOLD, "500M"); + result.set(TwoTierCompactionStrategy.TABLE_LARGE_FILE_COMPRESSION_TYPE, largeCompressionType); + result.set(TwoTierCompactionStrategy.TABLE_LARGE_FILE_COMPRESSION_THRESHOLD, "500M"); return result; } @Before public void setup() { ttcs = new TwoTierCompactionStrategy(); - } @Test @@ -67,28 +66,29 @@ public void testDefaultCompaction() throws IOException { conf = createProperTableConfiguration(); KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); - Map fileMap = createFileMap("f1", "2G", "f2", "2G", "f3", "2G", "f4", "2G"); + Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", "f8", + "100M"); mcr.setFiles(fileMap); Assert.assertTrue(ttcs.shouldCompact(mcr)); Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); - Assert.assertEquals(4, mcr.getFiles().size()); + Assert.assertEquals(8, mcr.getFiles().size()); Assert.assertEquals(null, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); } @Test - public void testSmallCompaction() throws IOException { + public void testLargeCompaction() throws IOException { conf = createProperTableConfiguration(); KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); - Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", 
"f8", - "100M"); + Map fileMap = createFileMap("f1", "2G", "f2", "2G", "f3", "2G", "f4", "2G"); + mcr.setFiles(fileMap); Assert.assertTrue(ttcs.shouldCompact(mcr)); Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); - Assert.assertEquals(8, mcr.getFiles().size()); - Assert.assertEquals(smallCompressionType, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); + Assert.assertEquals(4, mcr.getFiles().size()); + Assert.assertEquals(largeCompressionType, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); } @Test From 0e98c8701800eab5abe159e7534ea9eba0270a97 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Tue, 16 Aug 2016 14:44:05 -0400 Subject: [PATCH 3/8] ACCUMULO-1787: created calculateTotalSize to get total size from plan --- .../compaction/TwoTierCompactionStrategy.java | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java index 901709607c0..5115e8c90db 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -19,11 +19,14 @@ import java.io.IOException; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.server.fs.FileRef; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; /** * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is larger than @@ -61,6 +64,18 @@ public void verifyRequiredProperties(Object... 
objectsToVerify) throws IllegalAr } } + /** + * Calculates the total size of input files in the compaction plan + */ + private Long calculateTotalSize(MajorCompactionRequest request, CompactionPlan plan) { + long totalSize = 0; + Map allFiles = request.getFiles(); + for (FileRef fileRef : plan.inputFiles) { + totalSize += allFiles.get(fileRef).getSize(); + } + return totalSize; + } + @Override public boolean shouldCompact(MajorCompactionRequest request) { return super.shouldCompact(request); @@ -82,11 +97,8 @@ public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { String threshold = tableProperties.get(TABLE_LARGE_FILE_COMPRESSION_THRESHOLD); verifyRequiredProperties(largeFileCompressionType, threshold); Long largeFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); + Long totalSize = calculateTotalSize(request, plan); - long totalSize = 0; - for (Entry entry : request.getFiles().entrySet()) { - totalSize += entry.getValue().getSize(); - } if (totalSize > largeFileCompressionThreshold) { plan.writeParameters.setCompressType(largeFileCompressionType); } From 277914298099d883a904a8992574c62393fab67f Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Tue, 16 Aug 2016 16:07:17 -0400 Subject: [PATCH 4/8] ACCUMULO-1787: added a new test to TwoTierCompactionStrategyTest --- .../compaction/TwoTierCompactionStrategy.java | 4 --- .../TwoTierCompactionStrategyTest.java | 30 +++++++++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java index 5115e8c90db..75bf3ada965 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -18,15 +18,11 @@ import 
java.io.IOException; import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.server.fs.FileRef; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; /** * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is larger than diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java index 8d1de2fffd7..6ff315998af 100644 --- a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java +++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import org.apache.accumulo.core.conf.AccumuloConfiguration; @@ -71,8 +72,11 @@ public void testDefaultCompaction() throws IOException { mcr.setFiles(fileMap); Assert.assertTrue(ttcs.shouldCompact(mcr)); - Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); Assert.assertEquals(8, mcr.getFiles().size()); + + List filesToCompact = ttcs.getCompactionPlan(mcr).inputFiles; + Assert.assertEquals(fileMap.keySet(), new HashSet<>(filesToCompact)); + Assert.assertEquals(8, filesToCompact.size()); Assert.assertEquals(null, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); } @@ -82,12 +86,14 @@ public void testLargeCompaction() throws IOException { KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); Map fileMap = createFileMap("f1", 
"2G", "f2", "2G", "f3", "2G", "f4", "2G"); - mcr.setFiles(fileMap); Assert.assertTrue(ttcs.shouldCompact(mcr)); - Assert.assertEquals(fileMap.keySet(), new HashSet<>(ttcs.getCompactionPlan(mcr).inputFiles)); Assert.assertEquals(4, mcr.getFiles().size()); + + List filesToCompact = ttcs.getCompactionPlan(mcr).inputFiles; + Assert.assertEquals(fileMap.keySet(), new HashSet<>(filesToCompact)); + Assert.assertEquals(4, filesToCompact.size()); Assert.assertEquals(largeCompressionType, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); } @@ -108,4 +114,22 @@ public void testMissingConfigProperties() { } } + @Test + public void testFileSubsetCompaction() throws IOException { + conf = createProperTableConfiguration(); + KeyExtent ke = new KeyExtent("0", null, null); + mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); + Map fileMap = createFileMap("f1", "1G", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "10M", "f6", "10M", "f7", "10M"); + Map filesToCompactMap = createFileMap("f2", "10M", "f3", "10M", "f4", "10M", "f5", "10M", "f6", "10M", "f7", "10M"); + mcr.setFiles(fileMap); + + Assert.assertTrue(ttcs.shouldCompact(mcr)); + Assert.assertEquals(7, mcr.getFiles().size()); + + List filesToCompact = ttcs.getCompactionPlan(mcr).inputFiles; + Assert.assertEquals(filesToCompactMap.keySet(), new HashSet<>(filesToCompact)); + Assert.assertEquals(6, filesToCompact.size()); + Assert.assertEquals(null, ttcs.getCompactionPlan(mcr).writeParameters.getCompressType()); + } + } From 09c25a00686cbe1f7e5a0b7703f93acba4f7c1cc Mon Sep 17 00:00:00 2001 From: milleruntime Date: Wed, 31 Aug 2016 15:53:45 -0400 Subject: [PATCH 5/8] ACCUMULO-1787: created README.compactionStrategy and added logging to TwoTierCompactionStrategy --- .../examples/README.compactionStrategy | 54 +++++++++++++++++++ .../compaction/TwoTierCompactionStrategy.java | 10 +++- 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 
docs/src/main/resources/examples/README.compactionStrategy diff --git a/docs/src/main/resources/examples/README.compactionStrategy b/docs/src/main/resources/examples/README.compactionStrategy new file mode 100644 index 00000000000..a76f993e2f0 --- /dev/null +++ b/docs/src/main/resources/examples/README.compactionStrategy @@ -0,0 +1,54 @@ +Title: Apache Accumulo Customizing the Compaction Strategy +Notice: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +This tutorial uses the following Java classes, which can be found in org.apache.accumulo.tserver.compaction: + + * DefaultCompactionStrategy.java - determines which files to compact based on table.compaction.major.ratio and table.file.max + * EverythingCompactionStrategy.java - compacts all files + * SizeLimitCompactionStrategy.java - compacts files no bigger than table.compacations.major.strategy.opts.sizeLimit + * TwoTierCompactionStrategy.java - uses default compression for smaller files and table.custom.file.large.compress.type for larger files + +This is an example of how to configure a compaction strategy. By default Accumulo will always use the DefaultCompactionStrategy, unless +these steps are taken to change the configuration. 
Use the strategy and settings that best fits your Accumulo setup. + +The commands below will configure the TwoTierCompactionStrategy to use gz compression for files larger than 10M. + + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.custom.file.large.compress.threshold=10M" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.custom.file.large.compress.type=gz" + +The command below sets the compression for smaller files and minor compactions. + + $ ./bin/accumulo shell -u root -p secret -e "config -s table.file.compress.type=snappy" + +To test the strategy works run the commands: + + $ ./bin/accumulo shell -u root -p secret -e "createtable test1" + $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 10000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 + $ ./bin/accumulo shell -u root -p secret -e "compact -t test1" + +View the tserver log in /logs for the compaction and find the name of the that was recently compacted. Print info about this file using the PrintInfo tool: + + $ bin/accumulo org.apache.accumulo.core.file.rfile.PrintInfo + +Details about the rfile will be printed and the compression type should match the type used in the compaction... 
+Meta block : RFile.index + Raw size : 512 bytes + Compressed size : 278 bytes + Compression type : gz + diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java index 75bf3ada965..ca8c0b4dd77 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -23,6 +23,7 @@ import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.server.fs.FileRef; +import org.apache.log4j.Logger; /** * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is larger than @@ -33,7 +34,7 @@ * table.custom.file.large.compress.type for larger files. */ public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { - + private final Logger log = Logger.getLogger(TwoTierCompactionStrategy.class); /** * Threshold memory in bytes. Files larger than this threshold will use table.custom.file.large.compress.type for compression */ @@ -55,7 +56,8 @@ public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { public void verifyRequiredProperties(Object... 
objectsToVerify) throws IllegalArgumentException { for (Object obj : objectsToVerify) { if (obj == null) { - throw new IllegalArgumentException("Missing required Table properties for " + this.getClass().getName()); + throw new IllegalArgumentException("Missing required Table properties (" + TABLE_LARGE_FILE_COMPRESSION_TYPE + " and/or " + + TABLE_LARGE_FILE_COMPRESSION_THRESHOLD + ") for " + this.getClass().getName()); } } } @@ -96,6 +98,10 @@ public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { Long totalSize = calculateTotalSize(request, plan); if (totalSize > largeFileCompressionThreshold) { + if (log.isDebugEnabled()) { + log.debug("Changed compressType to " + largeFileCompressionType + ": totalSize(" + totalSize + ") was greater than threshold " + + largeFileCompressionThreshold); + } plan.writeParameters.setCompressType(largeFileCompressionType); } return plan; From 152005b586254f13ab3068b1652eadead8b86e9b Mon Sep 17 00:00:00 2001 From: milleruntime Date: Fri, 2 Sep 2016 15:39:57 -0400 Subject: [PATCH 6/8] ACCUMULO-1787: reworked Compaction Strategy to use init(opts) --- .../examples/README.compactionStrategy | 26 ++++++++------ .../compaction/TwoTierCompactionStrategy.java | 34 +++++++++++-------- .../TwoTierCompactionStrategyTest.java | 30 ++++++---------- 3 files changed, 46 insertions(+), 44 deletions(-) diff --git a/docs/src/main/resources/examples/README.compactionStrategy b/docs/src/main/resources/examples/README.compactionStrategy index a76f993e2f0..a3f3f3a792b 100644 --- a/docs/src/main/resources/examples/README.compactionStrategy +++ b/docs/src/main/resources/examples/README.compactionStrategy @@ -26,25 +26,31 @@ This tutorial uses the following Java classes, which can be found in org.apache. This is an example of how to configure a compaction strategy. By default Accumulo will always use the DefaultCompactionStrategy, unless these steps are taken to change the configuration. 
Use the strategy and settings that best fits your Accumulo setup. -The commands below will configure the TwoTierCompactionStrategy to use gz compression for files larger than 10M. - - $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy" - $ ./bin/accumulo shell -u root -p secret -e "config -s table.custom.file.large.compress.threshold=10M" - $ ./bin/accumulo shell -u root -p secret -e "config -s table.custom.file.large.compress.type=gz" - The command below sets the compression for smaller files and minor compactions. $ ./bin/accumulo shell -u root -p secret -e "config -s table.file.compress.type=snappy" -To test the strategy works run the commands: +The commands below will configure the TwoTierCompactionStrategy to use gz compression for files larger than 1M. + + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.threshold=1M" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.type=gz" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy" + +Generate some data and files in order to test the strategy: $ ./bin/accumulo shell -u root -p secret -e "createtable test1" $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 10000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 - $ ./bin/accumulo shell -u root -p secret -e "compact -t test1" + $ ./bin/accumulo shell -u root -p secret -e "flush -t test1" + $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 11000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 + $ 
./bin/accumulo shell -u root -p secret -e "flush -t test1" + $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 12000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 + $ ./bin/accumulo shell -u root -p secret -e "flush -t test1" + $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 13000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 + $ ./bin/accumulo shell -u root -p secret -e "flush -t test1" -View the tserver log in /logs for the compaction and find the name of the that was recently compacted. Print info about this file using the PrintInfo tool: +View the tserver log in /logs for the compaction and find the name of the that was compacted for your table. Print info about this file using the PrintInfo tool: - $ bin/accumulo org.apache.accumulo.core.file.rfile.PrintInfo + $ ./bin/accumulo org.apache.accumulo.core.file.rfile.PrintInfo Details about the rfile will be printed and the compression type should match the type used in the compaction... Meta block : RFile.index diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java index ca8c0b4dd77..a3877b0ef1d 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategy.java @@ -27,22 +27,25 @@ /** * A hybrid compaction strategy that supports two types of compression. If total size of files being compacted is larger than - * table.custom.file.large.compress.threshold than the larger compression type will be used. 
The larger compression type is specified in - * table.custom.file.large.compress.type. Otherwise, the configured table compression will be used. + * table.majc.compaction.strategy.opts.file.large.compress.threshold then the larger compression type will be used. The larger compression type is + * specified in table.majc.compaction.strategy.opts.file.large.compress.type. Otherwise, the configured table compression will be used. * * NOTE: To use this strategy with Minor Compactions set table.file.compress.type=snappy and set a different compress type in - * table.custom.file.large.compress.type for larger files. + * table.majc.compaction.strategy.opts.file.large.compress.type for larger files. */ public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { private final Logger log = Logger.getLogger(TwoTierCompactionStrategy.class); /** - * Threshold memory in bytes. Files larger than this threshold will use table.custom.file.large.compress.type for compression + * Threshold memory in bytes. Files larger than this threshold will use table.majc.compaction.strategy.opts.file.large.compress.type for compression */ - public static final String TABLE_LARGE_FILE_COMPRESSION_THRESHOLD = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.large.compress.threshold"; + public static final String LARGE_FILE_COMPRESSION_THRESHOLD = "file.large.compress.threshold"; + private Long largeFileCompressionThreshold; + /** * Type of compression to use if large threshold is surpassed. One of "gz","lzo","snappy", or "none" */ - public static final String TABLE_LARGE_FILE_COMPRESSION_TYPE = Property.TABLE_ARBITRARY_PROP_PREFIX.getKey() + "file.large.compress.type"; + public static final String LARGE_FILE_COMPRESSION_TYPE = "file.large.compress.type"; + private String largeFileCompressionType; /** * Helper method to check for required table properties. 
@@ -56,8 +59,8 @@ public class TwoTierCompactionStrategy extends DefaultCompactionStrategy { public void verifyRequiredProperties(Object... objectsToVerify) throws IllegalArgumentException { for (Object obj : objectsToVerify) { if (obj == null) { - throw new IllegalArgumentException("Missing required Table properties (" + TABLE_LARGE_FILE_COMPRESSION_TYPE + " and/or " - + TABLE_LARGE_FILE_COMPRESSION_THRESHOLD + ") for " + this.getClass().getName()); + throw new IllegalArgumentException("Missing required " + Property.TABLE_COMPACTION_STRATEGY_PREFIX + " (" + LARGE_FILE_COMPRESSION_TYPE + " and/or " + + LARGE_FILE_COMPRESSION_THRESHOLD + ") for " + this.getClass().getName()); } } } @@ -74,6 +77,14 @@ private Long calculateTotalSize(MajorCompactionRequest request, CompactionPlan p return totalSize; } + @Override + public void init(Map options) { + String threshold = options.get(LARGE_FILE_COMPRESSION_THRESHOLD); + largeFileCompressionType = options.get(LARGE_FILE_COMPRESSION_TYPE); + verifyRequiredProperties(threshold, largeFileCompressionType); + largeFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); + } + @Override public boolean shouldCompact(MajorCompactionRequest request) { return super.shouldCompact(request); @@ -88,13 +99,6 @@ public void gatherInformation(MajorCompactionRequest request) throws IOException public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { CompactionPlan plan = super.getCompactionPlan(request); plan.writeParameters = new WriteParameters(); - Map tableProperties = request.getTableProperties(); - verifyRequiredProperties(tableProperties); - - String largeFileCompressionType = tableProperties.get(TABLE_LARGE_FILE_COMPRESSION_TYPE); - String threshold = tableProperties.get(TABLE_LARGE_FILE_COMPRESSION_THRESHOLD); - verifyRequiredProperties(largeFileCompressionType, threshold); - Long largeFileCompressionThreshold = AccumuloConfiguration.getMemoryInBytes(threshold); Long totalSize = 
calculateTotalSize(request, plan); if (totalSize > largeFileCompressionThreshold) { diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java index 6ff315998af..6fb37dacf99 100644 --- a/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java +++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/compaction/TwoTierCompactionStrategyTest.java @@ -23,7 +23,6 @@ import java.util.Map; import org.apache.accumulo.core.conf.AccumuloConfiguration; -import org.apache.accumulo.core.conf.ConfigurationCopy; import org.apache.accumulo.core.data.impl.KeyExtent; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.server.fs.FileRef; @@ -39,6 +38,7 @@ public class TwoTierCompactionStrategyTest { private TwoTierCompactionStrategy ttcs = null; private MajorCompactionRequest mcr = null; private AccumuloConfiguration conf = null; + private HashMap opts = new HashMap<>(); private Map createFileMap(String... sa) { @@ -50,21 +50,17 @@ private Map createFileMap(String... 
sa) { return ret; } - private AccumuloConfiguration createProperTableConfiguration() { - ConfigurationCopy result = new ConfigurationCopy(AccumuloConfiguration.getDefaultConfiguration()); - result.set(TwoTierCompactionStrategy.TABLE_LARGE_FILE_COMPRESSION_TYPE, largeCompressionType); - result.set(TwoTierCompactionStrategy.TABLE_LARGE_FILE_COMPRESSION_THRESHOLD, "500M"); - return result; - } - @Before public void setup() { + opts.put(TwoTierCompactionStrategy.LARGE_FILE_COMPRESSION_TYPE, largeCompressionType); + opts.put(TwoTierCompactionStrategy.LARGE_FILE_COMPRESSION_THRESHOLD, "500M"); ttcs = new TwoTierCompactionStrategy(); } @Test public void testDefaultCompaction() throws IOException { - conf = createProperTableConfiguration(); + ttcs.init(opts); + conf = AccumuloConfiguration.getDefaultConfiguration(); KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", "f8", @@ -82,7 +78,8 @@ public void testDefaultCompaction() throws IOException { @Test public void testLargeCompaction() throws IOException { - conf = createProperTableConfiguration(); + ttcs.init(opts); + conf = AccumuloConfiguration.getDefaultConfiguration(); KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); Map fileMap = createFileMap("f1", "2G", "f2", "2G", "f3", "2G", "f4", "2G"); @@ -99,15 +96,9 @@ public void testLargeCompaction() throws IOException { @Test public void testMissingConfigProperties() { - conf = AccumuloConfiguration.getDefaultConfiguration(); - KeyExtent ke = new KeyExtent("0", null, null); - mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); - Map fileMap = createFileMap("f1", "10M", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "100M", "f6", "100M", "f7", "100M", "f8", - "100M"); - 
mcr.setFiles(fileMap); - try { - ttcs.getCompactionPlan(mcr); + opts.clear(); + ttcs.init(opts); Assert.assertTrue("IllegalArgumentException should have been thrown.", false); } catch (IllegalArgumentException iae) {} catch (Throwable t) { Assert.assertTrue("IllegalArgumentException should have been thrown.", false); @@ -116,7 +107,8 @@ public void testMissingConfigProperties() { @Test public void testFileSubsetCompaction() throws IOException { - conf = createProperTableConfiguration(); + ttcs.init(opts); + conf = AccumuloConfiguration.getDefaultConfiguration(); KeyExtent ke = new KeyExtent("0", null, null); mcr = new MajorCompactionRequest(ke, MajorCompactionReason.NORMAL, null, conf); Map fileMap = createFileMap("f1", "1G", "f2", "10M", "f3", "10M", "f4", "10M", "f5", "10M", "f6", "10M", "f7", "10M"); From 8f6c1ef3211e760207c404f81ab911fe0968df9a Mon Sep 17 00:00:00 2001 From: milleruntime Date: Tue, 6 Sep 2016 12:13:40 -0400 Subject: [PATCH 7/8] ACCUMULO-1787: added compaction strategy to user manual --- .../asciidoc/chapters/table_configuration.txt | 22 +++++++++++++++++++ .../examples/README.compactionStrategy | 4 ++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/docs/src/main/asciidoc/chapters/table_configuration.txt b/docs/src/main/asciidoc/chapters/table_configuration.txt index 5c62ccf65df..28075e23b95 100644 --- a/docs/src/main/asciidoc/chapters/table_configuration.txt +++ b/docs/src/main/asciidoc/chapters/table_configuration.txt @@ -454,6 +454,28 @@ table. In 1.4 the ability to compact a range of a table was added. To use this feature specify start and stop rows for the compact command. This will only compact tablets that overlap the given row range. +==== Compaction Strategies + +The default behavior of major compactions is defined in the class DefaultCompactionStrategy. 
+This behavior can be changed by overriding the following property with a fully qualified class name: + + table.majc.compaction.strategy + +Custom compaction strategies can have additional properties that are specified following the prefix property: + + table.majc.compaction.strategy.opts.* + +Accumulo provides a few classes that can be used as an alternative compaction strategy. These classes are located in the +org.apache.accumulo.tserver.compaction.* package. EverythingCompactionStrategy will simply compact all files. This is the +strategy used by the user "compact" command. SizeLimitCompactionStrategy compacts files no bigger than the limit set in the +property table.majc.compaction.strategy.opts.sizeLimit. + +TwoTierCompactionStrategy is a hybrid compaction strategy that supports two types of compression. If the total size of +files being compacted is larger than table.majc.compaction.strategy.opts.file.large.compress.threshold then a larger +compression type will be used. The larger compression type is specified in table.majc.compaction.strategy.opts.file.large.compress.type. +Otherwise, the configured table compression will be used. To use this strategy with minor compactions set table.file.compress.type=snappy +and set a different compress type in table.majc.compaction.strategy.opts.file.large.compress.type for larger files. + === Pre-splitting tables Accumulo will balance and distribute tables across servers. Before a diff --git a/docs/src/main/resources/examples/README.compactionStrategy b/docs/src/main/resources/examples/README.compactionStrategy index a3f3f3a792b..b6fd279d360 100644 --- a/docs/src/main/resources/examples/README.compactionStrategy +++ b/docs/src/main/resources/examples/README.compactionStrategy @@ -20,8 +20,8 @@ This tutorial uses the following Java classes, which can be found in org.apache. 
* DefaultCompactionStrategy.java - determines which files to compact based on table.compaction.major.ratio and table.file.max * EverythingCompactionStrategy.java - compacts all files - * SizeLimitCompactionStrategy.java - compacts files no bigger than table.compacations.major.strategy.opts.sizeLimit - * TwoTierCompactionStrategy.java - uses default compression for smaller files and table.custom.file.large.compress.type for larger files + * SizeLimitCompactionStrategy.java - compacts files no bigger than table.majc.compaction.strategy.opts.sizeLimit + * TwoTierCompactionStrategy.java - uses default compression for smaller files and table.majc.compaction.strategy.opts.file.large.compress.type for larger files This is an example of how to configure a compaction strategy. By default Accumulo will always use the DefaultCompactionStrategy, unless these steps are taken to change the configuration. Use the strategy and settings that best fits your Accumulo setup. From 1bcfc87fdfcc34c520827a066affec9e50c704cb Mon Sep 17 00:00:00 2001 From: milleruntime Date: Tue, 6 Sep 2016 15:42:49 -0400 Subject: [PATCH 8/8] ACCUMULO-1787: some minor corrections to README.compactionStrategy --- .../examples/README.compactionStrategy | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/src/main/resources/examples/README.compactionStrategy b/docs/src/main/resources/examples/README.compactionStrategy index b6fd279d360..344080b598c 100644 --- a/docs/src/main/resources/examples/README.compactionStrategy +++ b/docs/src/main/resources/examples/README.compactionStrategy @@ -24,21 +24,26 @@ This tutorial uses the following Java classes, which can be found in org.apache. * TwoTierCompactionStrategy.java - uses default compression for smaller files and table.majc.compaction.strategy.opts.file.large.compress.type for larger files This is an example of how to configure a compaction strategy. 
By default Accumulo will always use the DefaultCompactionStrategy, unless -these steps are taken to change the configuration. Use the strategy and settings that best fits your Accumulo setup. +these steps are taken to change the configuration. Use the strategy and settings that best fit your Accumulo setup. This example shows +how to configure and test one of the more complicated strategies, the TwoTierCompactionStrategy. Note that this example requires Hadoop +native libraries built with snappy in order to use snappy compression. -The command below sets the compression for smaller files and minor compactions. +To begin, run the command to create a table for testing: - $ ./bin/accumulo shell -u root -p secret -e "config -s table.file.compress.type=snappy" + $ ./bin/accumulo shell -u root -p secret -e "createtable test1" + +The command below sets the compression for smaller files and minor compactions for that table. + + $ ./bin/accumulo shell -u root -p secret -e "config -s table.file.compress.type=snappy -t test1" The commands below will configure the TwoTierCompactionStrategy to use gz compression for files larger than 1M. 
- $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.threshold=1M" - $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.type=gz" - $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.threshold=1M -t test1" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy.opts.file.large.compress.type=gz -t test1" + $ ./bin/accumulo shell -u root -p secret -e "config -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.TwoTierCompactionStrategy -t test1" Generate some data and files in order to test the strategy: - $ ./bin/accumulo shell -u root -p secret -e "createtable test1" $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 10000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 $ ./bin/accumulo shell -u root -p secret -e "flush -t test1" $ ./bin/accumulo org.apache.accumulo.examples.simple.client.SequentialBatchWriter -i instance17 -z localhost:2181 -u root -p secret -t test1 --start 0 --num 11000 --size 50 --batchMemory 20M --batchLatency 500 --batchThreads 20 @@ -50,7 +55,7 @@ Generate some data and files in order to test the strategy: View the tserver log in /logs for the compaction and find the name of the that was compacted for your table. Print info about this file using the PrintInfo tool: - $ ./bin/accumulo org.apache.accumulo.core.file.rfile.PrintInfo + $ ./bin/accumulo rfile-info Details about the rfile will be printed and the compression type should match the type used in the compaction... Meta block : RFile.index