Permalink
Browse files

HBASE-7383 create integration test for HBASE-5416 (improving scan per…

…formance for certain filters) (Sergey)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1433224 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 19e1c78 commit c6a7b2088fd1c45a22d182bc4bb1063f4511495a Zhihong Yu committed Jan 14, 2013
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.util.test;
+
+import java.util.Set;
+
+/**
+ * A generator of random data (keys/cfs/columns/values) for load testing.
+ * Holds a {@link LoadTestKVGenerator} instance in a protected field as a matter of
+ * convenience for subclasses; all key/column/value generation and verification
+ * methods are abstract and must be supplied by the concrete generator.
+ */
+public abstract class LoadTestDataGenerator {
+ protected final LoadTestKVGenerator kvGenerator;
+
+ /**
+ * Initializes the object. Both sizes are passed straight to the
+ * {@link LoadTestKVGenerator} constructor.
+ * @param minValueSize minimum size of the value generated by
+ * {@link #generateValue(byte[], byte[], byte[])}.
+ * @param maxValueSize maximum size of the value generated by
+ * {@link #generateValue(byte[], byte[], byte[])}.
+ */
+ public LoadTestDataGenerator(int minValueSize, int maxValueSize) {
+ this.kvGenerator = new LoadTestKVGenerator(minValueSize, maxValueSize);
+ }
+
+ /**
+ * Generates a deterministic, unique hashed row key from a number. That way, the user can
+ * keep track of numbers, without messing with byte array and ensuring key distribution.
+ * @param keyBase Base number for a key, such as a loop counter.
+ * @return The row key derived from keyBase.
+ */
+ public abstract byte[] getDeterministicUniqueKey(long keyBase);
+
+ /**
+ * Gets column families for the load test table.
+ * @return The array of byte[]s representing column family names.
+ */
+ public abstract byte[][] getColumnFamilies();
+
+ /**
+ * Generates an applicable set of columns to be used for a particular key and family.
+ * @param rowKey The row key to generate for.
+ * @param cf The column family name to generate for.
+ * @return The array of byte[]s representing column names.
+ */
+ public abstract byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf);
+
+ /**
+ * Generates a value to be used for a particular row/cf/column.
+ * @param rowKey The row key to generate for.
+ * @param cf The column family name to generate for.
+ * @param column The column name to generate for.
+ * @return The value to use.
+ */
+ public abstract byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column);
+
+ /**
+ * Checks that columns for a rowKey and cf are valid if generated via
+ * {@link #generateColumnsForCf(byte[], byte[])}.
+ * @param rowKey The row key to verify for.
+ * @param cf The column family name to verify for.
+ * @param columnSet The column set (for example, encountered by read).
+ * @return True iff valid.
+ */
+ public abstract boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet);
+
+ /**
+ * Checks that value for a rowKey/cf/column is valid if generated via
+ * {@link #generateValue(byte[], byte[], byte[])}.
+ * @param rowKey The row key to verify for.
+ * @param cf The column family name to verify for.
+ * @param column The column name to verify for.
+ * @param value The value (for example, encountered by read).
+ * @return True iff valid.
+ */
+ public abstract boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value);
+}
@@ -16,6 +16,7 @@
*/
package org.apache.hadoop.hbase.util.test;
+import java.util.Map;
import java.util.Random;
import org.apache.hadoop.hbase.util.Bytes;
@@ -27,8 +28,6 @@
* hash. Values are generated by selecting value size in the configured range
* and generating a pseudo-random sequence of bytes seeded by key, column
* qualifier, and value size.
- * <p>
- * Not thread-safe, so a separate instance is needed for every writer thread/
*/
public class LoadTestKVGenerator {
@@ -49,13 +48,13 @@ public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
/**
* Verifies that the given byte array is the same as what would be generated
- * for the given row key and qualifier. We are assuming that the value size
- * is correct, and only verify the actual bytes. However, if the min/max
- * value sizes are set sufficiently high, an accidental match should be
+ * for the given seed strings (row/cf/column/...). We are assuming that the
+ * value size is correct, and only verify the actual bytes. However, if the
+ * min/max value sizes are set sufficiently high, an accidental match should be
* extremely improbable.
*/
- public static boolean verify(String rowKey, String qual, byte[] value) {
- byte[] expectedData = getValueForRowColumn(rowKey, qual, value.length);
+ public static boolean verify(byte[] value, byte[]... seedStrings) {
+ byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
return Bytes.equals(expectedData, value);
}
@@ -74,27 +73,31 @@ public static String md5PrefixedKey(long key) {
/**
* Generates a value for the given key index and column qualifier. Size is
* selected randomly in the configured range. The generated value depends
- * only on the combination of the key, qualifier, and the selected value
- * size. This allows to verify the actual value bytes when reading, as done
- * in {@link #verify(String, String, byte[])}.
+ * only on the combination of the strings passed (key/cf/column/...) and the selected
+ * value size. This allows to verify the actual value bytes when reading, as done
+ * in {@link #verify(byte[], byte[]...)}.
+ * This method is as thread-safe as Random class. It appears that the worst bug ever
+ * found with the latter is that multiple threads will get some duplicate values, which
+ * we don't care about.
*/
- public byte[] generateRandomSizeValue(long key, String qual) {
- String rowKey = md5PrefixedKey(key);
+ public byte[] generateRandomSizeValue(byte[]... seedStrings) {
int dataSize = minValueSize;
- if(minValueSize != maxValueSize){
+ if(minValueSize != maxValueSize) {
dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
}
- return getValueForRowColumn(rowKey, qual, dataSize);
+ return getValueForRowColumn(dataSize, seedStrings);
}
/**
* Generates random bytes of the given size for the given row and column
* qualifier. The random seed is fully determined by these parameters.
*/
- private static byte[] getValueForRowColumn(String rowKey, String qual,
- int dataSize) {
- Random seededRandom = new Random(rowKey.hashCode() + qual.hashCode() +
- dataSize);
+ private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
+ long seed = dataSize;
+ for (byte[] str : seedStrings) {
+ seed += Bytes.toString(str).hashCode();
+ }
+ Random seededRandom = new Random(seed);
byte[] randomBytes = new byte[dataSize];
seededRandom.nextBytes(randomBytes);
return randomBytes;
@@ -41,8 +41,8 @@
@Test
public void testValueLength() {
for (int i = 0; i < 1000; ++i) {
- byte[] v = gen.generateRandomSizeValue(i,
- String.valueOf(rand.nextInt()));
+ byte[] v = gen.generateRandomSizeValue(Integer.toString(i).getBytes(),
+ String.valueOf(rand.nextInt()).getBytes());
assertTrue(MIN_LEN <= v.length);
assertTrue(v.length <= MAX_LEN);
}
@@ -52,12 +52,12 @@ public void testValueLength() {
public void testVerification() {
for (int i = 0; i < 1000; ++i) {
for (int qualIndex = 0; qualIndex < 20; ++qualIndex) {
- String qual = String.valueOf(qualIndex);
- byte[] v = gen.generateRandomSizeValue(i, qual);
- String rowKey = LoadTestKVGenerator.md5PrefixedKey(i);
- assertTrue(LoadTestKVGenerator.verify(rowKey, qual, v));
+ byte[] qual = String.valueOf(qualIndex).getBytes();
+ byte[] rowKey = LoadTestKVGenerator.md5PrefixedKey(i).getBytes();
+ byte[] v = gen.generateRandomSizeValue(rowKey, qual);
+ assertTrue(LoadTestKVGenerator.verify(v, rowKey, qual));
v[0]++;
- assertFalse(LoadTestKVGenerator.verify(rowKey, qual, v));
+ assertFalse(LoadTestKVGenerator.verify(v, rowKey, qual));
}
}
}
Oops, something went wrong.

0 comments on commit c6a7b20

Please sign in to comment.