Patch introduces the following changes:

1. Add “progressive” schema generators in tests, i.e., ones that start with simple schemas and data types (such as ones that use only longs) and progress to more complex ones (strings, doubles, and other data types, including reversed ones).
2. Fix support for reversed types in clustering columns.
3. Remove adjustEntropyDomain and improve the Float and Double data types, making them byte-ordered and switching them to use less entropy (3 bytes for float and 7 bytes for double).
Patch by Alex Petrov for CASSANDRA-15348.
apache · Sep 21, 2020 · a8270c7 · a8270c7
1 parent c950d3a
commit a8270c7
Show file tree

Hide file tree

Showing 18 changed files with 465 additions and 276 deletions.
diff --git a/harry-core/src/harry/corruptor/AddExtraRowCorruptor.java b/harry-core/src/harry/corruptor/AddExtraRowCorruptor.java
@@ -21,6 +21,9 @@
 import java.util.HashSet;
 import java.util.Set;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import harry.data.ResultSetRow;
 import harry.ddl.SchemaSpec;
 import harry.model.OpSelectors;
@@ -31,6 +34,8 @@
 
 public class AddExtraRowCorruptor implements QueryResponseCorruptor
 {
+    private static final Logger logger = LoggerFactory.getLogger(AddExtraRowCorruptor.class);
+
     private final SchemaSpec schema;
     private final OpSelectors.MonotonicClock clock;
     private final OpSelectors.DescriptorSelector descriptorSelector;
@@ -57,25 +62,28 @@ public boolean maybeCorrupt(Query query, SystemUnderTest sut)
                 maxLts = Math.max(maxLts, row.lts[i]);
         }
 
-        if (cds.size() >= descriptorSelector.maxPartitionSize())
-            return false;
+        boolean partitionIsFull = cds.size() >= descriptorSelector.maxPartitionSize();
 
-        long cd;
         long attempt = 0;
-        do
+        long cd = descriptorSelector.randomCd(query.pd, attempt, schema);;
+        while (!query.match(cd) || cds.contains(cd))
         {
-            cd = descriptorSelector.randomCd(query.pd, attempt, schema);
+            if (partitionIsFull)
+                // We can't pick from the existing CDs, so let's try to come up with a new one that would match the query
+                cd += descriptorSelector.randomCd(query.pd, attempt, schema);
+            else
+                cd = descriptorSelector.randomCd(query.pd, attempt, schema);
             if (attempt++ == 1000)
                 return false;
         }
-        while (!query.match(cd) || cds.contains(cd));
 
         long[] vds = descriptorSelector.vds(query.pd, cd, maxLts, 0, schema);
 
         // We do not know if the row was deleted. We could try inferring it, but that
         // still won't help since we can't use it anyways, since collisions between a
         // written value and tombstone are resolved in favour of tombstone, so we're
         // just going to take the next lts.
+        logger.info("Corrupting the resultset by writing a row with cd {}", cd);
         sut.execute(WriteHelper.inflateInsert(schema, query.pd, cd, vds, clock.rts(maxLts) + 1));
         return true;
     }

diff --git a/harry-core/src/harry/ddl/ColumnSpec.java b/harry-core/src/harry/ddl/ColumnSpec.java
@@ -104,11 +104,6 @@ public T inflate(long current)
         return type.generator().inflate(current);
     }
 
-    public long adjustEntropyDomain(long current)
-    {
-        return type.generator().adjustEntropyDomain(current);
-    }
-
     public long deflate(T value)
     {
         return type.generator().deflate(value);
@@ -162,7 +157,10 @@ public boolean isReversed()
 
         public abstract Bijections.Bijection<T> generator();
 
-        public abstract int maxSize();
+        public int maxSize()
+        {
+            return generator().byteSize();
+        }
 
         public String toString()
         {
@@ -176,84 +174,56 @@ public Bijections.Bijection<Byte> generator()
         {
             return Bijections.INT8_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Byte.BYTES;
-        }
     };
+
     public static final DataType<Short> int16Type = new DataType<Short>("smallint")
     {
         public Bijections.Bijection<Short> generator()
         {
             return Bijections.INT16_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Short.BYTES;
-        }
     };
+
     public static final DataType<Integer> int32Type = new DataType<Integer>("int")
     {
         public Bijections.Bijection<Integer> generator()
         {
             return Bijections.INT32_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Integer.BYTES;
-        }
     };
+
     public static final DataType<Long> int64Type = new DataType<Long>("bigint")
     {
         public Bijections.Bijection<Long> generator()
         {
             return Bijections.INT64_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Long.BYTES;
-        }
     };
+
     public static final DataType<Boolean> booleanType = new DataType<Boolean>("boolean")
     {
         public Bijections.Bijection<Boolean> generator()
         {
             return Bijections.BOOLEAN_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Byte.BYTES;
-        }
     };
+
     public static final DataType<Float> floatType = new DataType<Float>("float")
     {
         public Bijections.Bijection<Float> generator()
         {
             return Bijections.FLOAT_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Float.BYTES;
-        }
     };
+
     public static final DataType<Double> doubleType = new DataType<Double>("double")
     {
         public Bijections.Bijection<Double> generator()
         {
             return Bijections.DOUBLE_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Double.BYTES;
-        }
     };
+
     public static final DataType<String> asciiType = new DataType<String>("ascii")
     {
         private final Bijections.Bijection<String> gen = new StringBijection();
@@ -262,11 +232,6 @@ public Bijections.Bijection<String> generator()
         {
             return gen;
         }
-
-        public int maxSize()
-        {
-            return Long.BYTES;
-        }
     };
 
     public static DataType<String> asciiType(int nibbleSize, int maxRandomNibbles)
@@ -279,11 +244,6 @@ public Bijections.Bijection<String> generator()
             {
                 return gen;
             }
-
-            public int maxSize()
-            {
-                return Long.BYTES;
-            }
         };
     }
 
@@ -293,11 +253,6 @@ public Bijections.Bijection<UUID> generator()
         {
             return Bijections.UUID_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Long.BYTES;
-        }
     };
 
     public static final DataType<Date> timestampType = new DataType<Date>("timestamp")
@@ -306,11 +261,6 @@ public Bijections.Bijection<Date> generator()
         {
             return Bijections.TIMESTAMP_GENERATOR;
         }
-
-        public int maxSize()
-        {
-            return Long.BYTES;
-        }
     };
 
     public static final Collection<DataType<?>> DATA_TYPES = ImmutableList.of(

diff --git a/harry-core/src/harry/ddl/SchemaGenerators.java b/harry-core/src/harry/ddl/SchemaGenerators.java
@@ -168,10 +168,15 @@ public Builder partitionKeyColumnCount(int minCols, int maxCols)
         }
 
         public Builder partitionKeySpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+        {
+            return partitionKeySpec(minCols, maxCols, Arrays.asList(columnTypes));
+        }
+
+        public Builder partitionKeySpec(int minCols, int maxCols, Collection<ColumnSpec.DataType<?>> columnTypes)
         {
             this.minPks = minCols;
             this.maxPks = maxCols;
-            this.pkGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "pk", ColumnSpec.Kind.PARTITION_KEY);
+            this.pkGenerator = columnSpecGenerator(columnTypes, "pk", ColumnSpec.Kind.PARTITION_KEY);
             return this;
         }
 
@@ -188,10 +193,15 @@ public Builder clusteringColumnCount(int minCols, int maxCols)
         }
 
         public Builder clusteringKeySpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+        {
+            return clusteringKeySpec(minCols, maxCols, Arrays.asList(columnTypes));
+        }
+
+        public Builder clusteringKeySpec(int minCols, int maxCols, Collection<ColumnSpec.DataType<?>> columnTypes)
         {
             this.minCks = minCols;
             this.maxCks = maxCols;
-            this.ckGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "ck", ColumnSpec.Kind.CLUSTERING);
+            this.ckGenerator = columnSpecGenerator(columnTypes, "ck", ColumnSpec.Kind.CLUSTERING);
             return this;
         }
 
@@ -208,10 +218,15 @@ public Builder regularColumnCount(int minCols, int maxCols)
         }
 
         public Builder regularColumnSpec(int minCols, int maxCols, ColumnSpec.DataType<?>... columnTypes)
+        {
+            return this.regularColumnSpec(minCols, maxCols, Arrays.asList(columnTypes));
+        }
+
+        public Builder regularColumnSpec(int minCols, int maxCols, Collection<ColumnSpec.DataType<?>> columnTypes)
         {
             this.minRegular = minCols;
             this.maxRegular = maxCols;
-            this.regularGenerator = columnSpecGenerator(Arrays.asList(columnTypes), "regular", ColumnSpec.Kind.REGULAR);
+            this.regularGenerator = columnSpecGenerator(columnTypes, "regular", ColumnSpec.Kind.REGULAR);
             return this;
         }
 
@@ -292,4 +307,101 @@ public static Surjections.Surjection<SchemaSpec> defaultSchemaSpecGen(String ks,
                                   ColumnSpec.asciiType(5, 10))
                .surjection();
     }
-}
+
+    public static final String DEFAULT_KEYSPACE_NAME = "harry";
+    private static final String DEFAULT_PREFIX = "table_";
+    private static final AtomicInteger counter = new AtomicInteger();
+    private static final Supplier<String> tableNameSupplier = () -> DEFAULT_PREFIX + counter.getAndIncrement();
+
+    // simplest schema gen, nothing can go wrong with it
+    public static final Surjections.Surjection<SchemaSpec> longOnlySpecBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                 .partitionKeySpec(1, 1, ColumnSpec.int64Type)
+                                                                                 .clusteringKeySpec(1, 1, ColumnSpec.int64Type)
+                                                                                 .regularColumnSpec(1, 10, ColumnSpec.int64Type)
+                                                                                 .surjection();
+
+    private static final ColumnSpec.DataType<String> simpleStringType = ColumnSpec.asciiType(4, 10);
+    private static final Surjections.Surjection<SchemaSpec> longAndStringSpecBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                       .partitionKeySpec(2, 2, ColumnSpec.int64Type, simpleStringType)
+                                                                                       .clusteringKeySpec(2, 2, ColumnSpec.int64Type, simpleStringType)
+                                                                                       .regularColumnSpec(1, 10, ColumnSpec.int64Type, simpleStringType)
+                                                                                       .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec> longOnlyWithReverseSpecBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                            .partitionKeySpec(1, 1, ColumnSpec.int64Type)
+                                                                                            .clusteringKeySpec(1, 1, ColumnSpec.ReversedType.getInstance(ColumnSpec.int64Type))
+                                                                                            .regularColumnSpec(1, 10, ColumnSpec.int64Type)
+                                                                                            .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec> longAndStringSpecWithReversedLongBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                                      .partitionKeySpec(2, 2, ColumnSpec.int64Type, simpleStringType)
+                                                                                                      .clusteringKeySpec(2, 2, ColumnSpec.ReversedType.getInstance(ColumnSpec.int64Type), simpleStringType)
+                                                                                                      .regularColumnSpec(1, 10, ColumnSpec.int64Type, simpleStringType)
+                                                                                                      .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec> longAndStringSpecWithReversedStringBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                                        .partitionKeySpec(2, 2, ColumnSpec.int64Type, simpleStringType)
+                                                                                                        .clusteringKeySpec(2, 2, ColumnSpec.int64Type, ColumnSpec.ReversedType.getInstance(simpleStringType))
+                                                                                                        .regularColumnSpec(1, 10, ColumnSpec.int64Type, simpleStringType)
+                                                                                                        .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec> longAndStringSpecWithReversedBothBuilder = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                                      .partitionKeySpec(2, 2, ColumnSpec.int64Type, simpleStringType)
+                                                                                                      .clusteringKeySpec(2, 2, ColumnSpec.ReversedType.getInstance(ColumnSpec.int64Type), ColumnSpec.ReversedType.getInstance(simpleStringType))
+                                                                                                      .regularColumnSpec(1, 10, ColumnSpec.int64Type, simpleStringType)
+                                                                                                      .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec> withAllFeaturesEnabled = new SchemaGenerators.Builder(DEFAULT_KEYSPACE_NAME, tableNameSupplier)
+                                                                                    .partitionKeySpec(1, 4, columnTypes)
+                                                                                    .clusteringKeySpec(1, 4, clusteringKeyTypes)
+                                                                                    .regularColumnSpec(1, 10, columnTypes)
+                                                                                    .surjection();
+
+    public static final Surjections.Surjection<SchemaSpec>[] PROGRESSIVE_GENERATORS = new Surjections.Surjection[]{
+    longOnlySpecBuilder,
+    longAndStringSpecBuilder,
+    longOnlyWithReverseSpecBuilder,
+    longAndStringSpecWithReversedLongBuilder,
+    longAndStringSpecWithReversedStringBuilder,
+    longAndStringSpecWithReversedBothBuilder,
+    withAllFeaturesEnabled
+    };
+    // Create schema generators that would produce tables starting with just a few features, progressing to use more
+    public static Supplier<SchemaSpec> progression(int switchAfter)
+    {
+        Supplier<SchemaSpec>[] generators = new Supplier[PROGRESSIVE_GENERATORS.length];
+        for (int i = 0; i < generators.length; i++)
+            generators[i] = PROGRESSIVE_GENERATORS[i].toSupplier();
+
+        return new Supplier<SchemaSpec>()
+        {
+            private final AtomicInteger counter = new AtomicInteger();
+            public SchemaSpec get()
+            {
+                int idx = (counter.getAndIncrement() / switchAfter) % generators.length;
+                SchemaSpec spec = generators[idx].get();
+                int tries = 100;
+                while ((spec.ckGenerator.byteSize() != Long.BYTES || spec.pkGenerator.byteSize() != Long.BYTES) && tries > 0)
+                {
+                    System.out.println("Skipping schema, since it doesn't have enough entropy bits available: " + spec.compile().cql());
+                    spec = generators[idx].get();
+                    tries--;
+                }
+
+                assert tries > 0 : String.format("Max number of tries exceeded on generator %d, can't generate a needed schema", idx);
+                return spec;
+            }
+
+
+        };
+    }
+
+    public static int DEFAULT_SWITCH_AFTER = 5;
+    public static int GENERATORS_COUNT = PROGRESSIVE_GENERATORS.length;
+    public static int DEFAULT_RUNS = DEFAULT_SWITCH_AFTER * PROGRESSIVE_GENERATORS.length;
+
+    public static Supplier<SchemaSpec> progression()
+    {
+        return progression(DEFAULT_SWITCH_AFTER); // would generate 30 tables before wrapping around
+    }
+}
diff --git a/harry-core/src/harry/ddl/SchemaSpec.java b/harry-core/src/harry/ddl/SchemaSpec.java
@@ -32,6 +32,7 @@
 import harry.util.BitSet;
 
 // TODO: improve API of this class
+// TODO: forbid schemas where pk and cks don't add up to 64 bits (for now)
 public class SchemaSpec
 {
     public interface SchemaSpecFactory