Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGNITE-17855 Implement inline size calculation for B+tree #1179

Merged
merged 14 commits into from
Oct 11, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static NativeType numberOf(int precision) {
/**
* Creates a STRING type with maximal length is <code>len</code>.
*
* @param len Maximum length of the string.
* @param len Maximum length of the string, {@link Integer#MAX_VALUE} if not defined.
* @return Native type.
*/
public static NativeType stringOf(int len) {
Expand All @@ -111,7 +111,7 @@ public static NativeType stringOf(int len) {
/**
* Creates a BYTES type with maximal length is <code>len</code>.
*
* @param len Maximum length of the byte array.
* @param len Maximum length of the byte array, {@link Integer#MAX_VALUE} if not defined.
* @return Native type.
*/
public static NativeType blobOf(int len) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,35 +23,33 @@
* Variable-length native type.
*/
public class VarlenNativeType extends NativeType {
/** Length of the type. */
/** Length of the type, {@link Integer#MAX_VALUE} if not defined. */
private final int len;

/**
* Constructor.
*
* @param typeSpec Type spec.
* @param len Type length.
* @param len Type length.
*/
protected VarlenNativeType(NativeTypeSpec typeSpec, int len) {
super(typeSpec);

this.len = len;
}

/** {@inheritDoc} */
@Override
public boolean mismatch(NativeType type) {
return super.mismatch(type) || len < ((VarlenNativeType) type).len;
}

/**
* Get length of the type.
* Get length of the type, {@link Integer#MAX_VALUE} if not defined.
*/
public int length() {
return len;
}

/** {@inheritDoc} */
@Override
public String toString() {
return S.toString(VarlenNativeType.class.getSimpleName(), "name", spec(), "len", len);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import org.apache.ignite.internal.schema.BinaryTupleSchema;
import org.apache.ignite.internal.schema.NativeTypeSpec;
import org.apache.ignite.internal.schema.row.InternalTuple;
import org.apache.ignite.internal.storage.index.SortedIndexDescriptor.ColumnDescriptor;
import org.apache.ignite.internal.storage.index.SortedIndexDescriptor.SortedIndexColumnDescriptor;

/**
* Comparator implementation for comparing {@link BinaryTuple}s on a per-column basis.
Expand Down Expand Up @@ -63,10 +63,10 @@ public int compare(ByteBuffer buffer1, ByteBuffer buffer2) {

int columnsToCompare = Math.min(tuple1.count(), tuple2.count());

assert columnsToCompare <= descriptor.indexColumns().size();
assert columnsToCompare <= descriptor.columns().size();

for (int i = 0; i < columnsToCompare; i++) {
ColumnDescriptor columnDescriptor = descriptor.indexColumns().get(i);
SortedIndexColumnDescriptor columnDescriptor = descriptor.columns().get(i);

int compare = compareField(tuple1, tuple2, i);

Expand Down Expand Up @@ -102,7 +102,7 @@ private int compareField(InternalTuple tuple1, InternalTuple tuple2, int index)
return 1;
}

ColumnDescriptor columnDescriptor = descriptor.indexColumns().get(index);
SortedIndexColumnDescriptor columnDescriptor = descriptor.columns().get(index);

NativeTypeSpec typeSpec = columnDescriptor.type().spec();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,40 +38,34 @@
*
* @see HashIndexStorage
*/
public class HashIndexDescriptor {
public class HashIndexDescriptor implements IndexDescriptor {
/**
* Descriptor of a Hash Index column.
*/
public static class ColumnDescriptor {
public static class HashIndexColumnDescriptor implements ColumnDescriptor {
private final String name;

private final NativeType type;

private final boolean nullable;

ColumnDescriptor(ColumnView tableColumnView) {
HashIndexColumnDescriptor(ColumnView tableColumnView) {
this.name = tableColumnView.name();
this.type = ConfigurationToSchemaDescriptorConverter.convert(tableColumnView.type());
this.nullable = tableColumnView.nullable();
}

/**
* Returns the name of an index column.
*/
@Override
public String name() {
return name;
}

/**
* Returns a column type.
*/
@Override
public NativeType type() {
return type;
}

/**
* Returns {@code true} if this column can contain null values or {@code false} otherwise.
*/
@Override
public boolean nullable() {
return nullable;
}
Expand All @@ -84,7 +78,7 @@ public String toString() {

private final UUID id;

private final List<ColumnDescriptor> columns;
private final List<HashIndexColumnDescriptor> columns;

/**
* Creates an Index Descriptor from a given Table Configuration.
Expand Down Expand Up @@ -122,22 +116,18 @@ public HashIndexDescriptor(UUID indexId, TablesView tablesConfig) {

assert columnView != null : "Incorrect index column configuration. " + columnName + " column does not exist";

return new ColumnDescriptor(columnView);
return new HashIndexColumnDescriptor(columnView);
})
.collect(toUnmodifiableList());
}

/**
* Returns the ID of this Index.
*/
@Override
public UUID id() {
return id;
}

/**
* Returns the Column Descriptors that comprise a row of this index.
*/
public List<ColumnDescriptor> indexColumns() {
@Override
public List<HashIndexColumnDescriptor> columns() {
return columns;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.internal.storage.index;

import java.util.List;
import java.util.UUID;
import org.apache.ignite.internal.schema.NativeType;

/**
* Index descriptor.
*/
public interface IndexDescriptor {
/**
* Index column descriptor: name, type and nullability of a single column of an index.
*/
interface ColumnDescriptor {
/**
* Returns the name of an index column.
*/
String name();

/**
* Returns the type of an index column.
*/
NativeType type();

/**
* Returns {@code true} if this column can contain null values or {@code false} otherwise.
*/
boolean nullable();
}

/**
* Returns the index ID.
*/
UUID id();

/**
* Returns the descriptors of the index columns.
*/
List<? extends ColumnDescriptor> columns();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.internal.storage.index;

import static org.apache.ignite.internal.util.Constants.KiB;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
import org.apache.ignite.internal.binarytuple.BinaryTupleCommon;
import org.apache.ignite.internal.schema.BinaryTuple;
import org.apache.ignite.internal.schema.NativeType;
import org.apache.ignite.internal.schema.NativeTypeSpec;
import org.apache.ignite.internal.schema.VarlenNativeType;
import org.apache.ignite.internal.storage.index.IndexDescriptor.ColumnDescriptor;

/**
* Helper class for index inlining.
*/
public class InlineUtils {
/**
* Inline size of an undefined (not set by the user) variable length column in bytes.
*
* <p>Applied to {@code STRING} and {@code BYTES} columns whose declared length is {@link Integer#MAX_VALUE}.
*/
static final int UNDEFINED_VARLEN_INLINE_SIZE = 10;

/**
* Inline size for large numbers ({@link BigDecimal} and {@link BigInteger}) in bytes.
*
* <p>Applied to {@code DECIMAL} and {@code NUMBER} columns.
*/
static final int BIG_NUMBER_INLINE_SIZE = 4;

/**
* Maximum inline size for a {@link BinaryTuple}, in bytes.
*
* <p>The result of {@link #binaryTupleInlineSize(IndexDescriptor)} is capped by this value.
*/
static final int MAX_BINARY_TUPLE_INLINE_SIZE = 2 * KiB;

/**
* {@link BinaryTuple} size class in bytes.
*
* <p>Added once per index column when the tuple inline size is estimated.
* NOTE(review): presumably the width of one offset table entry - confirm against the binary tuple format.
*/
static final int BINARY_TUPLE_SIZE_CLASS = 2;

/**
* Calculates inline size for column.
*
* <p>Fixed-length types are inlined in full. For variable-length types the size is an estimate:
* <ul>
*     <li>{@code STRING} - twice the declared length, or {@link #UNDEFINED_VARLEN_INLINE_SIZE} if the length is not defined;</li>
*     <li>{@code BYTES} - the declared length, or {@link #UNDEFINED_VARLEN_INLINE_SIZE} if the length is not defined;</li>
*     <li>{@code DECIMAL} and {@code NUMBER} - {@link #BIG_NUMBER_INLINE_SIZE}.</li>
* </ul>
*
* @param nativeType Column type.
* @return Inline size in bytes.
* @throws IllegalArgumentException If the type is variable-length but is none of the types listed above.
*/
static int inlineSize(NativeType nativeType) {
NativeTypeSpec spec = nativeType.spec();

// Fixed-length types report their own size.
if (spec.fixedLength()) {
return nativeType.sizeInBytes();
}

// Variable length columns.

switch (spec) {
case STRING: {
int length = ((VarlenNativeType) nativeType).length();

// Integer.MAX_VALUE is the marker of a length that is not defined; otherwise two bytes are counted per character.
// NOTE(review): "length * 2" is int arithmetic and wraps to a negative value when length > Integer.MAX_VALUE / 2.
return length == Integer.MAX_VALUE ? UNDEFINED_VARLEN_INLINE_SIZE : length * 2;
Copy link
Member

@AMashenkov AMashenkov Oct 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return length == Integer.MAX_VALUE ? UNDEFINED_VARLEN_INLINE_SIZE : length * 2;
return length == Integer.MAX_VALUE ? UNDEFINED_VARLEN_INLINE_SIZE : length;

Do you expect most of users will use 2 bytes chars?
Why should an index over columns of ASCII chars be twice large by default?
And what to do with 4-byte chars?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I proceed from the fact that in Java characters is char (2 bytes).
We can assume, as in Java, that in most cases the characters will be ascii (1 byte).
We're just doing an approximate inline count, I don't know how often we'll see 4 byte characters.

Later we can add an encoding (utf-8, 16, 32) to the configuration or ddl.

wdyt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, also I'd limit default value for var-length types (e.g. with 64-256 bytes).

Usually, the upper limit for var-length columns is usually calculated with some reserve.
So, the actual values size is less the limit (often noticeably).
Also, large inline-size means less data will fit to the index page and may have negative impact on performance.

In case of relatively large values with similar prefixes user always can increase inline-size manually,
but imho, in most of cases, this limit will reduce index size with no performance penalty.

}

case BYTES: {
int length = ((VarlenNativeType) nativeType).length();

// Integer.MAX_VALUE is the marker of a length that is not defined; otherwise the declared length is used as is.
return length == Integer.MAX_VALUE ? UNDEFINED_VARLEN_INLINE_SIZE : length;
}

// Large numbers get a fixed-size estimate regardless of their precision.
case DECIMAL:
case NUMBER:
return BIG_NUMBER_INLINE_SIZE;

default:
throw new IllegalArgumentException("Unknown type " + spec);
}
}

/**
 * Calculates inline size for {@link BinaryTuple}, given its format.
 *
 * <p>The estimate is the sum of the tuple header, the null map (only if at least one column is nullable), one
 * {@link #BINARY_TUPLE_SIZE_CLASS} entry per column and the inline sizes of the columns, taken in index order until
 * {@link #MAX_BINARY_TUPLE_INLINE_SIZE} is reached.
 *
 * @param indexDescriptor Index descriptor.
 * @return Inline size in bytes, not more than the {@link #MAX_BINARY_TUPLE_INLINE_SIZE}.
 */
static int binaryTupleInlineSize(IndexDescriptor indexDescriptor) {
    List<? extends ColumnDescriptor> columns = indexDescriptor.columns();

    boolean hasNullColumns = columns.stream().anyMatch(ColumnDescriptor::nullable);

    int size = BinaryTupleCommon.HEADER_SIZE
            + (hasNullColumns ? BinaryTupleCommon.nullMapSize(columns.size()) : 0)
            + columns.size() * BINARY_TUPLE_SIZE_CLASS;

    for (ColumnDescriptor column : columns) {
        if (size >= MAX_BINARY_TUPLE_INLINE_SIZE) {
            break;
        }

        int columnSize = inlineSize(column.type());

        // Check the column against the remaining budget instead of adding it first: a variable-length column may declare
        // a length close to Integer.MAX_VALUE, so the plain sum could wrap around and yield a negative inline size.
        // A negative column size is itself the result of such a wrap-around in the per-column estimate and therefore
        // also means "does not fit".
        if (columnSize < 0 || columnSize > MAX_BINARY_TUPLE_INLINE_SIZE - size) {
            return MAX_BINARY_TUPLE_INLINE_SIZE;
        }

        size += columnSize;
    }

    return Math.min(size, MAX_BINARY_TUPLE_INLINE_SIZE);
}
}
Loading