Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,23 @@

package org.apache.paimon.fileindex;

import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataType;

import static org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex.BLOOM_FILTER;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** File index interface. To build a file index. */
public interface FileIndexer {
Comment thread
leaves12138 marked this conversation as resolved.

Logger LOG = LoggerFactory.getLogger(FileIndexer.class);

FileIndexWriter createWriter();

FileIndexReader createReader(byte[] serializedBytes);

static FileIndexer create(String type, DataType dataType, Options options) {
switch (type) {
case BLOOM_FILTER:
return new BloomFilterFileIndex(dataType, options);
default:
throw new RuntimeException("Doesn't support filter type: " + type);
}
FileIndexerFactory fileIndexerFactory = FileIndexerFactoryUtils.load(type);
return fileIndexerFactory.create(dataType, options);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fileindex;

import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataType;

/**
 * File index factory to construct {@link FileIndexer}.
 *
 * <p>{@link FileIndexerFactoryUtils} discovers implementations through {@link
 * java.util.ServiceLoader} and registers each one under the value of {@link #identifier()}.
 */
public interface FileIndexerFactory {

/**
 * Returns the index type name this factory is registered under; {@link
 * FileIndexerFactoryUtils#load(String)} looks factories up by this value.
 */
String identifier();

/**
 * Creates a {@link FileIndexer}.
 *
 * @param type data type passed through to the created indexer
 * @param options options passed through to the created indexer
 */
FileIndexer create(DataType type, Options options);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fileindex;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;
import java.util.ServiceLoader;

/**
 * Load utils to load FileIndexerFactory.
 *
 * <p>All {@link FileIndexerFactory} implementations visible to {@link ServiceLoader} are collected
 * once, when this class is initialized, and are then looked up by their identifier.
 */
public class FileIndexerFactoryUtils {

    private static final Logger LOG = LoggerFactory.getLogger(FileIndexerFactoryUtils.class);

    /** Discovered factories keyed by identifier; written only by the static initializer below. */
    private static final Map<String, FileIndexerFactory> factories = new HashMap<>();

    static {
        ServiceLoader<FileIndexerFactory> serviceLoader =
                ServiceLoader.load(FileIndexerFactory.class);

        for (FileIndexerFactory indexerFactory : serviceLoader) {
            String identifier = indexerFactory.identifier();
            // put() returns the previous mapping, so a non-null result means a second factory
            // claimed the same identifier; the one seen last stays registered.
            if (factories.put(identifier, indexerFactory) != null) {
                LOG.warn(
                        "Found multiple FileIndexer for type: {}, choose one of them", identifier);
            }
        }
    }

    /**
     * Returns the factory registered for the given index type.
     *
     * @param type identifier of the requested index type
     * @return the factory whose identifier equals {@code type}
     * @throws RuntimeException if no factory is registered for {@code type}
     */
    static FileIndexerFactory load(String type) {
        FileIndexerFactory fileIndexerFactory = factories.get(type);
        if (fileIndexerFactory == null) {
            throw new RuntimeException("Can't find file index for type: " + type);
        }
        return fileIndexerFactory;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@
*/
public class BloomFilterFileIndex implements FileIndexer {

public static final String BLOOM_FILTER = "bloom-filter";

private static final int DEFAULT_ITEMS = 1_000_000;
private static final double DEFAULT_FPP = 0.1;

Expand All @@ -57,10 +55,6 @@ public BloomFilterFileIndex(DataType dataType, Options options) {
this.fpp = options.getDouble(FPP, DEFAULT_FPP);
}

public String name() {
return BLOOM_FILTER;
}

@Override
public FileIndexWriter createWriter() {
return new Writer(dataType, items, fpp);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fileindex.bloomfilter;

import org.apache.paimon.fileindex.FileIndexer;
import org.apache.paimon.fileindex.FileIndexerFactory;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataType;

/**
 * Index factory to construct {@link BloomFilterFileIndex}.
 *
 * <p>Registered under the {@link #BLOOM_FILTER} identifier.
 */
public class BloomFilterFileIndexFactory implements FileIndexerFactory {

    /** Identifier of the bloom filter index type. */
    public static final String BLOOM_FILTER = "bloom-filter";

    /** Builds a {@link BloomFilterFileIndex} from the given data type and options. */
    @Override
    public FileIndexer create(DataType type, Options options) {
        return new BloomFilterFileIndex(type, options);
    }

    /** Returns {@link #BLOOM_FILTER}. */
    @Override
    public String identifier() {
        return BLOOM_FILTER;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.serializer.InternalRowSerializer;
import org.apache.paimon.fileindex.FileIndexOptions;
import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory;
import org.apache.paimon.fs.FileIOFinder;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
Expand Down Expand Up @@ -386,26 +386,26 @@ public void testBloomFilterInMemory() throws Exception {
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ "."
+ CoreOptions.COLUMNS,
"index_column, index_column2, index_column3");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column.items",
"150");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column2.items",
"150");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column3.items",
"150");
options.set(FILE_INDEX_IN_MANIFEST_THRESHOLD.key(), "500 B");
Expand Down Expand Up @@ -452,7 +452,7 @@ public void testBloomFilterInDisk() throws Exception {
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ "."
+ CoreOptions.COLUMNS,
"index_column, index_column2, index_column3");
Expand Down Expand Up @@ -510,32 +510,32 @@ public void testBloomFilterForMapField() throws Exception {
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ "."
+ CoreOptions.COLUMNS,
"index_column, index_column2, index_column3[a], index_column3[b], index_column3[c], index_column3[d]");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column.items",
"150");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column2.items",
"150");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column3.items",
"150");
options.set(
FileIndexOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ ".index_column3[a].items",
"10000");
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.serializer.InternalRowSerializer;
import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.FileIOFinder;
import org.apache.paimon.fs.Path;
Expand Down Expand Up @@ -352,7 +352,7 @@ private FileStoreTable createFileStoreTable(
options.set(
CoreOptions.FILE_INDEX
+ "."
+ BloomFilterFileIndex.BLOOM_FILTER
+ BloomFilterFileIndexFactory.BLOOM_FILTER
+ "."
+ CoreOptions.COLUMNS,
rowType.getFieldNames().get(0));
Expand Down