Load FileIndexer implementations through ServiceLoader-discovered factories.

Summary of the changes carried by this patch:
  * FileIndexer.create no longer switches on the index type; it looks up a
    FileIndexerFactory by identifier via FileIndexerFactoryUtils.load and
    delegates construction to it.
  * FileIndexerFactoryUtils populates an identifier -> factory map once, in a
    static initializer, from ServiceLoader.load(FileIndexerFactory.class). A
    duplicate identifier is logged as a warning and the later entry replaces
    the earlier one; an unknown type makes load throw RuntimeException.
  * The BLOOM_FILTER constant moves from BloomFilterFileIndex to the new
    BloomFilterFileIndexFactory, which is registered under META-INF/services.
  * Tests reference BloomFilterFileIndexFactory.BLOOM_FILTER instead.

NOTE(review): leading indentation of context lines was reconstructed from the
project's usual formatting; confirm it against the target tree before applying.

diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexer.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexer.java
index fa3085d79cb9..5321a9f34b5c 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexer.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexer.java
@@ -18,25 +18,23 @@
 
 package org.apache.paimon.fileindex;
 
-import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
 import org.apache.paimon.options.Options;
 import org.apache.paimon.types.DataType;
 
-import static org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex.BLOOM_FILTER;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /** File index interface. To build a file index. */
 public interface FileIndexer {
 
+    Logger LOG = LoggerFactory.getLogger(FileIndexer.class);
+
     FileIndexWriter createWriter();
 
     FileIndexReader createReader(byte[] serializedBytes);
 
     static FileIndexer create(String type, DataType dataType, Options options) {
-        switch (type) {
-            case BLOOM_FILTER:
-                return new BloomFilterFileIndex(dataType, options);
-            default:
-                throw new RuntimeException("Doesn't support filter type: " + type);
-        }
+        FileIndexerFactory fileIndexerFactory = FileIndexerFactoryUtils.load(type);
+        return fileIndexerFactory.create(dataType, options);
     }
 }
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
new file mode 100644
index 000000000000..85dcc97f4640
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactory.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex;
+
+import org.apache.paimon.options.Options;
+import org.apache.paimon.types.DataType;
+
+/** File index factory to construct {@link FileIndexer}. */
+public interface FileIndexerFactory {
+
+    String identifier();
+
+    FileIndexer create(DataType type, Options options);
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
new file mode 100644
index 000000000000..269ba77266e3
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexerFactoryUtils.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.ServiceLoader;
+
+/** Load utils to load FileIndexerFactory. */
+public class FileIndexerFactoryUtils {
+
+    private static final Logger LOG = LoggerFactory.getLogger(FileIndexerFactoryUtils.class);
+
+    private static final Map<String, FileIndexerFactory> factories = new HashMap<>();
+
+    static {
+        ServiceLoader<FileIndexerFactory> serviceLoader =
+                ServiceLoader.load(FileIndexerFactory.class);
+
+        for (FileIndexerFactory indexerFactory : serviceLoader) {
+            if (factories.put(indexerFactory.identifier(), indexerFactory) != null) {
+                LOG.warn(
+                        "Found multiple FileIndexer for type: "
+                                + indexerFactory.identifier()
+                                + ", choose one of them");
+            }
+        }
+    }
+
+    static FileIndexerFactory load(String type) {
+        FileIndexerFactory fileIndexerFactory = factories.get(type);
+        if (fileIndexerFactory == null) {
+            throw new RuntimeException("Can't find file index for type: " + type);
+        }
+        return fileIndexerFactory;
+    }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java
index 48f109a631ac..7a889309ee5d 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndex.java
@@ -39,8 +39,6 @@
  */
 public class BloomFilterFileIndex implements FileIndexer {
 
-    public static final String BLOOM_FILTER = "bloom-filter";
-
     private static final int DEFAULT_ITEMS = 1_000_000;
     private static final double DEFAULT_FPP = 0.1;
 
@@ -57,10 +55,6 @@ public BloomFilterFileIndex(DataType dataType, Options options) {
         this.fpp = options.getDouble(FPP, DEFAULT_FPP);
     }
 
-    public String name() {
-        return BLOOM_FILTER;
-    }
-
     @Override
     public FileIndexWriter createWriter() {
         return new Writer(dataType, items, fpp);
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
new file mode 100644
index 000000000000..ec77a0a04075
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bloomfilter/BloomFilterFileIndexFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex.bloomfilter;
+
+import org.apache.paimon.fileindex.FileIndexer;
+import org.apache.paimon.fileindex.FileIndexerFactory;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.types.DataType;
+
+/** Index factory to construct {@link BloomFilterFileIndex}. */
+public class BloomFilterFileIndexFactory implements FileIndexerFactory {
+
+    public static final String BLOOM_FILTER = "bloom-filter";
+
+    @Override
+    public String identifier() {
+        return BLOOM_FILTER;
+    }
+
+    @Override
+    public FileIndexer create(DataType type, Options options) {
+        return new BloomFilterFileIndex(type, options);
+    }
+}
diff --git a/paimon-common/src/main/resources/META-INF/services/org.apache.paimon.fileindex.FileIndexerFactory b/paimon-common/src/main/resources/META-INF/services/org.apache.paimon.fileindex.FileIndexerFactory
new file mode 100644
index 000000000000..67abe62a60b7
--- /dev/null
+++ b/paimon-common/src/main/resources/META-INF/services/org.apache.paimon.fileindex.FileIndexerFactory
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory
\ No newline at end of file
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
index b569a76a8a08..1eaeb17ad408 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/AppendOnlyFileStoreTableTest.java
@@ -26,7 +26,7 @@
 import org.apache.paimon.data.InternalRow;
 import org.apache.paimon.data.serializer.InternalRowSerializer;
 import org.apache.paimon.fileindex.FileIndexOptions;
-import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
+import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory;
 import org.apache.paimon.fs.FileIOFinder;
 import org.apache.paimon.fs.Path;
 import org.apache.paimon.fs.local.LocalFileIO;
@@ -386,26 +386,26 @@ public void testBloomFilterInMemory() throws Exception {
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + "."
                                             + CoreOptions.COLUMNS,
                                     "index_column, index_column2, index_column3");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column.items",
                                     "150");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column2.items",
                                     "150");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column3.items",
                                     "150");
                             options.set(FILE_INDEX_IN_MANIFEST_THRESHOLD.key(), "500 B");
@@ -452,7 +452,7 @@ public void testBloomFilterInDisk() throws Exception {
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + "."
                                             + CoreOptions.COLUMNS,
                                     "index_column, index_column2, index_column3");
@@ -510,32 +510,32 @@ public void testBloomFilterForMapField() throws Exception {
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + "."
                                             + CoreOptions.COLUMNS,
                                     "index_column, index_column2, index_column3[a], index_column3[b], index_column3[c], index_column3[d]");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column.items",
                                     "150");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column2.items",
                                     "150");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column3.items",
                                     "150");
                             options.set(
                                     FileIndexOptions.FILE_INDEX
                                             + "."
-                                            + BloomFilterFileIndex.BLOOM_FILTER
+                                            + BloomFilterFileIndexFactory.BLOOM_FILTER
                                             + ".index_column3[a].items",
                                     "10000");
                         });
diff --git a/paimon-core/src/test/java/org/apache/paimon/table/source/snapshot/SnapshotReaderTest.java b/paimon-core/src/test/java/org/apache/paimon/table/source/snapshot/SnapshotReaderTest.java
index 488434013443..c0383efd584f 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/source/snapshot/SnapshotReaderTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/source/snapshot/SnapshotReaderTest.java
@@ -22,7 +22,7 @@
 import org.apache.paimon.data.BinaryString;
 import org.apache.paimon.data.GenericRow;
 import org.apache.paimon.data.serializer.InternalRowSerializer;
-import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndex;
+import org.apache.paimon.fileindex.bloomfilter.BloomFilterFileIndexFactory;
 import org.apache.paimon.fs.FileIO;
 import org.apache.paimon.fs.FileIOFinder;
 import org.apache.paimon.fs.Path;
@@ -352,7 +352,7 @@ private FileStoreTable createFileStoreTable(
         options.set(
                 CoreOptions.FILE_INDEX
                         + "."
-                        + BloomFilterFileIndex.BLOOM_FILTER
+                        + BloomFilterFileIndexFactory.BLOOM_FILTER
                         + "."
                         + CoreOptions.COLUMNS,
                 rowType.getFieldNames().get(0));