-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce DimensionTableDataManager (#6346)
* Add DimensionTableData manager * Address review comments. * Close reader after using. * Revisit javadocs. * Release segment after use. * Touch up instance instantiation. * Cleanup segment in test. * Release segments in "finally" block. * Update logs. * Add TableConfig validations for Dim Tables. * Separate IngestionConfigTests for dim tables. * Remove defensive null checks. * Fix github action profile name. * Fix ingestionTest dependencies. * Undo the github-actions mvn profile name fix.
- Loading branch information
Showing
11 changed files
with
649 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
164 changes: 164 additions & 0 deletions
164
...e/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.core.data.manager.offline; | ||
|
||
import java.io.File; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.locks.Lock; | ||
import java.util.concurrent.locks.ReadWriteLock; | ||
import java.util.concurrent.locks.ReentrantReadWriteLock; | ||
import javax.annotation.concurrent.GuardedBy; | ||
import javax.annotation.concurrent.ThreadSafe; | ||
import org.apache.pinot.common.metadata.ZKMetadataProvider; | ||
import org.apache.pinot.core.data.manager.SegmentDataManager; | ||
import org.apache.pinot.core.data.readers.PinotSegmentRecordReader; | ||
import org.apache.pinot.core.indexsegment.IndexSegment; | ||
import org.apache.pinot.core.segment.index.loader.IndexLoadingConfig; | ||
import org.apache.pinot.spi.data.FieldSpec; | ||
import org.apache.pinot.spi.data.Schema; | ||
import org.apache.pinot.spi.data.readers.GenericRow; | ||
import org.apache.pinot.spi.data.readers.PrimaryKey; | ||
|
||
|
||
/** | ||
* Dimension Table is a special type of OFFLINE table which is assigned to all servers | ||
* in a tenant and is used to execute a LOOKUP Transform Function. DimensionTableDataManager | ||
* loads the contents into a HashMap for faster access thus the size should be small | ||
* enough to easily fit in memory. | ||
* | ||
* DimensionTableDataManager uses Registry of Singletons pattern to store one instance per table | ||
* which can be accessed via {@link #getInstanceByTableName} static method. | ||
*/ | ||
@ThreadSafe | ||
public class DimensionTableDataManager extends OfflineTableDataManager { | ||
// Storing singletons per table in a HashMap | ||
private static final Map<String, DimensionTableDataManager> _instances = new ConcurrentHashMap<>(); | ||
|
||
private DimensionTableDataManager() { | ||
} | ||
|
||
/** | ||
* `createInstanceByTableName` should only be used by the {@link TableDataManagerProvider} and the returned | ||
* instance should be properly initialized via {@link #init} method before using. | ||
*/ | ||
public static DimensionTableDataManager createInstanceByTableName(String tableNameWithType) { | ||
return _instances.computeIfAbsent(tableNameWithType, k -> new DimensionTableDataManager()); | ||
} | ||
|
||
public static DimensionTableDataManager getInstanceByTableName(String tableNameWithType) { | ||
return _instances.get(tableNameWithType); | ||
} | ||
|
||
/** | ||
* Instance properties/methods | ||
*/ | ||
private final ReadWriteLock _rwl = new ReentrantReadWriteLock(); | ||
private final Lock _lookupTableReadLock = _rwl.readLock(); | ||
private final Lock _lookupTableWriteLock = _rwl.writeLock(); | ||
|
||
// _lookupTable is a HashMap used for storing/serving records for a table keyed by table PK | ||
@GuardedBy("_rwl") | ||
private final Map<PrimaryKey, GenericRow> _lookupTable = new HashMap<>(); | ||
private Schema _tableSchema; | ||
private List<String> _primaryKeyColumns; | ||
|
||
@Override | ||
protected void doInit() { | ||
super.doInit(); | ||
|
||
// dimension tables should always have schemas with primary keys | ||
_tableSchema = ZKMetadataProvider.getTableSchema(_propertyStore, _tableNameWithType); | ||
_primaryKeyColumns = _tableSchema.getPrimaryKeyColumns(); | ||
} | ||
|
||
@Override | ||
public void addSegment(File indexDir, IndexLoadingConfig indexLoadingConfig) | ||
throws Exception { | ||
super.addSegment(indexDir, indexLoadingConfig); | ||
try { | ||
loadLookupTable(); | ||
_logger.info("Successfully added segment {} and loaded lookup table: {}", indexDir.getName(), getTableName()); | ||
} catch (Exception e) { | ||
throw new RuntimeException(String.format("Error loading lookup table: %s", getTableName()), e); | ||
} | ||
} | ||
|
||
@Override | ||
public void removeSegment(String segmentName) { | ||
super.removeSegment(segmentName); | ||
try { | ||
loadLookupTable(); | ||
_logger.info("Successfully removed segment {} and reloaded lookup table: {}", segmentName, getTableName()); | ||
} catch (Exception e) { | ||
throw new RuntimeException(String | ||
.format("Error reloading lookup table after segment remove ({}) for table: {}", segmentName, getTableName()), | ||
e); | ||
} | ||
} | ||
|
||
/** | ||
* `loadLookupTable()` reads contents of the DimensionTable into _lookupTable HashMap for fast lookup. | ||
*/ | ||
private void loadLookupTable() | ||
throws Exception { | ||
_lookupTableWriteLock.lock(); | ||
try { | ||
_lookupTable.clear(); | ||
List<SegmentDataManager> segmentManagers = acquireAllSegments(); | ||
if (segmentManagers.size() == 0) { | ||
return; | ||
} | ||
|
||
try { | ||
for (SegmentDataManager segmentManager : segmentManagers) { | ||
IndexSegment indexSegment = segmentManager.getSegment(); | ||
try (PinotSegmentRecordReader reader = new PinotSegmentRecordReader( | ||
indexSegment.getSegmentMetadata().getIndexDir())) { | ||
while (reader.hasNext()) { | ||
GenericRow row = reader.next(); | ||
_lookupTable.put(row.getPrimaryKey(_primaryKeyColumns), row); | ||
} | ||
} | ||
} | ||
} finally { | ||
for (SegmentDataManager segmentManager : segmentManagers) { | ||
releaseSegment(segmentManager); | ||
} | ||
} | ||
} finally { | ||
_lookupTableWriteLock.unlock(); | ||
} | ||
} | ||
|
||
public GenericRow lookupRowByPrimaryKey(PrimaryKey pk) { | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
cbalci
Author
Contributor
|
||
_lookupTableReadLock.lock(); | ||
try { | ||
return _lookupTable.get(pk); | ||
} finally { | ||
_lookupTableReadLock.unlock(); | ||
} | ||
} | ||
|
||
public FieldSpec getColumnFieldSpec(String columnName) { | ||
return _tableSchema.getFieldSpecFor(columnName); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Why is this method part of OfflineTableDataManager? This should be modeled as part of the segment interface, right?