Fix config passing and tests

yihua committed Sep 28, 2022
1 parent d3324be commit b054f56
Showing 2 changed files with 36 additions and 21 deletions.
@@ -26,6 +26,7 @@
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieIndexConfig;
@@ -34,7 +35,8 @@
 import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.HiveSyncTool;
 import org.apache.hudi.index.HoodieIndex;
-import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
+import org.apache.hudi.keygen.SimpleKeyGenerator;
+import org.apache.hudi.util.SparkKeyGenUtils;
 import org.apache.hudi.utilities.UtilHelpers;
 import org.apache.hudi.utilities.schema.SchemaProvider;

@@ -50,8 +52,15 @@
 import java.util.HashMap;
 
 import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
+import static org.apache.hudi.common.table.HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT;
+import static org.apache.hudi.common.table.HoodieTableConfig.POPULATE_META_FIELDS;
+import static org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_TIMEZONE;
+import static org.apache.hudi.config.HoodieBootstrapConfig.KEYGEN_CLASS_NAME;
+import static org.apache.hudi.config.HoodieWriteConfig.PRECOMBINE_FIELD_NAME;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC;
+import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE;
+import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.RECORDKEY_FIELD_NAME;
+import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.URL_ENCODE_PARTITIONING;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
@@ -190,39 +199,44 @@ private void initializeTable() throws IOException {
             + ". Cannot bootstrap data on top of an existing table");
       }
     }
-    HoodieTableMetaClient.withPropertyBuilder()
+
+    HoodieTableMetaClient.PropertyBuilder builder = HoodieTableMetaClient.withPropertyBuilder()
         .fromProperties(props)
         .setTableType(cfg.tableType)
         .setTableName(cfg.targetTableName)
-        .setRecordKeyFields(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()))
-        .setPartitionFields(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), null))
+        .setRecordKeyFields(props.getString(RECORDKEY_FIELD_NAME.key()))
+        .setKeyGeneratorClassProp(props.getString(
+            KEYGEN_CLASS_NAME.key(), SimpleKeyGenerator.class.getName()))
         .setPreCombineField(props.getString(
-            HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(),
-            HoodieWriteConfig.PRECOMBINE_FIELD_NAME.defaultValue()))
+            PRECOMBINE_FIELD_NAME.key(), PRECOMBINE_FIELD_NAME.defaultValue()))
         .setPopulateMetaFields(props.getBoolean(
-            HoodieTableConfig.POPULATE_META_FIELDS.key(),
-            HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()))
-        .setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue())
+            POPULATE_META_FIELDS.key(), POPULATE_META_FIELDS.defaultValue()))
+        .setArchiveLogFolder(props.getString(
+            ARCHIVELOG_FOLDER.key(), ARCHIVELOG_FOLDER.defaultValue()))
        .setPayloadClassName(cfg.payloadClassName)
        .setBaseFileFormat(cfg.baseFileFormat)
        .setBootstrapIndexClass(cfg.bootstrapIndexClass)
        .setBootstrapBasePath(bootstrapBasePath)
        .setHiveStylePartitioningEnable(props.getBoolean(
-            KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key(),
-            Boolean.parseBoolean(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.defaultValue())
+            HIVE_STYLE_PARTITIONING_ENABLE.key(),
+            Boolean.parseBoolean(HIVE_STYLE_PARTITIONING_ENABLE.defaultValue())
        ))
+        .setUrlEncodePartitioning(props.getBoolean(
+            URL_ENCODE_PARTITIONING.key(),
+            Boolean.parseBoolean(URL_ENCODE_PARTITIONING.defaultValue())))
        .setCommitTimezone(HoodieTimelineTimeZone.valueOf(
            props.getString(
-                HoodieTableConfig.TIMELINE_TIMEZONE.key(),
-                String.valueOf(HoodieTableConfig.TIMELINE_TIMEZONE.defaultValue()))))
-        .setPartitionMetafileUseBaseFormat(
-            props.getBoolean(
-                HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.key(),
-                HoodieTableConfig.PARTITION_METAFILE_USE_BASE_FORMAT.defaultValue()))
-        .initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
+                TIMELINE_TIMEZONE.key(),
+                String.valueOf(TIMELINE_TIMEZONE.defaultValue()))))
+        .setPartitionMetafileUseBaseFormat(props.getBoolean(
+            PARTITION_METAFILE_USE_BASE_FORMAT.key(),
+            PARTITION_METAFILE_USE_BASE_FORMAT.defaultValue()));
+    String partitionColumns = SparkKeyGenUtils.getPartitionColumns(props);
+    if (!StringUtils.isNullOrEmpty(partitionColumns)) {
+      builder.setPartitionFields(partitionColumns);
+    }
+
+    builder.initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
   }
 
   public HoodieWriteConfig getBootstrapConfig() {
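The executor-side change in a nutshell: instead of copying the raw hoodie.datasource.write.partitionpath.field value into the table config, the property builder is held in a local variable, the partition columns are derived from the configured key generator via SparkKeyGenUtils.getPartitionColumns(props), and setPartitionFields is called only when that result is non-empty. Below is a minimal sketch of the pattern, assuming the incoming configs are Hudi's TypedProperties; the class and method names are illustrative, not part of this commit.

import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.util.SparkKeyGenUtils;

class PartitionFieldsSketch {
  // Hedged sketch: derive partition columns from the key generator settings
  // in props; a non-partitioned key generator should yield an empty result,
  // so the table config is left without partition fields instead of
  // inheriting a value that the key generator would never produce.
  static void applyPartitionFields(HoodieTableMetaClient.PropertyBuilder builder,
                                   TypedProperties props) {
    String partitionColumns = SparkKeyGenUtils.getPartitionColumns(props);
    if (!StringUtils.isNullOrEmpty(partitionColumns)) {
      builder.setPartitionFields(partitionColumns);
    }
  }
}

This guard is what the updated test below exercises: with a key generator that produces no partition path, no partition fields should be written to the table config.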
@@ -46,16 +46,17 @@
 import org.apache.hudi.common.testutils.HoodieTestUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.config.HoodieClusteringConfig;
-import org.apache.hudi.config.HoodieCleanConfig;
 import org.apache.hudi.config.HoodieArchivalConfig;
+import org.apache.hudi.config.HoodieCleanConfig;
+import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieLockConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.HoodieHiveSyncClient;
+import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
 import org.apache.hudi.keygen.SimpleKeyGenerator;
 import org.apache.hudi.metrics.Metrics;
 import org.apache.hudi.utilities.DummySchemaProvider;
@@ -644,7 +645,7 @@ public void testBulkInsertsAndUpsertsWithBootstrap() throws Exception {
     String newDatasetBasePath = dfsBasePath + "/test_dataset_bootstrapped";
     cfg.runBootstrap = true;
     cfg.configs.add(String.format("hoodie.bootstrap.base.path=%s", bootstrapSourcePath));
-    cfg.configs.add(String.format("hoodie.bootstrap.keygen.class=%s", SimpleKeyGenerator.class.getName()));
+    cfg.configs.add(String.format("hoodie.bootstrap.keygen.class=%s", NonpartitionedKeyGenerator.class.getName()));
     cfg.configs.add("hoodie.bootstrap.parallelism=5");
     cfg.targetBasePath = newDatasetBasePath;
     new HoodieDeltaStreamer(cfg, jsc).sync();
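The test now bootstraps with NonpartitionedKeyGenerator instead of SimpleKeyGenerator, which drives the empty-partition-columns path above: the bootstrapped table should end up with no partition fields recorded. A hedged sketch of how that outcome could be verified against the target table (the helper below is illustrative and not the commit's own test code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;

class BootstrapPartitionFieldsCheck {
  // Load the table config of the bootstrapped table and confirm that no
  // partition fields were written for a non-partitioned key generator.
  static boolean hasNoPartitionFields(String targetBasePath) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath(targetBasePath)
        .build();
    return !metaClient.getTableConfig().getPartitionFields().isPresent();
  }
}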
