Skip to content

Commit

Permalink
[HUDI-4993] Make DataPlatform name and Dataset env configurable in Da…
Browse files Browse the repository at this point in the history
…tahubSyncTool (apache#6885)
  • Loading branch information
pramodbiligiri authored and fengjian committed Apr 5, 2023
1 parent d82e177 commit d327a1a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
Expand Up @@ -30,6 +30,9 @@

import java.util.Properties;

import static org.apache.hudi.sync.datahub.config.HoodieDataHubDatasetIdentifier.DEFAULT_DATAHUB_ENV;
import static org.apache.hudi.sync.datahub.config.HoodieDataHubDatasetIdentifier.DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME;

public class DataHubSyncConfig extends HoodieSyncConfig {

public static final ConfigProperty<String> META_SYNC_DATAHUB_DATASET_IDENTIFIER_CLASS = ConfigProperty
Expand All @@ -52,6 +55,17 @@ public class DataHubSyncConfig extends HoodieSyncConfig {
.noDefaultValue()
.withDocumentation("Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. This overwrites other emitter configs.");

/**
 * Sync option {@code hoodie.meta.sync.datahub.dataplatform.name}: the platform name handed to
 * the DataPlatform URN when the dataset URN is built. Falls back to
 * {@code DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME} when the option is not set.
 */
public static final ConfigProperty<String> META_SYNC_DATAHUB_DATAPLATFORM_NAME = ConfigProperty
.key("hoodie.meta.sync.datahub.dataplatform.name")
.defaultValue(DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME)
.withDocumentation("String used to represent Hudi when creating its corresponding DataPlatform entity "
+ "within Datahub");

/**
 * Sync option {@code hoodie.meta.sync.datahub.dataset.env}: the name of the {@code FabricType}
 * constant to attach to the dataset URN. The default is the name of {@code DEFAULT_DATAHUB_ENV}.
 * The value is later resolved with {@code FabricType.valueOf}, so it is expected to match a
 * constant name exactly.
 */
public static final ConfigProperty<String> META_SYNC_DATAHUB_DATASET_ENV = ConfigProperty
.key("hoodie.meta.sync.datahub.dataset.env")
.defaultValue(DEFAULT_DATAHUB_ENV.name())
.withDocumentation("Environment to use when pushing entities to Datahub");

public final HoodieDataHubDatasetIdentifier datasetIdentifier;

public DataHubSyncConfig(Properties props) {
Expand Down Expand Up @@ -87,6 +101,13 @@ public static class DataHubSyncConfigParams {
@Parameter(names = {"--emitter-supplier-class"}, description = "Pluggable class to supply a DataHub REST emitter to connect to the DataHub instance. This overwrites other emitter configs.")
public String emitterSupplierClass;

/**
 * Value of the {@code --data-platform-name} command-line option. {@code toProps()} forwards it
 * through {@code setPropertyIfNonNull} under the {@code META_SYNC_DATAHUB_DATAPLATFORM_NAME} key.
 */
@Parameter(names = {"--data-platform-name"}, description = "String used to represent Hudi when creating its "
+ "corresponding DataPlatform entity within Datahub")
public String dataPlatformName;

/**
 * Value of the {@code --dataset-env} command-line option. {@code toProps()} forwards it
 * through {@code setPropertyIfNonNull} under the {@code META_SYNC_DATAHUB_DATASET_ENV} key.
 */
@Parameter(names = {"--dataset-env"}, description = "Which Datahub Environment to use when pushing entities")
public String datasetEnv;

/**
 * Delegates the help check to the wrapped {@code hoodieSyncConfigParams}.
 *
 * @return the result of {@code hoodieSyncConfigParams.isHelp()}
 */
public boolean isHelp() {
  final boolean helpRequested = hoodieSyncConfigParams.isHelp();
  return helpRequested;
}
Expand All @@ -97,6 +118,8 @@ public Properties toProps() {
props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_SERVER.key(), emitterServer);
props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_TOKEN.key(), emitterToken);
props.setPropertyIfNonNull(META_SYNC_DATAHUB_EMITTER_SUPPLIER_CLASS.key(), emitterSupplierClass);
props.setPropertyIfNonNull(META_SYNC_DATAHUB_DATAPLATFORM_NAME.key(), dataPlatformName);
props.setPropertyIfNonNull(META_SYNC_DATAHUB_DATASET_ENV.key(), datasetEnv);
return props;
}
}
Expand Down
Expand Up @@ -27,6 +27,8 @@

import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME;
import static org.apache.hudi.sync.datahub.config.DataHubSyncConfig.META_SYNC_DATAHUB_DATAPLATFORM_NAME;
import static org.apache.hudi.sync.datahub.config.DataHubSyncConfig.META_SYNC_DATAHUB_DATASET_ENV;

/**
* Construct and provide the default {@link DatasetUrn} to identify the Dataset on DataHub.
Expand All @@ -36,6 +38,7 @@
public class HoodieDataHubDatasetIdentifier {

/** Platform name used for the DataPlatform URN when no platform name is configured. */
public static final String DEFAULT_HOODIE_DATAHUB_PLATFORM_NAME = "hudi";
/** Environment used for the dataset URN when no dataset environment is configured. */
public static final FabricType DEFAULT_DATAHUB_ENV = FabricType.DEV;

protected final Properties props;

Expand All @@ -44,8 +47,20 @@ public HoodieDataHubDatasetIdentifier(Properties props) {
}

/**
 * Builds the {@link DatasetUrn} that identifies this table on DataHub.
 *
 * <p>The URN is assembled from three configured values read out of {@code props}:
 * the data platform name ({@code META_SYNC_DATAHUB_DATAPLATFORM_NAME}, with its default),
 * the dataset name formed as {@code <database>.<table>}, and the environment
 * ({@code META_SYNC_DATAHUB_DATASET_ENV}, with its default).
 *
 * <p>The environment string is resolved with {@code FabricType.valueOf}, so it must match a
 * {@code FabricType} constant name exactly; a non-matching value is expected to be rejected
 * by that lookup rather than silently replaced.
 *
 * @return the dataset URN built from the configured platform, dataset name and environment
 */
public DatasetUrn getDatasetUrn() {
  DataHubSyncConfig config = new DataHubSyncConfig(props);

  // Platform and environment come from config (falling back to their declared defaults)
  // instead of being pinned to the default platform name and FabricType.DEV.
  return new DatasetUrn(
      createDataPlatformUrn(config.getStringOrDefault(META_SYNC_DATAHUB_DATAPLATFORM_NAME)),
      createDatasetName(config.getString(META_SYNC_DATABASE_NAME), config.getString(META_SYNC_TABLE_NAME)),
      FabricType.valueOf(config.getStringOrDefault(META_SYNC_DATAHUB_DATASET_ENV))
  );
}

/**
 * Wraps the given platform name in a {@link DataPlatformUrn}.
 *
 * @param platformUrn value passed straight to the {@code DataPlatformUrn} constructor
 * @return the newly constructed platform URN
 */
private static DataPlatformUrn createDataPlatformUrn(String platformUrn) {
  final DataPlatformUrn dataPlatformUrn = new DataPlatformUrn(platformUrn);
  return dataPlatformUrn;
}

/**
 * Joins the database and table names into the dotted dataset name {@code <database>.<table>}.
 *
 * @param databaseName left-hand part of the dataset name
 * @param tableName right-hand part of the dataset name
 * @return {@code databaseName}, a dot, then {@code tableName}
 */
private static String createDatasetName(String databaseName, String tableName) {
  // Plain concatenation renders a null argument as "null", same as the "%s" format specifier.
  return databaseName + "." + tableName;
}
}

0 comments on commit d327a1a

Please sign in to comment.