From 69a137a2f7321a63f0d22b08ad1797694fa3006b Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Sun, 12 Oct 2025 15:48:21 -0700 Subject: [PATCH 1/5] Add Hive federation option in CLI # Conflicts: # client/python/apache_polaris/cli/command/catalogs.py # site/content/in-dev/unreleased/command-line-interface.md --- .../apache_polaris/cli/command/catalogs.py | 17 ++++++++-- client/python/apache_polaris/cli/constants.py | 2 ++ client/python/test/test_cli_parsing.py | 33 +++++++++++++++++++ .../unreleased/command-line-interface.md | 2 +- 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/client/python/apache_polaris/cli/command/catalogs.py b/client/python/apache_polaris/cli/command/catalogs.py index 0abc9c58ee..5dd88e75d0 100644 --- a/client/python/apache_polaris/cli/command/catalogs.py +++ b/client/python/apache_polaris/cli/command/catalogs.py @@ -35,7 +35,7 @@ PolarisCatalog, CatalogProperties, BearerAuthenticationParameters, \ ImplicitAuthenticationParameters, \ OAuthClientCredentialsParameters, SigV4AuthenticationParameters, \ - HadoopConnectionConfigInfo, \ + HadoopConnectionConfigInfo, HiveConnectionConfigInfo\ IcebergRestConnectionConfigInfo, AwsIamServiceIdentityInfo @@ -121,8 +121,13 @@ def validate(self): f" and {Argument.to_flag_name(Arguments.CATALOG_SIGNING_REGION)}") if self.catalog_connection_type == CatalogConnectionType.HADOOP.value: if not self.hadoop_warehouse or not self.catalog_uri: - raise Exception(f"Missing required argument for connection type 'HADOOP':" + raise Exception(f"Missing required argument for connection type 'hadoop':" f" {Argument.to_flag_name(Arguments.HADOOP_WAREHOUSE)}" + f" and {Argument.to_flag_name(Arguments.CATALOG_URI)}") + elif self.catalog_connection_type == CatalogConnectionType.HIVE.value: + if not self.hive_warehouse or not self.catalog_uri: + raise Exception(f"Missing required argument for connection type 'hive':" + f" {Argument.to_flag_name(Arguments.HIVE_WAREHOUSE)}" f" and 
{Argument.to_flag_name(Arguments.CATALOG_URI)}") if self.catalog_service_identity_type == ServiceIdentityType.AWS_IAM.value: if not self.catalog_service_identity_iam_arn: @@ -278,6 +283,14 @@ def _build_connection_config_info(self): service_identity=service_identity, remote_catalog_name=self.iceberg_remote_catalog_name ) + elif self.catalog_connection_type == CatalogConnectionType.HIVE.value: + config = HiveConnectionConfigInfo( + connection_type=self.catalog_connection_type.upper(), + uri=self.catalog_uri, + authentication_parameters=auth_params, + service_identity=service_identity, + warehouse=self.hive_warehouse + ) elif self.catalog_connection_type is not None: raise Exception("Unknown catalog connection type:", self.catalog_connection_type) return config diff --git a/client/python/apache_polaris/cli/constants.py b/client/python/apache_polaris/cli/constants.py index 151dbee24e..b6549e5287 100644 --- a/client/python/apache_polaris/cli/constants.py +++ b/client/python/apache_polaris/cli/constants.py @@ -55,6 +55,7 @@ class CatalogConnectionType(Enum): HADOOP = "hadoop" ICEBERG = "iceberg-rest" + HIVE = "hive" class AuthenticationType(Enum): @@ -172,6 +173,7 @@ class Arguments: PROXY = "proxy" DEBUG = "debug" HADOOP_WAREHOUSE = "hadoop_warehouse" + HIVE_WAREHOUSE = "hive_warehouse" ICEBERG_REMOTE_CATALOG_NAME = "iceberg_remote_catalog_name" ENDPOINT = "endpoint" ENDPOINT_INTERNAL = "endpoint_internal" diff --git a/client/python/test/test_cli_parsing.py b/client/python/test/test_cli_parsing.py index 168b8e17da..196223feea 100644 --- a/client/python/test/test_cli_parsing.py +++ b/client/python/test/test_cli_parsing.py @@ -204,6 +204,10 @@ def get(obj, arg_string): '--allowed-location', 'a', '--allowed-location', 'b', '--role-arn', 'ra', '--default-base-location', 'x']), 'gcs') + check_exception(lambda: mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 
'hive', '--catalog-authentication-type', 'implicit']), + '--hive-warehouse') # Test various correct commands: check_arguments( @@ -585,6 +589,35 @@ def get(obj, arg_string): (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT', (0, 'catalog.connection_config_info.uri'): 'u', }) + check_arguments( + mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 'hive', '--hive-warehouse', 'h', + '--catalog-authentication-type', 'implicit', '--catalog-uri', 'u']), + 'create_catalog', { + (0, 'catalog.name'): 'my-catalog', + (0, 'catalog.type'): 'EXTERNAL', + (0, 'catalog.connection_config_info.connection_type'): 'HIVE', + (0, 'catalog.connection_config_info.warehouse'): 'h', + (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT', + (0, 'catalog.connection_config_info.uri'): 'u', + }) + check_arguments( + mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 'hive', '--hive-warehouse', '/warehouse/path', + '--catalog-authentication-type', 'oauth', '--catalog-uri', 'thrift://hive-metastore:9083', + '--catalog-token-uri', 'http://auth-server/token', '--catalog-client-id', 'test-client', + '--catalog-client-secret', 'test-secret', '--catalog-client-scope', 'read', + '--catalog-client-scope', 'write']), + 'create_catalog', { + (0, 'catalog.name'): 'my-catalog', + (0, 'catalog.type'): 'EXTERNAL', + (0, 'catalog.connection_config_info.connection_type'): 'HIVE', + (0, 'catalog.connection_config_info.warehouse'): '/warehouse/path', + (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'OAUTH', + (0, 'catalog.connection_config_info.uri'): 'thrift://hive-metastore:9083', + }) check_arguments( mock_execute(['principals', 'reset', 'test', 
'--new-client-id', 'e469c048cf866df1', '--new-client-secret', 'e469c048cf866dfae469c048cf866df1']), diff --git a/site/content/in-dev/unreleased/command-line-interface.md b/site/content/in-dev/unreleased/command-line-interface.md index c455ec80f2..e976ecd9ac 100644 --- a/site/content/in-dev/unreleased/command-line-interface.md +++ b/site/content/in-dev/unreleased/command-line-interface.md @@ -156,7 +156,7 @@ options: --consent-url (Only for Azure) A consent URL granting permissions for the Azure Storage location --service-account (Only for GCS) The service account to use when connecting to GCS --property A key/value pair such as: tag=value. Multiple can be provided by specifying this option more than once - --catalog-connection-type The type of external catalog in [iceberg-rest, hadoop]. + --catalog-connection-type The type of external catalog in [iceberg-rest, hadoop, hive]. --iceberg-remote-catalog-name The remote catalog name when federating to an Iceberg REST catalog --hadoop-warehouse The warehouse to use when federating to a HADOOP catalog --catalog-authentication-type The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT] From b637d39747974ad651487f9f9bae925b58d80166 Mon Sep 17 00:00:00 2001 From: Yufei Date: Tue, 2 Dec 2025 18:12:58 -0800 Subject: [PATCH 2/5] Document the --hive-warehouse option in the CLI reference --- site/content/in-dev/unreleased/command-line-interface.md | 1 + 1 file changed, 1 insertion(+) diff --git a/site/content/in-dev/unreleased/command-line-interface.md b/site/content/in-dev/unreleased/command-line-interface.md index e976ecd9ac..f27aa05f0a 100644 --- a/site/content/in-dev/unreleased/command-line-interface.md +++ b/site/content/in-dev/unreleased/command-line-interface.md @@ -159,6 +159,7 @@ options: --catalog-connection-type The type of external catalog in [iceberg-rest, hadoop, hive].
--iceberg-remote-catalog-name The remote catalog name when federating to an Iceberg REST catalog --hadoop-warehouse The warehouse to use when federating to a HADOOP catalog + --hive-warehouse The warehouse to use when federating to a HIVE catalog --catalog-authentication-type The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT] --catalog-service-identity-type The type of service identity in [AWS_IAM] --catalog-service-identity-iam-arn When using the AWS_IAM service identity type, this is the ARN of the IAM user or IAM role Polaris uses to assume roles and then access external resources. From 436b16d012c680216e605ea0d39cc19a95ee00b3 Mon Sep 17 00:00:00 2001 From: Yufei Date: Tue, 2 Dec 2025 18:15:09 -0800 Subject: [PATCH 3/5] Fix missing comma after HiveConnectionConfigInfo in catalogs.py import --- client/python/apache_polaris/cli/command/catalogs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/python/apache_polaris/cli/command/catalogs.py b/client/python/apache_polaris/cli/command/catalogs.py index 5dd88e75d0..5b0feb1a89 100644 --- a/client/python/apache_polaris/cli/command/catalogs.py +++ b/client/python/apache_polaris/cli/command/catalogs.py @@ -35,7 +35,7 @@ PolarisCatalog, CatalogProperties, BearerAuthenticationParameters, \ ImplicitAuthenticationParameters, \ OAuthClientCredentialsParameters, SigV4AuthenticationParameters, \ - HadoopConnectionConfigInfo, HiveConnectionConfigInfo\ + HadoopConnectionConfigInfo, HiveConnectionConfigInfo, \ IcebergRestConnectionConfigInfo, AwsIamServiceIdentityInfo From 55cbfca236e7cc9c4da4a3a907b649ff0f1d94c5 Mon Sep 17 00:00:00 2001 From: Yufei Date: Tue, 2 Dec 2025 18:19:59 -0800 Subject: [PATCH 4/5] Pass hive_warehouse through to CatalogsCommand --- client/python/apache_polaris/cli/command/__init__.py | 1 + client/python/apache_polaris/cli/command/catalogs.py | 1 + 2 files changed, 2 insertions(+) diff --git a/client/python/apache_polaris/cli/command/__init__.py b/client/python/apache_polaris/cli/command/__init__.py index
3db3b3ca39..9f098e84a6 100644 --- a/client/python/apache_polaris/cli/command/__init__.py +++ b/client/python/apache_polaris/cli/command/__init__.py @@ -64,6 +64,7 @@ def options_get(key, f=lambda x: x): properties={} if properties is None else properties, set_properties={} if set_properties is None else set_properties, hadoop_warehouse=options_get(Arguments.HADOOP_WAREHOUSE), + hive_warehouse=options_get(Arguments.HIVE_WAREHOUSE), iceberg_remote_catalog_name=options_get(Arguments.ICEBERG_REMOTE_CATALOG_NAME), remove_properties=[] if remove_properties is None else remove_properties, endpoint=options_get(Arguments.ENDPOINT), diff --git a/client/python/apache_polaris/cli/command/catalogs.py b/client/python/apache_polaris/cli/command/catalogs.py index 5b0feb1a89..e183d25ee9 100644 --- a/client/python/apache_polaris/cli/command/catalogs.py +++ b/client/python/apache_polaris/cli/command/catalogs.py @@ -70,6 +70,7 @@ class CatalogsCommand(Command): set_properties: Dict[str, StrictStr] remove_properties: List[str] hadoop_warehouse: str + hive_warehouse: str iceberg_remote_catalog_name: str endpoint: str endpoint_internal: str From 6147ea24329b6f5c5e8b2ddf76f68756c523cebb Mon Sep 17 00:00:00 2001 From: Yufei Date: Tue, 2 Dec 2025 18:23:46 -0800 Subject: [PATCH 5/5] Register --hive-warehouse CLI argument and help hints --- client/python/apache_polaris/cli/constants.py | 5 ++++- client/python/apache_polaris/cli/options/option_tree.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/client/python/apache_polaris/cli/constants.py b/client/python/apache_polaris/cli/constants.py index b6549e5287..8cb09ff1b6 100644 --- a/client/python/apache_polaris/cli/constants.py +++ b/client/python/apache_polaris/cli/constants.py @@ -274,7 +274,7 @@ class Update: class External: CATALOG_CONNECTION_TYPE = ( - "The type of external catalog in [ICEBERG, HADOOP]." + "The type of external catalog in [ICEBERG, HADOOP, HIVE]."
) CATALOG_AUTHENTICATION_TYPE = ( "The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT]" @@ -291,6 +291,9 @@ class External: HADOOP_WAREHOUSE = ( "The warehouse to use when federating to a HADOOP catalog" ) + HIVE_WAREHOUSE = ( + "The warehouse to use when federating to a HIVE catalog" + ) ICEBERG_REMOTE_CATALOG_NAME = ( "The remote catalog name when federating to an Iceberg REST catalog" ) diff --git a/client/python/apache_polaris/cli/options/option_tree.py b/client/python/apache_polaris/cli/options/option_tree.py index 4ffe0f7fc2..3e93cceefd 100644 --- a/client/python/apache_polaris/cli/options/option_tree.py +++ b/client/python/apache_polaris/cli/options/option_tree.py @@ -85,6 +85,8 @@ class OptionTree: Hints.Catalogs.External.ICEBERG_REMOTE_CATALOG_NAME), Argument(Arguments.HADOOP_WAREHOUSE, str, Hints.Catalogs.External.HADOOP_WAREHOUSE), + Argument(Arguments.HIVE_WAREHOUSE, str, + Hints.Catalogs.External.HIVE_WAREHOUSE), Argument(Arguments.CATALOG_AUTHENTICATION_TYPE, str, Hints.Catalogs.External.CATALOG_AUTHENTICATION_TYPE, lower=True, choices=[at.value for at in AuthenticationType]),