diff --git a/client/python/apache_polaris/cli/command/__init__.py b/client/python/apache_polaris/cli/command/__init__.py index 3db3b3ca39..9f098e84a6 100644 --- a/client/python/apache_polaris/cli/command/__init__.py +++ b/client/python/apache_polaris/cli/command/__init__.py @@ -64,6 +64,7 @@ def options_get(key, f=lambda x: x): properties={} if properties is None else properties, set_properties={} if set_properties is None else set_properties, hadoop_warehouse=options_get(Arguments.HADOOP_WAREHOUSE), + hive_warehouse=options_get(Arguments.HIVE_WAREHOUSE), iceberg_remote_catalog_name=options_get(Arguments.ICEBERG_REMOTE_CATALOG_NAME), remove_properties=[] if remove_properties is None else remove_properties, endpoint=options_get(Arguments.ENDPOINT), diff --git a/client/python/apache_polaris/cli/command/catalogs.py b/client/python/apache_polaris/cli/command/catalogs.py index 0abc9c58ee..e183d25ee9 100644 --- a/client/python/apache_polaris/cli/command/catalogs.py +++ b/client/python/apache_polaris/cli/command/catalogs.py @@ -35,7 +35,7 @@ PolarisCatalog, CatalogProperties, BearerAuthenticationParameters, \ ImplicitAuthenticationParameters, \ OAuthClientCredentialsParameters, SigV4AuthenticationParameters, \ - HadoopConnectionConfigInfo, \ + HadoopConnectionConfigInfo, HiveConnectionConfigInfo, \ IcebergRestConnectionConfigInfo, AwsIamServiceIdentityInfo @@ -70,6 +70,7 @@ class CatalogsCommand(Command): set_properties: Dict[str, StrictStr] remove_properties: List[str] hadoop_warehouse: str + hive_warehouse: str iceberg_remote_catalog_name: str endpoint: str endpoint_internal: str @@ -121,8 +122,13 @@ def validate(self): f" and {Argument.to_flag_name(Arguments.CATALOG_SIGNING_REGION)}") if self.catalog_connection_type == CatalogConnectionType.HADOOP.value: if not self.hadoop_warehouse or not self.catalog_uri: - raise Exception(f"Missing required argument for connection type 'HADOOP':" + raise Exception(f"Missing required argument for connection type 'hadoop':" f" {Argument.to_flag_name(Arguments.HADOOP_WAREHOUSE)}" + f" and {Argument.to_flag_name(Arguments.CATALOG_URI)}") + elif self.catalog_connection_type == CatalogConnectionType.HIVE.value: + if not self.hive_warehouse or not self.catalog_uri: + raise Exception(f"Missing required argument for connection type 'hive':" + f" {Argument.to_flag_name(Arguments.HIVE_WAREHOUSE)}" f" and {Argument.to_flag_name(Arguments.CATALOG_URI)}") if self.catalog_service_identity_type == ServiceIdentityType.AWS_IAM.value: if not self.catalog_service_identity_iam_arn: @@ -278,6 +284,14 @@ def _build_connection_config_info(self): service_identity=service_identity, remote_catalog_name=self.iceberg_remote_catalog_name ) + elif self.catalog_connection_type == CatalogConnectionType.HIVE.value: + config = HiveConnectionConfigInfo( + connection_type=self.catalog_connection_type.upper(), + uri=self.catalog_uri, + authentication_parameters=auth_params, + service_identity=service_identity, + warehouse=self.hive_warehouse + ) elif self.catalog_connection_type is not None: raise Exception("Unknown catalog connection type:", self.catalog_connection_type) return config diff --git a/client/python/apache_polaris/cli/constants.py b/client/python/apache_polaris/cli/constants.py index 151dbee24e..8cb09ff1b6 100644 --- a/client/python/apache_polaris/cli/constants.py +++ b/client/python/apache_polaris/cli/constants.py @@ -55,6 +55,7 @@ class CatalogConnectionType(Enum): HADOOP = "hadoop" ICEBERG = "iceberg-rest" + HIVE = "hive" class AuthenticationType(Enum): @@ -172,6 +173,7 @@ class Arguments: PROXY = "proxy" DEBUG = "debug" HADOOP_WAREHOUSE = "hadoop_warehouse" + HIVE_WAREHOUSE = "hive_warehouse" ICEBERG_REMOTE_CATALOG_NAME = "iceberg_remote_catalog_name" ENDPOINT = "endpoint" ENDPOINT_INTERNAL = "endpoint_internal" @@ -272,7 +274,7 @@ class Update: class External: CATALOG_CONNECTION_TYPE = ( - "The type of external catalog in [ICEBERG, HADOOP]." + "The type of external catalog in [ICEBERG, HADOOP, HIVE]." ) CATALOG_AUTHENTICATION_TYPE = ( "The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT]" @@ -289,6 +291,9 @@ class External: HADOOP_WAREHOUSE = ( "The warehouse to use when federating to a HADOOP catalog" ) + HIVE_WAREHOUSE = ( + "The warehouse to use when federating to a HIVE catalog" + ) ICEBERG_REMOTE_CATALOG_NAME = ( "The remote catalog name when federating to an Iceberg REST catalog" ) diff --git a/client/python/apache_polaris/cli/options/option_tree.py b/client/python/apache_polaris/cli/options/option_tree.py index 4ffe0f7fc2..3e93cceefd 100644 --- a/client/python/apache_polaris/cli/options/option_tree.py +++ b/client/python/apache_polaris/cli/options/option_tree.py @@ -85,6 +85,8 @@ class OptionTree: Hints.Catalogs.External.ICEBERG_REMOTE_CATALOG_NAME), Argument(Arguments.HADOOP_WAREHOUSE, str, Hints.Catalogs.External.HADOOP_WAREHOUSE), + Argument(Arguments.HIVE_WAREHOUSE, str, + Hints.Catalogs.External.HIVE_WAREHOUSE), Argument(Arguments.CATALOG_AUTHENTICATION_TYPE, str, Hints.Catalogs.External.CATALOG_AUTHENTICATION_TYPE, lower=True, choices=[at.value for at in AuthenticationType]), diff --git a/client/python/test/test_cli_parsing.py b/client/python/test/test_cli_parsing.py index 168b8e17da..196223feea 100644 --- a/client/python/test/test_cli_parsing.py +++ b/client/python/test/test_cli_parsing.py @@ -204,6 +204,10 @@ def get(obj, arg_string): '--allowed-location', 'a', '--allowed-location', 'b', '--role-arn', 'ra', '--default-base-location', 'x']), 'gcs') + check_exception(lambda: mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 'hive', '--catalog-authentication-type', 'implicit']), + '--hive-warehouse') # Test various correct commands: check_arguments( @@ -585,6 +589,35 @@ def get(obj, arg_string): (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT', (0, 'catalog.connection_config_info.uri'): 'u', }) + check_arguments( + mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 'hive', '--hive-warehouse', 'h', + '--catalog-authentication-type', 'implicit', '--catalog-uri', 'u']), + 'create_catalog', { + (0, 'catalog.name'): 'my-catalog', + (0, 'catalog.type'): 'EXTERNAL', + (0, 'catalog.connection_config_info.connection_type'): 'HIVE', + (0, 'catalog.connection_config_info.warehouse'): 'h', + (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT', + (0, 'catalog.connection_config_info.uri'): 'u', + }) + check_arguments( + mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external', + '--storage-type', 'file', '--default-base-location', 'dbl', + '--catalog-connection-type', 'hive', '--hive-warehouse', '/warehouse/path', + '--catalog-authentication-type', 'oauth', '--catalog-uri', 'thrift://hive-metastore:9083', + '--catalog-token-uri', 'http://auth-server/token', '--catalog-client-id', 'test-client', + '--catalog-client-secret', 'test-secret', '--catalog-client-scope', 'read', + '--catalog-client-scope', 'write']), + 'create_catalog', { + (0, 'catalog.name'): 'my-catalog', + (0, 'catalog.type'): 'EXTERNAL', + (0, 'catalog.connection_config_info.connection_type'): 'HIVE', + (0, 'catalog.connection_config_info.warehouse'): '/warehouse/path', + (0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'OAUTH', + (0, 'catalog.connection_config_info.uri'): 'thrift://hive-metastore:9083', + }) check_arguments( mock_execute(['principals', 'reset', 'test', '--new-client-id', 'e469c048cf866df1', '--new-client-secret', 'e469c048cf866dfae469c048cf866df1']), diff --git a/site/content/in-dev/unreleased/command-line-interface.md b/site/content/in-dev/unreleased/command-line-interface.md index c455ec80f2..f27aa05f0a 100644 --- a/site/content/in-dev/unreleased/command-line-interface.md +++ b/site/content/in-dev/unreleased/command-line-interface.md @@ -156,9 +156,10 @@ options: --consent-url (Only for Azure) A consent URL granting permissions for the Azure Storage location --service-account (Only for GCS) The service account to use when connecting to GCS --property A key/value pair such as: tag=value. Multiple can be provided by specifying this option more than once - --catalog-connection-type The type of external catalog in [iceberg-rest, hadoop]. + --catalog-connection-type The type of external catalog in [iceberg-rest, hadoop, hive]. --iceberg-remote-catalog-name The remote catalog name when federating to an Iceberg REST catalog --hadoop-warehouse The warehouse to use when federating to a HADOOP catalog + --hive-warehouse The warehouse to use when federating to a HIVE catalog --catalog-authentication-type The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT] --catalog-service-identity-type The type of service identity in [AWS_IAM] --catalog-service-identity-iam-arn When using the AWS_IAM service identity type, this is the ARN of the IAM user or IAM role Polaris uses to assume roles and then access external resources.