Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion client/python/cli/command/catalogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
StorageConfigInfo, ExternalCatalog, AwsStorageConfigInfo, AzureStorageConfigInfo, GcpStorageConfigInfo, \
PolarisCatalog, CatalogProperties, BearerAuthenticationParameters, ImplicitAuthenticationParameters, \
OAuthClientCredentialsParameters, SigV4AuthenticationParameters, HadoopConnectionConfigInfo, \
IcebergRestConnectionConfigInfo, AwsIamServiceIdentityInfo
IcebergRestConnectionConfigInfo, HiveConnectionConfigInfo, AwsIamServiceIdentityInfo


@dataclass
Expand Down Expand Up @@ -114,6 +114,11 @@ def validate(self):
if self.catalog_connection_type == CatalogConnectionType.HADOOP.value:
if not self.hadoop_warehouse or not self.catalog_uri:
raise Exception(f"Missing required argument for connection type 'HADOOP':"
f" {Argument.to_flag_name(Arguments.HADOOP_WAREHOUSE)}"
f" and {Argument.to_flag_name(Arguments.CATALOG_URI)}")
elif self.catalog_connection_type == CatalogConnectionType.HIVE.value:
if not self.hadoop_warehouse or not self.catalog_uri:
raise Exception(f"Missing required argument for connection type 'HIVE':"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: `--help` was updated to list `iceberg-rest`, `hadoop`, `hive`, so these messages (and the tests) should follow the same convention.

f" {Argument.to_flag_name(Arguments.HADOOP_WAREHOUSE)}"
f" and {Argument.to_flag_name(Arguments.CATALOG_URI)}")
if self.catalog_service_identity_type == ServiceIdentityType.AWS_IAM.value:
Expand Down Expand Up @@ -268,6 +273,14 @@ def _build_connection_config_info(self):
service_identity=service_identity,
remote_catalog_name=self.iceberg_remote_catalog_name
)
elif self.catalog_connection_type == CatalogConnectionType.HIVE.value:
config = HiveConnectionConfigInfo(
connection_type=self.catalog_connection_type.upper(),
uri=self.catalog_uri,
authentication_parameters=auth_params,
service_identity=service_identity,
warehouse=self.hadoop_warehouse
Copy link
Contributor Author

@flyrain flyrain Oct 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's weird to use hadoop_warehouse. We might use the name warehouse, so that it can be applied to both Hadoop and Hive federation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thoughts, suggestions? @HonahX @eric-maynard @MonkeyCanCode

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the late response; I was away for a couple of days. I don't have a strong preference on this naming. Maybe ask @eric-maynard, as he initially added it in eb6b6ad.

Copy link
Contributor

@eric-maynard eric-maynard Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tentative convention I had in mind was to prefix each argument with the federation type it's specific to -- namely, ICEBERG_REMOTE_CATALOG_NAME is specific to the Iceberg federation type, and HADOOP_WAREHOUSE is specific to the Hadoop federation type. I thought it might be unclear what just REMOTE_CATALOG_NAME or WAREHOUSE meant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, I'm supportive of re-using these flags across federation types (e.g. across Hive / Hadoop), and indeed I think if we ever flesh out federation in the way that I envisioned at this time we would need to. There would be a way to federate to a Hive catalog both for Iceberg and non-Iceberg tables, and these would surely share arguments.

My only hesitation would be that the CLI's method of handling arguments is a bit brittle, and if we re-use them we should just make sure the parsing and the `--help` display behave the way we expect.

)
elif self.catalog_connection_type is not None:
raise Exception("Unknown catalog connection type:", self.catalog_connection_type)
return config
Expand Down
1 change: 1 addition & 0 deletions client/python/cli/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class CatalogConnectionType(Enum):

HADOOP = "hadoop"
ICEBERG = "iceberg-rest"
HIVE = "hive"


class AuthenticationType(Enum):
Expand Down
33 changes: 33 additions & 0 deletions client/python/test/test_cli_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ def get(obj, arg_string):
'--allowed-location', 'a', '--allowed-location', 'b',
'--role-arn', 'ra', '--default-base-location', 'x']),
'gcs')
check_exception(lambda: mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external',
'--storage-type', 'file', '--default-base-location', 'dbl',
'--catalog-connection-type', 'hive', '--catalog-authentication-type', 'implicit']),
'--hadoop-warehouse')

# Test various correct commands:
check_arguments(
Expand Down Expand Up @@ -586,6 +590,35 @@ def get(obj, arg_string):
(0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT',
(0, 'catalog.connection_config_info.uri'): 'u',
})
check_arguments(
mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external',
'--storage-type', 'file', '--default-base-location', 'dbl',
'--catalog-connection-type', 'hive', '--hadoop-warehouse', 'h',
'--catalog-authentication-type', 'implicit', '--catalog-uri', 'u']),
'create_catalog', {
(0, 'catalog.name'): 'my-catalog',
(0, 'catalog.type'): 'EXTERNAL',
(0, 'catalog.connection_config_info.connection_type'): 'HIVE',
(0, 'catalog.connection_config_info.warehouse'): 'h',
(0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'IMPLICIT',
(0, 'catalog.connection_config_info.uri'): 'u',
})
check_arguments(
mock_execute(['catalogs', 'create', 'my-catalog', '--type', 'external',
'--storage-type', 'file', '--default-base-location', 'dbl',
'--catalog-connection-type', 'hive', '--hadoop-warehouse', '/warehouse/path',
'--catalog-authentication-type', 'oauth', '--catalog-uri', 'thrift://hive-metastore:9083',
'--catalog-token-uri', 'http://auth-server/token', '--catalog-client-id', 'test-client',
'--catalog-client-secret', 'test-secret', '--catalog-client-scope', 'read',
'--catalog-client-scope', 'write']),
'create_catalog', {
(0, 'catalog.name'): 'my-catalog',
(0, 'catalog.type'): 'EXTERNAL',
(0, 'catalog.connection_config_info.connection_type'): 'HIVE',
(0, 'catalog.connection_config_info.warehouse'): '/warehouse/path',
(0, 'catalog.connection_config_info.authentication_parameters.authentication_type'): 'OAUTH',
(0, 'catalog.connection_config_info.uri'): 'thrift://hive-metastore:9083',
})

check_arguments(
mock_execute(['principals', 'reset', 'test', '--new-client-id', 'e469c048cf866df1', '--new-client-secret', 'e469c048cf866dfae469c048cf866df1']),
Expand Down
2 changes: 1 addition & 1 deletion site/content/in-dev/unreleased/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ options:
--consent-url (Only for Azure) A consent URL granting permissions for the Azure Storage location
--service-account (Only for GCS) The service account to use when connecting to GCS
--property A key/value pair such as: tag=value. Multiple can be provided by specifying this option more than once
--catalog-connection-type The type of external catalog in [ICEBERG, HADOOP].
--catalog-connection-type The type of external catalog in [iceberg-rest, hadoop, hive].
--iceberg-remote-catalog-name The remote catalog name when federating to an Iceberg REST catalog
--hadoop-warehouse The warehouse to use when federating to a HADOOP or HIVE catalog
--catalog-authentication-type The type of authentication in [OAUTH, BEARER, SIGV4, IMPLICIT]
Expand Down