From 87409d3b227b9bc3342f317c9486a3ac8f56c843 Mon Sep 17 00:00:00 2001 From: jialuo Date: Mon, 8 Sep 2025 23:17:45 +0000 Subject: [PATCH] feat: Support VPC egress setting in remote function --- bigframes/functions/_function_client.py | 21 +++++++++++++++++++ bigframes/functions/_function_session.py | 11 ++++++++++ bigframes/pandas/__init__.py | 4 ++++ bigframes/session/__init__.py | 11 ++++++++++ .../large/functions/test_remote_function.py | 8 ++++++- 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index a8c9f9c301..d994d6353a 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -51,6 +51,15 @@ } ) +# https://cloud.google.com/functions/docs/reference/rest/v2/projects.locations.functions#vpcconnectoregresssettings +_VPC_EGRESS_SETTINGS_MAP = types.MappingProxyType( + { + "all": functions_v2.ServiceConfig.VpcConnectorEgressSettings.ALL_TRAFFIC, + "private-ranges-only": functions_v2.ServiceConfig.VpcConnectorEgressSettings.PRIVATE_RANGES_ONLY, + "unspecified": functions_v2.ServiceConfig.VpcConnectorEgressSettings.VPC_CONNECTOR_EGRESS_SETTINGS_UNSPECIFIED, + } +) + # BQ managed functions (@udf) currently only support Python 3.11. 
_MANAGED_FUNC_PYTHON_VERSION = "python-3.11" @@ -375,6 +384,7 @@ def create_cloud_function( max_instance_count=None, is_row_processor=False, vpc_connector=None, + vpc_connector_egress_settings="private-ranges-only", memory_mib=1024, ingress_settings="internal-only", ): @@ -472,6 +482,15 @@ def create_cloud_function( function.service_config.max_instance_count = max_instance_count if vpc_connector is not None: function.service_config.vpc_connector = vpc_connector + if vpc_connector_egress_settings not in _VPC_EGRESS_SETTINGS_MAP: + raise bf_formatting.create_exception_with_feedback_link( + ValueError, + f"'{vpc_connector_egress_settings}' not one of the supported vpc egress settings values: {list(_VPC_EGRESS_SETTINGS_MAP)}", + ) + function.service_config.vpc_connector_egress_settings = cast( + functions_v2.ServiceConfig.VpcConnectorEgressSettings, + _VPC_EGRESS_SETTINGS_MAP[vpc_connector_egress_settings], + ) function.service_config.service_account_email = ( self._cloud_function_service_account ) @@ -532,6 +551,7 @@ def provision_bq_remote_function( cloud_function_max_instance_count, is_row_processor, cloud_function_vpc_connector, + cloud_function_vpc_connector_egress_settings, cloud_function_memory_mib, cloud_function_ingress_settings, bq_metadata, @@ -580,6 +600,7 @@ def provision_bq_remote_function( max_instance_count=cloud_function_max_instance_count, is_row_processor=is_row_processor, vpc_connector=cloud_function_vpc_connector, + vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings, memory_mib=cloud_function_memory_mib, ingress_settings=cloud_function_ingress_settings, ) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index a2fb66539b..6b5c9bf071 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -245,6 +245,9 @@ def remote_function( cloud_function_timeout: Optional[int] = 600, cloud_function_max_instances: Optional[int] = None, 
cloud_function_vpc_connector: Optional[str] = None, + cloud_function_vpc_connector_egress_settings: Literal[ + "all", "private-ranges-only", "unspecified" + ] = "private-ranges-only", cloud_function_memory_mib: Optional[int] = 1024, cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" @@ -425,6 +428,13 @@ def remote_function( function. This is useful if your code needs access to data or service(s) that are on a VPC network. See for more details https://cloud.google.com/functions/docs/networking/connecting-vpc. + cloud_function_vpc_connector_egress_settings (str, Optional): + Egress settings for the VPC connector, controlling what outbound + traffic is routed through the VPC connector. + Options are: `all`, `private-ranges-only`, or `unspecified`. + If not specified, `private-ranges-only` is used by default. + See for more details + https://cloud.google.com/run/docs/configuring/vpc-connectors#egress-job. cloud_function_memory_mib (int, Optional): The amounts of memory (in mebibytes) to allocate for the cloud function (2nd gen) created. 
This also dictates a corresponding @@ -616,6 +626,7 @@ def wrapper(func): cloud_function_max_instance_count=cloud_function_max_instances, is_row_processor=is_row_processor, cloud_function_vpc_connector=cloud_function_vpc_connector, + cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, bq_metadata=bqrf_metadata, diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 6ffed5b53f..9d4fc101f6 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -87,6 +87,9 @@ def remote_function( cloud_function_timeout: Optional[int] = 600, cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, + cloud_function_vpc_connector_egress_settings: Literal[ + "all", "private-ranges-only", "unspecified" + ] = "private-ranges-only", cloud_function_memory_mib: Optional[int] = 1024, cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" @@ -109,6 +112,7 @@ def remote_function( cloud_function_timeout=cloud_function_timeout, cloud_function_max_instances=cloud_function_max_instances, cloud_function_vpc_connector=cloud_function_vpc_connector, + cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, cloud_build_service_account=cloud_build_service_account, diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index df67e64e9e..6252a59e31 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1510,6 +1510,9 @@ def remote_function( cloud_function_timeout: Optional[int] = 600, cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, + 
cloud_function_vpc_connector_egress_settings: Literal[ + "all", "private-ranges-only", "unspecified" + ] = "private-ranges-only", cloud_function_memory_mib: Optional[int] = 1024, cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" @@ -1675,6 +1678,13 @@ def remote_function( function. This is useful if your code needs access to data or service(s) that are on a VPC network. See for more details https://cloud.google.com/functions/docs/networking/connecting-vpc. + cloud_function_vpc_connector_egress_settings (str, Optional): + Egress settings for the VPC connector, controlling what outbound + traffic is routed through the VPC connector. + Options are: `all`, `private-ranges-only`, or `unspecified`. + If not specified, `private-ranges-only` is used by default. + See for more details + https://cloud.google.com/run/docs/configuring/vpc-connectors#egress-job. cloud_function_memory_mib (int, Optional): The amounts of memory (in mebibytes) to allocate for the cloud function (2nd gen) created. 
This also dictates a corresponding @@ -1732,6 +1742,7 @@ def remote_function( cloud_function_timeout=cloud_function_timeout, cloud_function_max_instances=cloud_function_max_instances, cloud_function_vpc_connector=cloud_function_vpc_connector, + cloud_function_vpc_connector_egress_settings=cloud_function_vpc_connector_egress_settings, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, cloud_build_service_account=cloud_build_service_account, diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index f60786437f..22b623193d 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -1478,14 +1478,20 @@ def square_num(x): reuse=False, cloud_function_service_account="default", cloud_function_vpc_connector=gcf_vpc_connector, + cloud_function_vpc_connector_egress_settings="all", cloud_function_ingress_settings="all", )(square_num) - # assert that the GCF is created with the intended vpc connector gcf = rf_session.cloudfunctionsclient.get_function( name=square_num_remote.bigframes_cloud_function ) + + # assert that the GCF is created with the intended vpc connector and + # egress settings. assert gcf.service_config.vpc_connector == gcf_vpc_connector + # The value is 2 (ALL_TRAFFIC) since we set + # cloud_function_vpc_connector_egress_settings="all" earlier. + assert gcf.service_config.vpc_connector_egress_settings == 2 # assert that the function works as expected on data scalars_df, scalars_pandas_df = scalars_dfs