From c771c5ce5f1e05d017cfc1c2e77f69c22e7d3b7e Mon Sep 17 00:00:00 2001 From: jun09 <48198880+jun09@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:59:25 +0800 Subject: [PATCH] fix(amazon): Support AWS China region endpoints in RedshiftSQLHook OpenLineage identifier parsing The _get_identifier_from_hostname method in RedshiftSQLHook only handled global AWS endpoints (amazonaws.com) but not AWS China region endpoints (amazonaws.com.cn). This caused the OpenLineage authority part to fall back to the full hostname instead of correctly parsing cluster_identifier.region_name. Global endpoint format (6 dot-separated parts): my-cluster.id.us-east-1.redshift.amazonaws.com China endpoint format (7 dot-separated parts): my-cluster.id.cn-north-1.redshift.amazonaws.com.cn The same issue affects both provisioned clusters and Redshift Serverless workgroups in cn-north-1 and cn-northwest-1. --- .../providers/amazon/aws/hooks/redshift_sql.py | 4 ++++ .../tests/unit/amazon/aws/hooks/test_redshift_sql.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/hooks/redshift_sql.py b/providers/amazon/src/airflow/providers/amazon/aws/hooks/redshift_sql.py index 0a55c2bdc26c2..d3ce58190426a 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/hooks/redshift_sql.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/hooks/redshift_sql.py @@ -285,6 +285,10 @@ def _get_identifier_from_hostname(self, hostname: str) -> str: parts = hostname.split(".") if hostname.endswith("amazonaws.com") and len(parts) == 6: return f"{parts[0]}.{parts[2]}" + # AWS China regions use the amazonaws.com.cn endpoint suffix + # e.g. my-cluster.id.cn-north-1.redshift.amazonaws.com.cn (7 parts vs 6 for global) + if hostname.endswith("amazonaws.com.cn") and len(parts) == 7: + return f"{parts[0]}.{parts[2]}" self.log.debug( """Could not parse identifier from hostname '%s'. You are probably using IP to connect to Redshift cluster. diff --git a/providers/amazon/tests/unit/amazon/aws/hooks/test_redshift_sql.py b/providers/amazon/tests/unit/amazon/aws/hooks/test_redshift_sql.py index c8aaa5ac938f6..80a5877e4952c 100644 --- a/providers/amazon/tests/unit/amazon/aws/hooks/test_redshift_sql.py +++ b/providers/amazon/tests/unit/amazon/aws/hooks/test_redshift_sql.py @@ -297,6 +297,18 @@ def test_get_conn_iam_does_not_mutate_connection(self, mock_connect, mock_aws_ho {}, "1.2.3.4", ), + # test with AWS China region endpoint (provisioned cluster) + ( + "cluster_identifier_from_host.id.cn-north-1.redshift.amazonaws.com.cn", + {"iam": True}, + "cluster_identifier_from_host.cn-north-1", + ), + # test with AWS China region endpoint (serverless) + ( + "workgroup-name.account-id.cn-northwest-1.redshift-serverless.amazonaws.com.cn", + {"iam": True}, + "workgroup-name.cn-northwest-1", + ), ], ) def test_get_openlineage_redshift_authority_part(