From d8389a18f3197eb8f1a8cd900ca3abc43af57fbe Mon Sep 17 00:00:00 2001 From: yaojin Date: Sat, 30 May 2026 03:41:40 +0800 Subject: [PATCH] fix: auto-detect GCS endpoint for correct V4 signature config GCS S3-compatible API requires: - virtual-hosted-style addressing (not path-style) - region_name='auto' for V4 signatures Previously both clients were hardcoded to path-style addressing and used the raw S3_REGION value (often empty), causing SignatureDoesNotMatch on every put_object. Now _boto_config() auto-detects storage.googleapis.com and applies the correct settings, while MinIO and other S3 backends keep path-style addressing unchanged. --- backend/app/services/storage_runtime/s3.py | 53 +++++++++++++--------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/backend/app/services/storage_runtime/s3.py b/backend/app/services/storage_runtime/s3.py index 1b6b105d2..d9878e1f6 100644 --- a/backend/app/services/storage_runtime/s3.py +++ b/backend/app/services/storage_runtime/s3.py @@ -49,28 +49,47 @@ def _object_key(self, key: str) -> str: normalized = normalize_storage_key(key) return f"{self.prefix}/{normalized}" if self.prefix else normalized + def _is_gcs(self) -> bool: + """Return True if the endpoint targets Google Cloud Storage.""" + if not self.endpoint_url: + return False + return "storage.googleapis.com" in self.endpoint_url + + def _boto_config(self): + """Build a botocore Config appropriate for the target endpoint.""" + from botocore.config import Config + + if self._is_gcs(): + # GCS S3-compatible API requires virtual-hosted-style addressing + # and an explicit region of "auto" for V4 signatures to verify. + addressing = "virtual" + region = "auto" + else: + addressing = "path" + region = self.region or None + return Config( + max_pool_connections=self.max_pool_connections, + proxies={}, + s3={"addressing_style": addressing}, + signature_version="s3v4", + connect_timeout=5, + read_timeout=30, + tcp_keepalive=True, + region_name=region, + ) + def _client_or_raise(self): if self._client is None: try: import boto3 - from botocore.config import Config except ImportError as exc: raise RuntimeError("boto3 is required for S3 storage backend") from exc self._client = boto3.client( "s3", - region_name=self.region or None, endpoint_url=self.endpoint_url, aws_access_key_id=self.access_key_id, aws_secret_access_key=self.secret_access_key, - config=Config( - max_pool_connections=self.max_pool_connections, - proxies={}, - s3={"addressing_style": "path"}, - signature_version="s3v4", - connect_timeout=5, - read_timeout=30, - tcp_keepalive=True, - ), + config=self._boto_config(), ) return self._client @@ -79,26 +98,16 @@ async def _async_client(self): """Shared aioboto3 session with aiohttp connection pool — reuses connections but detects stale ones correctly.""" try: import aioboto3 - from botocore.config import Config except ImportError as exc: raise RuntimeError("aioboto3 is required for async S3 writes") from exc if self._aioboto3_session is None: self._aioboto3_session = aioboto3.Session() async with self._aioboto3_session.client( "s3", - region_name=self.region or None, endpoint_url=self.endpoint_url, aws_access_key_id=self.access_key_id, aws_secret_access_key=self.secret_access_key, - config=Config( - max_pool_connections=self.max_pool_connections, - proxies={}, - s3={"addressing_style": "path"}, - signature_version="s3v4", - connect_timeout=5, - read_timeout=30, - tcp_keepalive=True, - ), + config=self._boto_config(), ) as client: yield client