diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 687c72e3f9f8a..7709769c0f497 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -1568,11 +1568,14 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 2.0.1 + dockerImageTag: 2.0.2 documentationUrl: https://docs.airbyte.com/integrations/sources/salesforce icon: salesforce.svg sourceType: api releaseStage: generally_available + allowedHosts: + hosts: + - "*.salesforce.com" - name: SAP Fieldglass sourceDefinitionId: ec5f3102-fb31-4916-99ae-864faf8e7e25 dockerRepository: airbyte/source-sap-fieldglass diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 1bc7172ea9aa0..07c46b2a7f61b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -13106,7 +13106,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:2.0.1" +- dockerImage: "airbyte/source-salesforce:2.0.2" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/salesforce" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 46b2ca9ce8ee4..a8482a934c280 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=2.0.1 +LABEL io.airbyte.version=2.0.2 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index 7611b23a8cc4e..b5d7434462b0f 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -7,6 +7,7 @@ import math import os import time +import urllib.parse from abc import ABC from contextlib import closing from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Type, Union @@ -38,7 +39,6 @@ class SalesforceStream(HttpStream, ABC): page_size = 2000 transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) encoding = DEFAULT_ENCODING - MAX_PROPERTIES_LENGTH = Salesforce.REQUEST_SIZE_LIMITS - 2000 def __init__( self, sf_api: Salesforce, pk: str, stream_name: str, sobject_options: Mapping[str, Any] = None, schema: dict = None, **kwargs @@ -50,6 +50,10 @@ def __init__( self.schema: Mapping[str, Any] = schema # type: ignore[assignment] self.sobject_options = sobject_options + @property + def max_properties_length(self) -> int: + return Salesforce.REQUEST_SIZE_LIMITS - len(self.url_base) - 2000 + @property def name(self) -> str: return self.stream_name @@ -69,8 +73,8 @@ def availability_strategy(self) -> Optional["AvailabilityStrategy"]: @property def too_many_properties(self): selected_properties = self.get_json_schema().get("properties", {}) - properties_length = len(",".join(p for p in selected_properties)) - return properties_length > self.MAX_PROPERTIES_LENGTH + properties_length = len(urllib.parse.quote(",".join(p for p in selected_properties))) + return properties_length > self.max_properties_length def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: yield from response.json()["records"] @@ -135,8 +139,8 @@ def chunk_properties(self) -> Iterable[Mapping[str, Any]]: summary_length = 0 local_properties = {} for property_name, value in selected_properties.items(): - current_property_length = len(property_name) + 1 # properties are split with commas - if current_property_length + summary_length >= self.MAX_PROPERTIES_LENGTH: + current_property_length = len(urllib.parse.quote(f"{property_name},")) + if current_property_length + summary_length >= self.max_properties_length: yield local_properties local_properties = {} summary_length = 0 diff --git a/airbyte-integrations/connectors/source-salesforce/unit_tests/api_test.py b/airbyte-integrations/connectors/source-salesforce/unit_tests/api_test.py index 02a5e81db58d6..7bffa1ceaaf06 100644 --- a/airbyte-integrations/connectors/source-salesforce/unit_tests/api_test.py +++ b/airbyte-integrations/connectors/source-salesforce/unit_tests/api_test.py @@ -14,6 +14,7 @@ from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, SyncMode, Type from conftest import encoding_symbols_parameters, generate_stream from requests.exceptions import HTTPError +from source_salesforce.api import Salesforce from source_salesforce.source import SourceSalesforce from source_salesforce.streams import ( CSV_FIELD_SIZE_LIMIT, @@ -679,3 +680,5 @@ def test_too_many_properties(stream_config, stream_api_v2_pk_too_many_properties {"Id": 3, "propertyA": "A", "propertyB": "B"}, {"Id": 4, "propertyA": "A", "propertyB": "B"} ] + for call in requests_mock.request_history: + assert len(call.url) < Salesforce.REQUEST_SIZE_LIMITS diff --git a/airbyte-integrations/connectors/source-salesforce/unit_tests/conftest.py b/airbyte-integrations/connectors/source-salesforce/unit_tests/conftest.py index 00a813d797abd..8f27a9efd7744 100644 --- a/airbyte-integrations/connectors/source-salesforce/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-salesforce/unit_tests/conftest.py @@ -9,7 +9,6 @@ from airbyte_cdk.models import ConfiguredAirbyteCatalog from source_salesforce.api import Salesforce from source_salesforce.source import SourceSalesforce -from source_salesforce.streams import SalesforceStream @pytest.fixture(autouse=True) @@ -109,7 +108,7 @@ def stream_api_pk(stream_config): @pytest.fixture(scope="module") def stream_api_v2_too_many_properties(stream_config): describe_response_data = { - "fields": [{"name": f"PropertyName{str(i)}", "type": "string"} for i in range(SalesforceStream.MAX_PROPERTIES_LENGTH)] + "fields": [{"name": f"Property{str(i)}", "type": "string"} for i in range(Salesforce.REQUEST_SIZE_LIMITS)] } describe_response_data["fields"].extend([{"name": "BillingAddress", "type": "address"}]) return _stream_api(stream_config, describe_response_data=describe_response_data) @@ -118,7 +117,7 @@ def stream_api_v2_too_many_properties(stream_config): @pytest.fixture(scope="module") def stream_api_v2_pk_too_many_properties(stream_config): describe_response_data = { - "fields": [{"name": f"PropertyName{str(i)}", "type": "string"} for i in range(SalesforceStream.MAX_PROPERTIES_LENGTH)] + "fields": [{"name": f"Property{str(i)}", "type": "string"} for i in range(Salesforce.REQUEST_SIZE_LIMITS)] } describe_response_data["fields"].extend([ {"name": "BillingAddress", "type": "address"}, {"name": "Id", "type": "string"} diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index c4288cfc048c5..5767045d4ab82 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -129,6 +129,7 @@ Now that you have set up the Salesforce source connector, check out the followin | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------| +| 2.0.2 | 2023-02-13 | [22896](https://github.com/airbytehq/airbyte/pull/22896) | Count the URL length based on encoded params | | 2.0.1 | 2023-02-08 | [22597](https://github.com/airbytehq/airbyte/pull/22597) | Make multiple requests if a REST stream has too many properties | | 2.0.0 | 2023-02-02 | [22322](https://github.com/airbytehq/airbyte/pull/22322) | Remove `ActivityMetricRollup` stream | | 1.0.30 | 2023-01-27 | [22016](https://github.com/airbytehq/airbyte/pull/22016) | Set `AvailabilityStrategy` for streams explicitly to `None` |