Skip to content

Commit

Permalink
🐛 Source Salesforce: increase CSV field_size_limit (#10012)
Browse files Browse the repository at this point in the history
* Increase CSV field_size_limit

Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
  • Loading branch information
grubberr committed Feb 4, 2022
1 parent 82ba043 commit 84d7323
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "b117307c-14b6-41aa-9422-947e34922962",
"name": "Salesforce",
"dockerRepository": "airbyte/source-salesforce",
"dockerImageTag": "0.1.20",
"dockerImageTag": "0.1.22",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/salesforce",
"icon": "salesforce.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@
- name: Salesforce
sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962
dockerRepository: airbyte/source-salesforce
dockerImageTag: 0.1.21
dockerImageTag: 0.1.22
documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce
icon: salesforce.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6801,7 +6801,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-salesforce:0.1.21"
- dockerImage: "airbyte/source-salesforce:0.1.22"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/salesforce"
connectionSpecification:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ COPY source_salesforce ./source_salesforce
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.21
LABEL io.airbyte.version=0.1.22
LABEL io.airbyte.name=airbyte/source-salesforce
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#

import csv
import ctypes
import io
import math
import time
Expand All @@ -20,6 +21,10 @@
from .api import UNSUPPORTED_FILTERING_STREAMS, Salesforce
from .rate_limiting import default_backoff_handler

# The csv module caps the size of a single field. Raise that cap to the largest
# value that fits in a signed C long (half of the unsigned long maximum) so that
# very large fields can be parsed.
# https://stackoverflow.com/a/54517228
CSV_FIELD_SIZE_LIMIT = ctypes.c_ulong(-1).value // 2
csv.field_size_limit(CSV_FIELD_SIZE_LIMIT)


class SalesforceStream(HttpStream, ABC):
page_size = 2000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from requests.exceptions import HTTPError
from source_salesforce.api import Salesforce
from source_salesforce.source import SourceSalesforce
from source_salesforce.streams import BulkIncrementalSalesforceStream, BulkSalesforceStream, IncrementalSalesforceStream, SalesforceStream
from source_salesforce.streams import (
CSV_FIELD_SIZE_LIMIT,
BulkIncrementalSalesforceStream,
BulkSalesforceStream,
IncrementalSalesforceStream,
SalesforceStream,
)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -548,7 +554,8 @@ def test_csv_reader_dialect_unix():
data = [
{"Id": "1", "Name": '"first_name" "last_name"'},
{"Id": "2", "Name": "'" + 'first_name"\n' + "'" + 'last_name\n"'},
{"Id": "3", "Name": "first_name last_name"},
{"Id": "3", "Name": "first_name last_name" + 1024 * 1024 * "e"},
{"Id": "4", "Name": "first_name last_name"},
]

with io.StringIO("", newline="") as csvfile:
Expand All @@ -562,3 +569,21 @@ def test_csv_reader_dialect_unix():
m.register_uri("GET", url + "/results", text=text)
result = [dict(i[1]) for i in stream.download_data(url)]
assert result == data


def test_csv_field_size_limit():
    """Check that CSV_FIELD_SIZE_LIMIT lets the csv module parse a 1 MiB field.

    With the 128 KiB limit the reader must raise ``csv.Error``; with the
    connector's limit the very same payload must parse intact.
    """
    DEFAULT_CSV_FIELD_SIZE_LIMIT = 1024 * 128

    field_size = 1024 * 1024
    text = '"Id","Name"\n"1","' + field_size * "a" + '"\n'

    # csv.field_size_limit() is process-wide and returns the previous value.
    # Remember it so the limit is restored even when an assertion below fails;
    # otherwise a failure here would leave the small limit active for every
    # test that runs afterwards.
    previous_limit = csv.field_size_limit(DEFAULT_CSV_FIELD_SIZE_LIMIT)
    try:
        reader = csv.reader(io.StringIO(text))
        with pytest.raises(csv.Error):
            for _ in reader:
                pass

        csv.field_size_limit(CSV_FIELD_SIZE_LIMIT)
        reader = csv.reader(io.StringIO(text))
        # Verify the content, not merely the absence of an exception.
        assert list(reader) == [["Id", "Name"], ["1", field_size * "a"]]
    finally:
        csv.field_size_limit(previous_limit)
1 change: 1 addition & 0 deletions docs/integrations/sources/salesforce.md
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ List of available streams:

| Version | Date | Pull Request | Subject |
|:--------|:-----------| :--- |:--------------------------------------------------------------------------|
| 0.1.22 | 2022-02-02 | [10012](https://github.com/airbytehq/airbyte/pull/10012) | Increase CSV field_size_limit |
| 0.1.21 | 2022-01-28 | [9499](https://github.com/airbytehq/airbyte/pull/9499) | If a sync reaches daily rate limit it ends the sync early with success status. Read more in `Performance considerations` section |
| 0.1.20 | 2022-01-26 | [9757](https://github.com/airbytehq/airbyte/pull/9757) | Parse CSV with "unix" dialect |
| 0.1.19 | 2022-01-25 | [8617](https://github.com/airbytehq/airbyte/pull/8617) | Update connector fields title/description |
Expand Down

0 comments on commit 84d7323

Please sign in to comment.