Skip to content

Commit

Permalink
🐛Source File: added parser error handling (#26275)
Browse files Browse the repository at this point in the history
* added parser error handling

* updated versions, added changelog

* Automated Change

---------

Co-authored-by: darynaishchenko <darynaishchenko@users.noreply.github.com>
  • Loading branch information
darynaishchenko and darynaishchenko committed May 25, 2023
1 parent 44c23d1 commit 2a1d7f3
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 59 deletions.
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-file-secure/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM airbyte/source-file:0.3.8
FROM airbyte/source-file:0.3.9

WORKDIR /airbyte/integration_code
COPY source_file_secure ./source_file_secure
Expand All @@ -9,5 +9,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.8
LABEL io.airbyte.version=0.3.9
LABEL io.airbyte.name=airbyte/source-file-secure
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerImageTag: 0.3.8
dockerImageTag: 0.3.9
dockerRepository: airbyte/source-file-secure
githubIssueLabel: source-file
icon: file.svg
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.8
LABEL io.airbyte.version=0.3.9
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
date,key,new_confirmed,new_deceased,new_recovered,new_tested,total_confirmed,total_deceased,total_recovered,total_tested
2020-09-24,AE,1002,1,,93618,88532,407,,9130551
2020-09-24,AF,0,0,,,39170,1451,,
2020-09-24,AM,392,2,,,48643,947,,
2020-09-24,AT,688,6,,18518,41246,783,,1507782
4 changes: 2 additions & 2 deletions airbyte-integrations/connectors/source-file/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerImageTag: 0.3.8
dockerImageTag: 0.3.9
dockerRepository: airbyte/source-file
githubIssueLabel: source-file
icon: file.svg
Expand All @@ -14,7 +14,7 @@ data:
registries:
cloud:
dockerRepository: airbyte/source-file-secure
dockerImageTag: 0.3.8 # Don't forget to publish source-file-secure as well when updating this.
dockerImageTag: 0.3.9 # Don't forget to publish source-file-secure as well when updating this.
enabled: true
oss:
enabled: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException
from paramiko import SSHException
from pandas.errors import ParserError
from urllib3.exceptions import ProtocolError
from yaml import safe_load

Expand Down Expand Up @@ -426,6 +427,10 @@ def read(self, fields: Iterable = None) -> Iterable[dict]:
)
logger.error(f"{error_msg}\n{traceback.format_exc()}")
raise ConfigurationError(error_msg) from err
except ParserError as err:
error_msg = f"File {fp} can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html"
logger.error(f"{error_msg}\n{traceback.format_exc()}")
raise ConfigurationError(error_msg) from err

def _cache_stream(self, fp):
"""cache stream to file"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
SyncMode,
Type,
)
from source_file.client import ConfigurationError
from source_file.source import SourceFile

logger = logging.getLogger("airbyte")
Expand Down Expand Up @@ -206,3 +207,20 @@ def test_pandas_header_none(absolute_path, test_files):
{"0": "text11", "1": "text12"},
{"0": "text21", "1": "text22"},
]


def test_incorrect_reader_options(absolute_path, test_files):
    """Reading a CSV with unsuitable ``reader_options`` raises ConfigurationError.

    The config points the local-storage provider at ``test_parser_error.csv``
    and passes ``sep="4"`` / ``nrows=20`` as reader options. The test expects
    the read to fail with a ``ConfigurationError`` whose message tells the user
    to check ``reader_options`` and links to the pandas IO guide.

    NOTE(review): presumably ``sep="4"`` makes pandas raise ``ParserError`` on
    the fixture, which the client then converts -- confirm against the client.
    """
    expected_message = (
        "can not be parsed. Please check your reader_options. "
        "https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html"
    )
    config = {
        "dataset_name": "test",
        "format": "csv",
        "reader_options": json.dumps({"sep": "4", "nrows": 20}),
        "url": f"{absolute_path}/{test_files}/test_parser_error.csv",
        "provider": {"storage": "local"},
    }
    catalog = get_catalog({"0": {"type": ["string", "null"]}, "1": {"type": ["string", "null"]}})
    source = SourceFile()

    with pytest.raises(ConfigurationError) as exc_info:
        # Drain the result inside the context manager so that an error raised
        # while iterating (not only when calling read) is captured as well.
        list(source.read(logger=logger, config=deepcopy(config), catalog=catalog))

    # Must stay outside the ``with`` block: statements after the raising call
    # inside the block would never execute.
    assert expected_message in str(exc_info.value)
107 changes: 54 additions & 53 deletions docs/integrations/sources/file.md

Large diffs are not rendered by default.

0 comments on commit 2a1d7f3

Please sign in to comment.