From 144d937eabba1f6fa0b0a075bf8ea72231da0582 Mon Sep 17 00:00:00 2001 From: Emanuel Ferm Date: Thu, 21 Aug 2025 15:02:07 -0400 Subject: [PATCH 1/4] strip whitespace --- airbyte_cdk/sources/file_based/file_types/csv_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index e3010690e..cf3a82e22 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -128,7 +128,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) # Then read the header self._skip_rows(fp, config_format.skip_rows_before_header) reader = csv.reader(fp, dialect=dialect_name) # type: ignore - headers = list(next(reader)) + headers = [header.strip() for header in next(reader)] fp.seek(0) return headers From f207fd8110f64a4d548897981a5ffa493e44af7d Mon Sep 17 00:00:00 2001 From: Emanuel Ferm Date: Thu, 21 Aug 2025 15:02:14 -0400 Subject: [PATCH 2/4] add unit test --- .../file_based/file_types/test_csv_parser.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/unit_tests/sources/file_based/file_types/test_csv_parser.py b/unit_tests/sources/file_based/file_types/test_csv_parser.py index 0b49dd66d..8302e57e6 100644 --- a/unit_tests/sources/file_based/file_types/test_csv_parser.py +++ b/unit_tests/sources/file_based/file_types/test_csv_parser.py @@ -658,6 +658,20 @@ def test_read_data_with_encoding_error(self) -> None: assert "encoding" in ate.value.message assert self._csv_reader._get_headers.called + def test_read_data_with_whitespace_in_header(self) -> None: + self._stream_reader.open_file.return_value = ( + CsvFileBuilder() + .with_data( + [ + "header1 ,\theader2", + "1,2", + ] + ) + .build() + ) + data_generator = self._read_data() + assert list(data_generator) == [{"header1": "1", "header2": "2"}] + def _read_data(self) -> Generator[Dict[str, str], None, None]: data_generator = self._csv_reader.read_data( self._config, From 571a46f1afa4306b17716c37fc0df42db2640ae3 Mon Sep 17 00:00:00 2001 From: Emanuel Ferm Date: Thu, 21 Aug 2025 18:03:21 -0400 Subject: [PATCH 3/4] rename --- unit_tests/sources/file_based/file_types/test_csv_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/sources/file_based/file_types/test_csv_parser.py b/unit_tests/sources/file_based/file_types/test_csv_parser.py index 8302e57e6..7470b6e33 100644 --- a/unit_tests/sources/file_based/file_types/test_csv_parser.py +++ b/unit_tests/sources/file_based/file_types/test_csv_parser.py @@ -658,7 +658,7 @@ def test_read_data_with_encoding_error(self) -> None: assert "encoding" in ate.value.message assert self._csv_reader._get_headers.called - def test_read_data_with_whitespace_in_header(self) -> None: + def test_read_data_strips_leading_and_trailing_whitespace_in_header(self) -> None: self._stream_reader.open_file.return_value = ( CsvFileBuilder() .with_data( From 5f6b9a822354d6ff8ded699423abb7b9b12f1c1f Mon Sep 17 00:00:00 2001 From: Emanuel Ferm Date: Thu, 21 Aug 2025 18:51:19 -0400 Subject: [PATCH 4/4] remove unnecessary strip --- airbyte_cdk/sources/file_based/file_types/csv_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index cf3a82e22..8c0de1d93 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -209,7 +209,7 @@ async def infer_schema( failure_type=FailureType.config_error, ) schema = { - header.strip(): {"type": type_inferred.infer()} + header: {"type": type_inferred.infer()} for header, type_inferred in type_inferrer_by_field.items() } data_generator.close()