Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cl_sii/dte/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def parse_dte_xml(xml_doc: XmlElement) -> data_models.DteDataL2:
# TODO: change response type to a dataclass like 'DteXmlData'.
# TODO: separate the XML parsing stage from the deserialization stage, which could be
# performed by XML-agnostic code (perhaps using Marshmallow or data classes?).
# See :class:`cl_sii.rcv.parse.RcvCsvRowSchema`.
# See :class:`cl_sii.rcv.parse_csv.RcvVentaCsvRowSchema`.

if not isinstance(xml_doc, (XmlElement, XmlElementTree)):
raise TypeError("'xml_doc' must be an 'XmlElement'.")
Expand Down
61 changes: 0 additions & 61 deletions cl_sii/rcv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,3 @@
http://www.sii.cl/preguntas_frecuentes/catastro/001_012_6971.htm

"""
import csv
import io
from typing import Callable

from . import parse


def process_rcv_csv_file(
    text_stream: io.TextIOBase,
    rcv_owner_rut: str,
    row_data_handler: Callable,
    max_data_rows: int = None,
) -> int:
    """
    Process a RCV CSV file.

    Processing steps:
    - Create a CSV reader, with auto-detection of header names (first row).
    - Instantiate a schema to parse and deserialize each row.
    - For each data row:
        - Using an appropriate schema, deserialize the raw data.
        - Apply ``row_data_handler`` to the deserialization output.

    :param text_stream: a file-like object, not necessarily a real file
    :param rcv_owner_rut: RCV file owner's RUT
    :param row_data_handler: function to be called with parsed row data; it is
        invoked as ``row_data_handler(row_ix, deserialized_row_data)``, where
        ``row_ix`` is the 1-based index of the data row
    :param max_data_rows: max number of data rows to process (raise exception if exceeded);
        ``None`` means no limit
    :return: number of data rows processed
    :raises Exception: if ``max_data_rows`` is exceeded, if a row can not be
        deserialized, if ``row_data_handler`` fails, or if the CSV reader
        reports a :class:`csv.Error` (the original error is chained as the cause)

    """
    # TODO: convert to iterator. That way we do not need the 'row_data_handler' and we can also use
    #   the same function to retrieve the collection of deserialized rows.

    csv_reader = parse.create_rcv_csv_reader(text_stream, expected_fields_strict=True)
    schema = parse.RcvCsvRowSchema(context=dict(receptor_rut=rcv_owner_rut))

    # Count the rows actually handled instead of deriving the total from
    # 'csv_reader.line_num': that attribute counts physical lines read from the
    # source, which differs from the number of records whenever a quoted field
    # spans several lines or blank lines are skipped, and it would yield -1 for
    # a stream from which no line was read.
    rows_processed = 0

    try:
        for row_ix, row_data in enumerate(csv_reader, start=1):
            if max_data_rows is not None and row_ix > max_data_rows:
                # TODO: custom exception
                raise Exception("Exceeded 'max_data_rows' value: {}.".format(max_data_rows))

            try:
                deserialized_row_data = schema.deserialize_csv_row(row_data)
            except Exception as exc:
                exc_msg = "Error deserializing row {} of CSV file: {}".format(row_ix, exc)
                raise Exception(exc_msg) from exc

            try:
                row_data_handler(row_ix, deserialized_row_data)
            except Exception as exc:
                exc_msg = "Error in row_data_handler for row {} of CSV file: {}".format(row_ix, exc)
                raise Exception(exc_msg) from exc

            rows_processed = row_ix
    except csv.Error as exc:
        # Here the physical line number is the useful piece of information:
        # it points at the place in the file where the reader gave up.
        exc_msg = "CSV error for line {} of CSV file: {}".format(csv_reader.line_num, exc)
        raise Exception(exc_msg) from exc

    return rows_processed
195 changes: 0 additions & 195 deletions cl_sii/rcv/parse.py

This file was deleted.

Loading