From 0984b00e8a0a957569c364716af74c2ee271e614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Sat, 4 May 2019 15:37:43 -0400 Subject: [PATCH 1/8] scripts: improve 'example.py' Based on (almost identical to) https://github.com/fynpal/lib-cl-sii-api-python/blob/74e19d9/scripts/example.py --- scripts/example.py | 56 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/scripts/example.py b/scripts/example.py index 48644f3f..2372c1c0 100755 --- a/scripts/example.py +++ b/scripts/example.py @@ -3,14 +3,20 @@ Example script ============== +Does X and Y, and then Z. -Example:: +Example +------- + +For example, to do X, run:: ./scripts/example.py arg1 arg2 arg3 """ +import logging import os import sys +from datetime import datetime from typing import Sequence try: @@ -22,9 +28,53 @@ import cl_sii # noqa: F401 +logger = logging.getLogger(__name__) +root_logger = logging.getLogger() + + +############################################################################### +# logging config +############################################################################### + +_loggers = [logger, logging.getLogger('cl_sii')] +for _logger in _loggers: + _logger.addHandler(logging.StreamHandler()) + _logger.setLevel(logging.INFO) + +root_logger.setLevel(logging.WARNING) + + +############################################################################### +# script +############################################################################### + def main(args: Sequence[str]) -> None: - print("Example script.") - print(f"Args: {args!s}") + start_ts = datetime.now() + + logger.debug("Example script. 
Args: %s", args) + + try: + print("Action: do something") + except FileNotFoundError: + logger.exception( + "Process aborted: a file could not be opened.", exc_info=True) + except KeyboardInterrupt: + logger.error("Process interrupted by user.") + except Exception: + logger.exception("Process aborted.") + finally: + try: + print("Action: clean up resources and connections") + logger.info("Cleaned up resources and connections.") + except Exception: + logger.exception("Failed to clean up resources and connections.") + + finish_ts = datetime.now() + duration = finish_ts - start_ts + + logger.info(f"start: {start_ts.isoformat()}") + logger.info(f"finish: {finish_ts.isoformat()}") + logger.info(f"duration: {duration!s}") if __name__ == '__main__': From c0fea79ccfa6503f71aeabf58a8c3715e0b2a1cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Thu, 23 May 2019 22:18:22 -0400 Subject: [PATCH 2/8] dte.data_models: add `DteDataL2.as_dte_data_l1` --- cl_sii/dte/data_models.py | 9 +++++++++ tests/test_dte_data_models.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/cl_sii/dte/data_models.py b/cl_sii/dte/data_models.py index 7184b31b..beff1846 100644 --- a/cl_sii/dte/data_models.py +++ b/cl_sii/dte/data_models.py @@ -439,3 +439,12 @@ def __post_init__(self) -> None: raise TypeError("Inappropriate type of 'receptor_email'.") validate_clean_str(self.receptor_email) validate_non_empty_str(self.receptor_email) + + def as_dte_data_l1(self) -> DteDataL1: + return DteDataL1( + emisor_rut=self.emisor_rut, + tipo_dte=self.tipo_dte, + folio=self.folio, + fecha_emision_date=self.fecha_emision_date, + receptor_rut=self.receptor_rut, + monto_total=self.monto_total) diff --git a/tests/test_dte_data_models.py b/tests/test_dte_data_models.py index e0ae146c..01d705fb 100644 --- a/tests/test_dte_data_models.py +++ b/tests/test_dte_data_models.py @@ -241,6 +241,19 @@ def test_as_dict(self) -> None: receptor_email=None, )) + def 
test_as_dte_data_l1(self) -> None: + self.assertEqual( + self.dte_l2_1.as_dte_data_l1(), + DteDataL1( + emisor_rut=Rut('76354771-K'), + tipo_dte=TipoDteEnum.FACTURA_ELECTRONICA, + folio=170, + fecha_emision_date=date(2019, 4, 1), + receptor_rut=Rut('96790240-3'), + monto_total=2996301, + ) + ) + class FunctionsTest(unittest.TestCase): From 3c68ba2526a43a8f4eb447238399178fcec94ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 22:38:31 -0400 Subject: [PATCH 3/8] libs.mm_utils: add `validate_no_unexpected_input_fields` --- cl_sii/libs/mm_utils.py | 42 +++++++++++++++++++++++++++++++++++++ tests/test_libs_mm_utils.py | 13 ++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/cl_sii/libs/mm_utils.py b/cl_sii/libs/mm_utils.py index 52d33411..8f319bea 100644 --- a/cl_sii/libs/mm_utils.py +++ b/cl_sii/libs/mm_utils.py @@ -6,6 +6,48 @@ import marshmallow.utils +############################################################################### +# validators +############################################################################### + +def validate_no_unexpected_input_fields( + schema: marshmallow.Schema, + data: dict, + original_data: dict, +) -> None: + """ + Fail validation if there was an unexpected input field. 
+ + Usage:: + + class MySchema(marshmallow.Schema): + + class Meta: + strict = True + + folio = marshmallow.fields.Integer() + + @marshmallow.validates_schema(pass_original=True) + def validate_schema(self, data: dict, original_data: dict) -> None: + validate_no_unexpected_input_fields(self, data, original_data) + + """ + # Original inspiration from + # https://marshmallow.readthedocs.io/en/2.x-line/extending.html#validating-original-input-data + fields_name_or_load_from = { + field.name if field.load_from is None else field.load_from + for field_key, field in schema.fields.items() + } + unexpected_input_fields = set(original_data) - fields_name_or_load_from + if unexpected_input_fields: + raise marshmallow.ValidationError( + "Unexpected input field.", field_names=list(unexpected_input_fields)) + + +############################################################################### +# fields +############################################################################### + class CustomMarshmallowDateField(marshmallow.fields.Field): """ A formatted date string. diff --git a/tests/test_libs_mm_utils.py b/tests/test_libs_mm_utils.py index d1a0c8b3..6957a42c 100644 --- a/tests/test_libs_mm_utils.py +++ b/tests/test_libs_mm_utils.py @@ -1,10 +1,19 @@ import unittest -from cl_sii.libs.mm_utils import CustomMarshmallowDateField # noqa: F401 +from cl_sii.libs.mm_utils import ( # noqa: F401 + CustomMarshmallowDateField, validate_no_unexpected_input_fields, +) class CustomMarshmallowDateFieldTest(unittest.TestCase): def test_x(self) -> None: - # TODO: implement! + # TODO: implement for 'CustomMarshmallowDateField'. + pass + + +class FunctionsTest(unittest.TestCase): + + def test_validate_no_unexpected_input_fields(self): + # TODO: implement for 'validate_no_unexpected_input_fields'. 
pass From 5c8ee3939023561debb69694cb5a354d6911220f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 23:09:06 -0400 Subject: [PATCH 4/8] libs: add module `csv_utils` --- cl_sii/libs/csv_utils.py | 49 ++++++++++++++++++++++++++++++++++++ tests/test_libs_csv_utils.py | 10 ++++++++ 2 files changed, 59 insertions(+) create mode 100644 cl_sii/libs/csv_utils.py create mode 100644 tests/test_libs_csv_utils.py diff --git a/cl_sii/libs/csv_utils.py b/cl_sii/libs/csv_utils.py new file mode 100644 index 00000000..c3a910d9 --- /dev/null +++ b/cl_sii/libs/csv_utils.py @@ -0,0 +1,49 @@ +import csv +from typing import IO, Sequence, Type, Union + + +def create_csv_dict_reader( + text_stream: IO[str], + csv_dialect: Type[csv.Dialect], + row_dict_extra_fields_key: Union[str, None] = None, + expected_fields_strict: bool = True, + expected_field_names: Sequence[str] = None, +) -> csv.DictReader: + """ + Create a CSV dict reader with custom options. + + :param text_stream: + :param row_dict_extra_fields_key: + CSV row dict key under which the extra data in the row will be saved + :param csv_dialect: + :param expected_fields_strict: + :param expected_field_names: + (required if ``expected_fields_strict`` is True) + :return: a CSV DictReader + + """ + # note: mypy wrongly complains: it does not accept 'fieldnames' to be None but that value + # is completely acceptable, and it even is the default! 
+ # > error: Argument "fieldnames" to "DictReader" has incompatible type "None"; expected + # > "Sequence[str]" + # note: mypy wrongly complains: + # > Argument "dialect" to "DictReader" has incompatible type "Type[Dialect]"; + # > expected "Union[str, Dialect]" + csv_reader = csv.DictReader( # type: ignore + text_stream, + fieldnames=None, # the values of the first row will be used as the fieldnames + restkey=row_dict_extra_fields_key, + dialect=csv_dialect, + ) + + if expected_fields_strict: + if expected_field_names: + if tuple(csv_reader.fieldnames) != expected_field_names: + raise ValueError( + "CSV file field names do not match those expected, or their order.", + csv_reader.fieldnames) + else: + raise ValueError( + "Param 'expected_field_names' is required if 'expected_fields_strict' is True.") + + return csv_reader diff --git a/tests/test_libs_csv_utils.py b/tests/test_libs_csv_utils.py new file mode 100644 index 00000000..c1669199 --- /dev/null +++ b/tests/test_libs_csv_utils.py @@ -0,0 +1,10 @@ +import unittest + +from cl_sii.libs.csv_utils import create_csv_dict_reader # noqa: F401 + + +class FunctionsTest(unittest.TestCase): + + def test_create_csv_dict_reader(self): + # TODO: implement for 'create_csv_dict_reader'. 
+ pass From 260636a27ede2836d9e00fe86763a44c49044494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 23:09:31 -0400 Subject: [PATCH 5/8] libs: add module `rows_processing` --- cl_sii/libs/rows_processing.py | 153 +++++++++++++++++++++++++++++ tests/test_libs_rows_processing.py | 16 +++ 2 files changed, 169 insertions(+) create mode 100644 cl_sii/libs/rows_processing.py create mode 100644 tests/test_libs_rows_processing.py diff --git a/cl_sii/libs/rows_processing.py b/cl_sii/libs/rows_processing.py new file mode 100644 index 00000000..b906e549 --- /dev/null +++ b/cl_sii/libs/rows_processing.py @@ -0,0 +1,153 @@ +import csv +import logging + +from typing import Dict, Iterable, Sequence, Tuple + +import marshmallow + + +logger = logging.getLogger(__name__) + + +class MaxRowsExceeded(RuntimeError): + + """ + The maximum number of rows has been exceeded. + """ + + +############################################################################### +# iterators +############################################################################### + +def csv_rows_mm_deserialization_iterator( + csv_reader: csv.DictReader, + row_schema: marshmallow.Schema, + n_rows_offset: int = 0, + max_n_rows: int = None, + fields_to_remove_names: Sequence[str] = None, +) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]: + """ + Marshmallow deserialization iterator over CSV rows. + + Iterate over ``csv_reader``, deserialize each row using ``row_schema`` + and yield the data before and after deserialization, plus any + validation/deserialization errors. + + .. note:: The CSV header row is omitted, obviously. 
+ + :param csv_reader: + :param row_schema: + Marshmallow schema for deserializing each CSV row + :param n_rows_offset: + (optional) number of rows to skip (and not deserialize) + :param max_n_rows: + (optional) max number of rows to deserialize (raise exception + if exceeded); ``None`` means no limit + :param fields_to_remove_names: + (optional) the name of each field that must be removed (if it exists) + from the row + :returns: + yields a tuple of (``row_ix`` (1-based), ``row_data``, + ``deserialized_row_data``, ``validation_errors``) + :raises MaxRowsExceeded: + number of data rows processed exceeded ``max_n_rows`` + :raises RuntimeError: + on CSV error when iterating over ``csv_reader`` + + """ + # note: mypy complaint is wrong because a 'csv.DictReader' object can be iterated over + # and yields instances of 'Dict[str, object]'. + # > Incompatible types in assignment (expression has type "DictReader", variable has type + # > "Iterable[Dict[str, object]]") + rows_iterator: Iterable[Dict[str, object]] = csv_reader # type: ignore + iterator = rows_mm_deserialization_iterator( + rows_iterator, row_schema, n_rows_offset, max_n_rows, fields_to_remove_names) + + try: + # note: we chose not to use 'yield from' to be explicit about what we are yielding. + for row_ix, row_data, deserialized_row_data, validation_errors in iterator: + yield row_ix, row_data, deserialized_row_data, validation_errors + except csv.Error as exc: + exc_msg = f"CSV error for line {csv_reader.line_num} of CSV file." + raise RuntimeError(exc_msg) from exc + + +def rows_mm_deserialization_iterator( + rows_iterator: Iterable[Dict[str, object]], + row_schema: marshmallow.Schema, + n_rows_offset: int = 0, + max_n_rows: int = None, + fields_to_remove_names: Sequence[str] = None, +) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]: + """ + Marshmallow deserialization iterator. 
+ + Iterate over ``rows_iterator``, deserialize each row using ``row_schema`` + and yield the data before and after deserialization, plus any + validation/deserialization errors. + + :param rows_iterator: + :param row_schema: + Marshmallow schema for deserializing each row + :param n_rows_offset: + (optional) number of rows to skip (and not deserialize) + :param max_n_rows: + (optional) max number of rows to deserialize (raise exception + if exceeded); ``None`` means no limit + :param fields_to_remove_names: + (optional) the name of each field that must be removed (if it exists) + from the row + :returns: + yields a tuple of (``row_ix`` (1-based), ``row_data``, + ``deserialized_row_data``, ``validation_errors``) + :raises MaxRowsExceeded: + number of data rows processed exceeded ``max_n_rows`` + + """ + if not n_rows_offset >= 0: + raise ValueError("Param 'n_rows_offset' must be an integer >= 0.") + + fields_to_remove_names = fields_to_remove_names or () + + for row_ix, row_data in enumerate(rows_iterator, start=1): + if max_n_rows is not None and row_ix > max_n_rows + n_rows_offset: + raise MaxRowsExceeded(f"Exceeded 'max_n_rows' limit: {max_n_rows}.") + + if row_ix <= n_rows_offset: + continue + + for _field_name in fields_to_remove_names: + row_data.pop(_field_name, None) + + try: + mm_result: marshmallow.UnmarshalResult = row_schema.load(row_data) + deserialized_row_data: dict = mm_result.data + raised_validation_errors: dict = {} + returned_validation_errors: dict = mm_result.errors + except marshmallow.ValidationError as exc: + deserialized_row_data = {} + raised_validation_errors = dict(exc.normalized_messages()) + returned_validation_errors = {} + + validation_errors = raised_validation_errors + if returned_validation_errors: + if row_schema.strict: + # 'marshmallow.schema.BaseSchema': + # > :param bool strict: If `True`, raise errors if invalid data are passed in + # > instead of failing silently and storing the errors. 
+ logger.error( + "Marshmallow schema is 'strict' but validation errors were returned by " + "method 'load' ('UnmarshalResult.errors') instead of being raised. " + "Errors: %s", + repr(returned_validation_errors)) + if raised_validation_errors: + logger.fatal( + "Programming error: either returned or raised validation errors " + "(depending on 'strict') but never both. " + "Returned errors: %s. Raised errors: %s", + repr(returned_validation_errors), repr(raised_validation_errors)) + + validation_errors.update(returned_validation_errors) + + yield row_ix, row_data, deserialized_row_data, validation_errors diff --git a/tests/test_libs_rows_processing.py b/tests/test_libs_rows_processing.py new file mode 100644 index 00000000..7cd9d2a0 --- /dev/null +++ b/tests/test_libs_rows_processing.py @@ -0,0 +1,16 @@ +import unittest + +from cl_sii.libs.rows_processing import ( # noqa: F401 + csv_rows_mm_deserialization_iterator, rows_mm_deserialization_iterator, +) + + +class FunctionsTest(unittest.TestCase): + + def test_csv_rows_mm_deserialization_iterator(self): + # TODO: implement for 'csv_rows_mm_deserialization_iterator'. + pass + + def test_rows_mm_deserialization_iterator(self): + # TODO: implement for 'rows_mm_deserialization_iterator'. + pass From c4fbfc132c47cd7b5569541a75271116ab9779e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:15:55 -0400 Subject: [PATCH 6/8] rcv: add module `parse_csv` Includes function `parse_rcv_venta_csv_file`. Tests have not been implemented. 
--- cl_sii/rcv/parse_csv.py | 406 ++++++++++++++++++++++++++++++++++++ tests/test_rcv_parse_csv.py | 22 ++ 2 files changed, 428 insertions(+) create mode 100644 cl_sii/rcv/parse_csv.py create mode 100644 tests/test_rcv_parse_csv.py diff --git a/cl_sii/rcv/parse_csv.py b/cl_sii/rcv/parse_csv.py new file mode 100644 index 00000000..392c4e7a --- /dev/null +++ b/cl_sii/rcv/parse_csv.py @@ -0,0 +1,406 @@ +""" +Parse RCV files (CSV) +===================== + + +""" +import csv +from datetime import date +import logging +from typing import Dict, Iterable, Optional, Sequence, Tuple + +import marshmallow +import marshmallow.fields +import marshmallow.validate + +from cl_sii.dte.data_models import DteDataL2 +from cl_sii.extras import mm_fields +from cl_sii.libs import csv_utils +from cl_sii.libs import mm_utils +from cl_sii.libs import rows_processing +from cl_sii.libs import tz_utils +from cl_sii.rut import Rut + + +logger = logging.getLogger(__name__) + + +def parse_rcv_venta_csv_file( + emisor_rut: Rut, + emisor_razon_social: str, + input_file_path: str, + n_rows_offset: int = 0, + max_n_rows: int = None, +) -> Iterable[Tuple[Optional[DteDataL2], int, Dict[str, object], Dict[str, object]]]: + """ + Parse DTE data objects from a RCV "Venta" file (CSV). 
+ + """ + schema_context = dict( + emisor_rut=emisor_rut, + emisor_razon_social=emisor_razon_social, + ) + input_csv_row_schema = RcvVentaCsvRowSchema(context=schema_context) + + expected_input_field_names = ( + 'Nro', + 'Tipo Doc', # 'tipo_dte' + 'Tipo Venta', + 'Rut cliente', # 'receptor_rut' + 'Razon Social', # 'receptor_razon_social' + 'Folio', # 'folio' + 'Fecha Docto', # 'fecha_emision_date' + 'Fecha Recepcion', # 'fecha_recepcion_dt' + # 'Fecha Acuse Recibo', # 'fecha_acuse_recibo_dt' + 'Fecha Acuse Recibo', + # 'Fecha Reclamo', # 'fecha_reclamo_dt' + 'Fecha Reclamo', + 'Monto Exento', + 'Monto Neto', + 'Monto IVA', + 'Monto total', # 'monto_total' + 'IVA Retenido Total', + 'IVA Retenido Parcial', + 'IVA no retenido', + 'IVA propio', + 'IVA Terceros', + 'RUT Emisor Liquid. Factura', + 'Neto Comision Liquid. Factura', + 'Exento Comision Liquid. Factura', + 'IVA Comision Liquid. Factura', + 'IVA fuera de plazo', + 'Tipo Docto. Referencia', + 'Folio Docto. Referencia', + 'Num. Ident. Receptor Extranjero', + 'Nacionalidad Receptor Extranjero', + 'Credito empresa constructora', + 'Impto. Zona Franca (Ley 18211)', + 'Garantia Dep. Envases', + 'Indicador Venta sin Costo', + 'Indicador Servicio Periodico', + 'Monto No facturable', + 'Total Monto Periodo', + 'Venta Pasajes Transporte Nacional', + 'Venta Pasajes Transporte Internacional', + 'Numero Interno', + 'Codigo Sucursal', + 'NCE o NDE sobre Fact. de Compra', + 'Codigo Otro Imp.', + 'Valor Otro Imp.', + 'Tasa Otro Imp.', + ) + + fields_to_remove_names = ( + 'Nro', + 'Tipo Venta', + 'Fecha Acuse Recibo', + 'Fecha Reclamo', + 'Monto Exento', + 'Monto Neto', + 'Monto IVA', + 'IVA Retenido Total', + 'IVA Retenido Parcial', + 'IVA no retenido', + 'IVA propio', + 'IVA Terceros', + 'RUT Emisor Liquid. Factura', + 'Neto Comision Liquid. Factura', + 'Exento Comision Liquid. Factura', + 'IVA Comision Liquid. Factura', + 'IVA fuera de plazo', + 'Tipo Docto. Referencia', + 'Folio Docto. Referencia', + 'Num. Ident. 
Receptor Extranjero', + 'Nacionalidad Receptor Extranjero', + 'Credito empresa constructora', + 'Impto. Zona Franca (Ley 18211)', + 'Garantia Dep. Envases', + 'Indicador Venta sin Costo', + 'Indicador Servicio Periodico', + 'Monto No facturable', + 'Total Monto Periodo', + 'Venta Pasajes Transporte Nacional', + 'Venta Pasajes Transporte Internacional', + 'Numero Interno', + 'Codigo Sucursal', + 'NCE o NDE sobre Fact. de Compra', + 'Codigo Otro Imp.', + 'Valor Otro Imp.', + 'Tasa Otro Imp.', + ) + + yield from _parse_rcv_csv_file( + input_csv_row_schema, + expected_input_field_names, + fields_to_remove_names, + input_file_path, + n_rows_offset, + max_n_rows, + ) + + +############################################################################### +# schemas +############################################################################### + +class _RcvCsvRowSchemaBase(marshmallow.Schema): + + @marshmallow.validates_schema(pass_original=True) + def validate_schema(self, data: dict, original_data: dict) -> None: + mm_utils.validate_no_unexpected_input_fields(self, data, original_data) + + # @marshmallow.validates('field_x') + # def validate_field_x(self, value): + # pass + + def to_dte_data_l2(self, data: dict) -> DteDataL2: + # note: the data of some serializer fields may not be included in the returned struct. 
+ + try: + emisor_rut: Rut = data['emisor_rut'] # type: ignore + receptor_rut: Rut = data['receptor_rut'] # type: ignore + tipo_dte = data['tipo_dte'] # type: ignore + folio: int = data['folio'] # type: ignore + fecha_emision_date: date = data['fecha_emision_date'] # type: ignore + monto_total: int = data['monto_total'] # type: ignore + emisor_razon_social: str = data['emisor_razon_social'] # type: ignore + receptor_razon_social: str = data['receptor_razon_social'] # type: ignore + except KeyError as exc: + raise ValueError("Programming error: a referenced field is missing.") from exc + + try: + dte_data = DteDataL2( + emisor_rut=emisor_rut, + tipo_dte=tipo_dte, + folio=folio, + fecha_emision_date=fecha_emision_date, + receptor_rut=receptor_rut, + monto_total=monto_total, + emisor_razon_social=emisor_razon_social, + receptor_razon_social=receptor_razon_social, + # fecha_vencimiento_date='', + # firma_documento_dt='', + # signature_value='', + # signature_x509_cert_der='', + # emisor_giro='', + # emisor_email='', + # receptor_email='', + ) + except (TypeError, ValueError): + raise + + return dte_data + + +class RcvVentaCsvRowSchema(_RcvCsvRowSchemaBase): + + FIELD_FECHA_RECEPCION_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + FIELD_FECHA_ACUSE_RECIBO_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + FIELD_FECHA_RECLAMO_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + + class Meta: + strict = True + + ########################################################################### + # basic fields + ########################################################################### + + tipo_dte = mm_fields.TipoDteField( + required=True, + load_from='Tipo Doc', + ) + folio = marshmallow.fields.Integer( + required=True, + load_from='Folio', + ) + fecha_emision_date = mm_utils.CustomMarshmallowDateField( + format='%d/%m/%Y', # e.g. 
'22/10/2018' + required=True, + load_from='Fecha Docto', + ) + receptor_rut = mm_fields.RutField( + required=True, + load_from='Rut cliente', + ) + monto_total = marshmallow.fields.Integer( + required=True, + load_from='Monto total', + ) + receptor_razon_social = marshmallow.fields.String( + required=True, + load_from='Razon Social', + ) + + ########################################################################### + # fields whose value is set using data passed in the schema context + ########################################################################### + + emisor_rut = mm_fields.RutField( + required=True, + ) + emisor_razon_social = marshmallow.fields.String( + required=True, + ) + + ########################################################################### + # extra fields: not included in the returned struct + ########################################################################### + + fecha_recepcion_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=True, + load_from='Fecha Recepcion', + ) + fecha_acuse_recibo_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=False, + allow_none=True, + load_from='Fecha Acuse Recibo', + ) + fecha_reclamo_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=False, + allow_none=True, + load_from='Fecha Reclamo', + ) + + @marshmallow.pre_load + def preprocess(self, in_data: dict) -> dict: + # note: required fields checks are run later on automatically thus we may not assume that + # values of required fields (`required=True`) exist. + + # Set field value only if it was not in the input data. + in_data.setdefault('emisor_rut', self.context['emisor_rut']) + in_data.setdefault('emisor_razon_social', self.context['emisor_razon_social']) + + # Fix missing/default values. 
+ if 'Fecha Acuse Recibo' in in_data: + if in_data['Fecha Acuse Recibo'] == '': + in_data['Fecha Acuse Recibo'] = None + if 'Fecha Reclamo' in in_data: + if in_data['Fecha Reclamo'] == '': + in_data['Fecha Reclamo'] = None + + return in_data + + @marshmallow.post_load + def postprocess(self, data: dict) -> dict: + # >>> data['fecha_recepcion_dt'].isoformat() + # '2018-10-23T01:54:13' + data['fecha_recepcion_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_recepcion_dt'], tz=self.FIELD_FECHA_RECEPCION_DT_TZ) + # >>> data['fecha_recepcion_dt'].isoformat() + # '2018-10-23T01:54:13-03:00' + # >>> data['fecha_recepcion_dt'].astimezone(pytz.UTC).isoformat() + # '2018-10-23T04:54:13+00:00' + + # note: to express this value in another timezone (but the value does not change), do + # `dt_obj.astimezone(pytz.timezone('some timezone'))` + + if 'fecha_acuse_recibo_dt' in data and data['fecha_acuse_recibo_dt']: + data['fecha_acuse_recibo_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_acuse_recibo_dt'], tz=self.FIELD_FECHA_ACUSE_RECIBO_DT_TZ) + if 'fecha_reclamo_dt' in data and data['fecha_reclamo_dt']: + data['fecha_reclamo_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_reclamo_dt'], tz=self.FIELD_FECHA_RECLAMO_DT_TZ) + + return data + + +############################################################################### +# helpers +############################################################################### + +class _RcvCsvDialect(csv.Dialect): + + """ + CSV dialect of RCV CSV files. + + The properties of this dialect were determined with the help of + :class:`csv.Sniffer`. + + >>> import gzip + >>> filename = 'SII-download-RCV-file-http-body-response.csv.gz' + >>> with gzip.open(filename, 'rt', encoding='utf-8') as f: + ... 
dialect = csv.Sniffer().sniff(f.read(50 * 1024)) + + """ + + delimiter = ';' + quotechar = '"' + escapechar = None + doublequote = False + skipinitialspace = False + lineterminator = '\r\n' + quoting = csv.QUOTE_MINIMAL + + +def _parse_rcv_csv_file( + input_csv_row_schema: _RcvCsvRowSchemaBase, + expected_input_field_names: Sequence[str], + fields_to_remove_names: Sequence[str], + input_file_path: str, + n_rows_offset: int, + max_n_rows: int = None, +) -> Iterable[Tuple[Optional[DteDataL2], int, Dict[str, object], Dict[str, object]]]: + """ + Parse DTE data objects from a RCV file (CSV). + + Common implementation for the different kinds of RCV files (CSV). + + """ + for field_to_remove_name in fields_to_remove_names: + if field_to_remove_name not in expected_input_field_names: + raise Exception( + "Programming error: field to remove is not one of the expected ones.", + field_to_remove_name) + + _CSV_ROW_DICT_EXTRA_FIELDS_KEY = '_extra_csv_fields_data' + + fields_to_remove_names += (_CSV_ROW_DICT_EXTRA_FIELDS_KEY, ) # type: ignore + + input_data_enc = 'utf-8' + # note: + # > If csvfile is a file object, it should be opened with newline='' + # https://docs.python.org/3/library/csv.html#csv.reader + with open(input_file_path, mode='rt', encoding=input_data_enc, newline='') as input_f: + # Create a CSV reader, with auto-detection of header names (first row). + csv_reader = csv_utils.create_csv_dict_reader( + input_f, + csv_dialect=_RcvCsvDialect, + row_dict_extra_fields_key=_CSV_ROW_DICT_EXTRA_FIELDS_KEY, + expected_fields_strict=True, + expected_field_names=expected_input_field_names, + ) + + g = rows_processing.csv_rows_mm_deserialization_iterator( + csv_reader, + row_schema=input_csv_row_schema, + n_rows_offset=n_rows_offset, + max_n_rows=max_n_rows, + fields_to_remove_names=fields_to_remove_names, + ) + + for row_ix, row_data, deserialized_row_data, validation_errors in g: + logger.debug("Processing row %s. 
Content: %s", row_ix, repr(row_data)) + + dte_data = None + row_errors: Dict[str, object] = {} + conversion_error = None + + if not validation_errors: + try: + dte_data = input_csv_row_schema.to_dte_data_l2(deserialized_row_data) + except Exception as exc: + conversion_error = str(exc) + logger.exception( + "Deserialized data to data model instance conversion failed " + "(probably a programming error).") + + # Instead of empty dicts, lists, str, etc, we want to have None. + if validation_errors: + row_errors['validation'] = validation_errors + if conversion_error: + row_errors['other'] = conversion_error + + yield dte_data, row_ix, row_data, row_errors diff --git a/tests/test_rcv_parse_csv.py b/tests/test_rcv_parse_csv.py new file mode 100644 index 00000000..6dc7741c --- /dev/null +++ b/tests/test_rcv_parse_csv.py @@ -0,0 +1,22 @@ +import unittest + +from cl_sii.rcv.parse_csv import ( # noqa: F401 + RcvVentaCsvRowSchema, parse_rcv_venta_csv_file, _parse_rcv_csv_file, +) + + +class RcvVentaCsvRowSchemaTest(unittest.TestCase): + + # TODO: implement for 'RcvVentaCsvRowSchema'. + pass + + +class FunctionsTest(unittest.TestCase): + + def test_parse_rcv_venta_csv_file(self) -> None: + # TODO: implement for 'parse_rcv_venta_csv_file'. + pass + + def test__parse_rcv_csv_file(self) -> None: + # TODO: implement for '_parse_rcv_csv_file'. 
+ pass From 009e310745af01c7d2cd02c36532374ea663692c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:21:19 -0400 Subject: [PATCH 7/8] HISTORY: update for new version --- HISTORY.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index ec01202d..c4b30a84 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,15 @@ History ------- +0.6.3 (2019-05-24) ++++++++++++++++++++++++ + +* (PR #52, 2019-05-24) rcv: add module ``parse_csv`` +* (PR #51, 2019-05-24) libs: add module ``rows_processing`` +* (PR #50, 2019-05-24) libs: add module ``csv_utils`` +* (PR #49, 2019-05-24) libs.mm_utils: add ``validate_no_unexpected_input_fields`` +* (PR #48, 2019-05-24) dte.data_models: add ``DteDataL2.as_dte_data_l1`` + 0.6.2 (2019-05-15) +++++++++++++++++++++++ From 4bea00322ede92e660b2372ca6560a22d36bba77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:21:51 -0400 Subject: [PATCH 8/8] =?UTF-8?q?Bump=20version:=200.6.2=20=E2=86=92=200.6.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cl_sii/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1e57eee7..cae36e13 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.2 +current_version = 0.6.3 commit = True tag = True diff --git a/cl_sii/__init__.py b/cl_sii/__init__.py index d90243c9..6a73fc58 100644 --- a/cl_sii/__init__.py +++ b/cl_sii/__init__.py @@ -5,4 +5,4 @@ """ -__version__ = '0.6.2' +__version__ = '0.6.3'