From 0984b00e8a0a957569c364716af74c2ee271e614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Sat, 4 May 2019 15:37:43 -0400 Subject: [PATCH 1/8] scripts: improve 'example.py' Based on (almost identical to) https://github.com/fynpal/lib-cl-sii-api-python/blob/74e19d9/scripts/example.py --- scripts/example.py | 56 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/scripts/example.py b/scripts/example.py index 48644f3f..2372c1c0 100755 --- a/scripts/example.py +++ b/scripts/example.py @@ -3,14 +3,20 @@ Example script ============== +Does X and Y, and then Z. -Example:: +Example +------- + +For example, to do X, run:: ./scripts/example.py arg1 arg2 arg3 """ +import logging import os import sys +from datetime import datetime from typing import Sequence try: @@ -22,9 +28,53 @@ import cl_sii # noqa: F401 +logger = logging.getLogger(__name__) +root_logger = logging.getLogger() + + +############################################################################### +# logging config +############################################################################### + +_loggers = [logger, logging.getLogger('cl_sii')] +for _logger in _loggers: + _logger.addHandler(logging.StreamHandler()) + _logger.setLevel(logging.INFO) + +root_logger.setLevel(logging.WARNING) + + +############################################################################### +# script +############################################################################### + def main(args: Sequence[str]) -> None: - print("Example script.") - print(f"Args: {args!s}") + start_ts = datetime.now() + + logger.debug("Example script. 
Args: %s", args) + + try: + print("Action: do something") + except FileNotFoundError: + logger.exception( + "Process aborted: a file could not be opened.", exc_info=True) + except KeyboardInterrupt: + logger.error("Process interrupted by user.") + except Exception: + logger.exception("Process aborted.") + finally: + try: + print("Action: clean up resources and connections") + logger.info("Cleaned up resources and connections.") + except Exception: + logger.exception("Failed to clean up resources and connections.") + + finish_ts = datetime.now() + duration = finish_ts - start_ts + + logger.info(f"start: {start_ts.isoformat()}") + logger.info(f"finish: {finish_ts.isoformat()}") + logger.info(f"duration: {duration!s}") if __name__ == '__main__': From c0fea79ccfa6503f71aeabf58a8c3715e0b2a1cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Thu, 23 May 2019 22:18:22 -0400 Subject: [PATCH 2/8] dte.data_models: add `DteDataL2.as_dte_data_l1` --- cl_sii/dte/data_models.py | 9 +++++++++ tests/test_dte_data_models.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/cl_sii/dte/data_models.py b/cl_sii/dte/data_models.py index 7184b31b..beff1846 100644 --- a/cl_sii/dte/data_models.py +++ b/cl_sii/dte/data_models.py @@ -439,3 +439,12 @@ def __post_init__(self) -> None: raise TypeError("Inappropriate type of 'receptor_email'.") validate_clean_str(self.receptor_email) validate_non_empty_str(self.receptor_email) + + def as_dte_data_l1(self) -> DteDataL1: + return DteDataL1( + emisor_rut=self.emisor_rut, + tipo_dte=self.tipo_dte, + folio=self.folio, + fecha_emision_date=self.fecha_emision_date, + receptor_rut=self.receptor_rut, + monto_total=self.monto_total) diff --git a/tests/test_dte_data_models.py b/tests/test_dte_data_models.py index e0ae146c..01d705fb 100644 --- a/tests/test_dte_data_models.py +++ b/tests/test_dte_data_models.py @@ -241,6 +241,19 @@ def test_as_dict(self) -> None: receptor_email=None, )) + def 
test_as_dte_data_l1(self) -> None: + self.assertEqual( + self.dte_l2_1.as_dte_data_l1(), + DteDataL1( + emisor_rut=Rut('76354771-K'), + tipo_dte=TipoDteEnum.FACTURA_ELECTRONICA, + folio=170, + fecha_emision_date=date(2019, 4, 1), + receptor_rut=Rut('96790240-3'), + monto_total=2996301, + ) + ) + class FunctionsTest(unittest.TestCase): From 3c68ba2526a43a8f4eb447238399178fcec94ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 22:38:31 -0400 Subject: [PATCH 3/8] libs.mm_utils: add `validate_no_unexpected_input_fields` --- cl_sii/libs/mm_utils.py | 42 +++++++++++++++++++++++++++++++++++++ tests/test_libs_mm_utils.py | 13 ++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/cl_sii/libs/mm_utils.py b/cl_sii/libs/mm_utils.py index 52d33411..8f319bea 100644 --- a/cl_sii/libs/mm_utils.py +++ b/cl_sii/libs/mm_utils.py @@ -6,6 +6,48 @@ import marshmallow.utils +############################################################################### +# validators +############################################################################### + +def validate_no_unexpected_input_fields( + schema: marshmallow.Schema, + data: dict, + original_data: dict, +) -> None: + """ + Fail validation if there was an unexpected input field. 
+ + Usage:: + + class MySchema(marshmallow.Schema): + + class Meta: + strict = True + + folio = marshmallow.fields.Integer() + + @marshmallow.validates_schema(pass_original=True) + def validate_schema(self, data: dict, original_data: dict) -> None: + validate_no_unexpected_input_fields(self, data, original_data) + + """ + # Original inspiration from + # https://marshmallow.readthedocs.io/en/2.x-line/extending.html#validating-original-input-data + fields_name_or_load_from = { + field.name if field.load_from is None else field.load_from + for field_key, field in schema.fields.items() + } + unexpected_input_fields = set(original_data) - fields_name_or_load_from + if unexpected_input_fields: + raise marshmallow.ValidationError( + "Unexpected input field.", field_names=list(unexpected_input_fields)) + + +############################################################################### +# fields +############################################################################### + class CustomMarshmallowDateField(marshmallow.fields.Field): """ A formatted date string. diff --git a/tests/test_libs_mm_utils.py b/tests/test_libs_mm_utils.py index d1a0c8b3..6957a42c 100644 --- a/tests/test_libs_mm_utils.py +++ b/tests/test_libs_mm_utils.py @@ -1,10 +1,19 @@ import unittest -from cl_sii.libs.mm_utils import CustomMarshmallowDateField # noqa: F401 +from cl_sii.libs.mm_utils import ( # noqa: F401 + CustomMarshmallowDateField, validate_no_unexpected_input_fields, +) class CustomMarshmallowDateFieldTest(unittest.TestCase): def test_x(self) -> None: - # TODO: implement! + # TODO: implement for 'CustomMarshmallowDateField'. + pass + + +class FunctionsTest(unittest.TestCase): + + def test_validate_no_unexpected_input_fields(self): + # TODO: implement for 'validate_no_unexpected_input_fields'. 
pass From 5c8ee3939023561debb69694cb5a354d6911220f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 23:09:06 -0400 Subject: [PATCH 4/8] libs: add module `csv_utils` --- cl_sii/libs/csv_utils.py | 49 ++++++++++++++++++++++++++++++++++++ tests/test_libs_csv_utils.py | 10 ++++++++ 2 files changed, 59 insertions(+) create mode 100644 cl_sii/libs/csv_utils.py create mode 100644 tests/test_libs_csv_utils.py diff --git a/cl_sii/libs/csv_utils.py b/cl_sii/libs/csv_utils.py new file mode 100644 index 00000000..c3a910d9 --- /dev/null +++ b/cl_sii/libs/csv_utils.py @@ -0,0 +1,49 @@ +import csv +from typing import IO, Sequence, Type, Union + + +def create_csv_dict_reader( + text_stream: IO[str], + csv_dialect: Type[csv.Dialect], + row_dict_extra_fields_key: Union[str, None] = None, + expected_fields_strict: bool = True, + expected_field_names: Sequence[str] = None, +) -> csv.DictReader: + """ + Create a CSV dict reader with custom options. + + :param text_stream: + :param row_dict_extra_fields_key: + CSV row dict key under which the extra data in the row will be saved + :param csv_dialect: + :param expected_fields_strict: + :param expected_field_names: + (required if ``expected_fields_strict`` is True) + :return: a CSV DictReader + + """ + # note: mypy wrongly complains: it does not accept 'fieldnames' to be None but that value + # is completely acceptable, and it even is the default! 
+ # > error: Argument "fieldnames" to "DictReader" has incompatible type "None"; expected + # > "Sequence[str]" + # note: mypy wrongly complains: + # > Argument "dialect" to "DictReader" has incompatible type "Type[Dialect]"; + # > expected "Union[str, Dialect]" + csv_reader = csv.DictReader( # type: ignore + text_stream, + fieldnames=None, # the values of the first row will be used as the fieldnames + restkey=row_dict_extra_fields_key, + dialect=csv_dialect, + ) + + if expected_fields_strict: + if expected_field_names: + if tuple(csv_reader.fieldnames) != expected_field_names: + raise ValueError( + "CSV file field names do not match those expected, or their order.", + csv_reader.fieldnames) + else: + raise ValueError( + "Param 'expected_field_names' is required if 'expected_fields_strict' is True.") + + return csv_reader diff --git a/tests/test_libs_csv_utils.py b/tests/test_libs_csv_utils.py new file mode 100644 index 00000000..c1669199 --- /dev/null +++ b/tests/test_libs_csv_utils.py @@ -0,0 +1,10 @@ +import unittest + +from cl_sii.libs.csv_utils import create_csv_dict_reader # noqa: F401 + + +class FunctionsTest(unittest.TestCase): + + def test_create_csv_dict_reader(self): + # TODO: implement for 'create_csv_dict_reader'. 
+ pass From 260636a27ede2836d9e00fe86763a44c49044494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 3 May 2019 23:09:31 -0400 Subject: [PATCH 5/8] libs: add module `rows_processing` --- cl_sii/libs/rows_processing.py | 153 +++++++++++++++++++++++++++++ tests/test_libs_rows_processing.py | 16 +++ 2 files changed, 169 insertions(+) create mode 100644 cl_sii/libs/rows_processing.py create mode 100644 tests/test_libs_rows_processing.py diff --git a/cl_sii/libs/rows_processing.py b/cl_sii/libs/rows_processing.py new file mode 100644 index 00000000..b906e549 --- /dev/null +++ b/cl_sii/libs/rows_processing.py @@ -0,0 +1,153 @@ +import csv +import logging + +from typing import Dict, Iterable, Sequence, Tuple + +import marshmallow + + +logger = logging.getLogger(__name__) + + +class MaxRowsExceeded(RuntimeError): + + """ + The maximum number of rows has been exceeded. + """ + + +############################################################################### +# iterators +############################################################################### + +def csv_rows_mm_deserialization_iterator( + csv_reader: csv.DictReader, + row_schema: marshmallow.Schema, + n_rows_offset: int = 0, + max_n_rows: int = None, + fields_to_remove_names: Sequence[str] = None, +) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]: + """ + Marshmallow deserialization iterator over CSV rows. + + Iterate over ``csv_reader``, deserialize each row using ``row_schema`` + and yield the data before and after deserialization, plus any + validation/deserialization errors. + + .. note:: The CSV header row is omitted, obviously. 
+ + :param csv_reader: + :param row_schema: + Marshmallow schema for deserializing each CSV row + :param n_rows_offset: + (optional) number of rows to skip (and not deserialize) + :param max_n_rows: + (optional) max number of rows to deserialize (raise exception + if exceeded); ``None`` means no limit + :param fields_to_remove_names: + (optional) the name of each field that must be removed (if it exists) + from the row + :returns: + yields a tuple of (``row_ix`` (1-based), ``row_data``, + ``deserialized_row_data``, ``validation_errors``) + :raises MaxRowsExceeded: + number of data rows processed exceeded ``max_n_rows`` + :raises RuntimeError: + on CSV error when iterating over ``csv_reader`` + + """ + # note: mypy complaint is wrong because a 'csv.DictReader' object can be iterated over + # and yields instances of 'Dict[str, object]'. + # > Incompatible types in assignment (expression has type "DictReader", variable has type + # > "Iterable[Dict[str, object]]") + rows_iterator: Iterable[Dict[str, object]] = csv_reader # type: ignore + iterator = rows_mm_deserialization_iterator( + rows_iterator, row_schema, n_rows_offset, max_n_rows, fields_to_remove_names) + + try: + # note: we chose not to use 'yield from' to be explicit about what we are yielding. + for row_ix, row_data, deserialized_row_data, validation_errors in iterator: + yield row_ix, row_data, deserialized_row_data, validation_errors + except csv.Error as exc: + exc_msg = f"CSV error for line {csv_reader.line_num} of CSV file." + raise RuntimeError(exc_msg) from exc + + +def rows_mm_deserialization_iterator( + rows_iterator: Iterable[Dict[str, object]], + row_schema: marshmallow.Schema, + n_rows_offset: int = 0, + max_n_rows: int = None, + fields_to_remove_names: Sequence[str] = None, +) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]: + """ + Marshmallow deserialization iterator. 
+ + Iterate over ``rows_iterator``, deserialize each row using ``row_schema`` + and yield the data before and after deserialization, plus any + validation/deserialization errors. + + :param rows_iterator: + :param row_schema: + Marshmallow schema for deserializing each row + :param n_rows_offset: + (optional) number of rows to skip (and not deserialize) + :param max_n_rows: + (optional) max number of rows to deserialize (raise exception + if exceeded); ``None`` means no limit + :param fields_to_remove_names: + (optional) the name of each field that must be removed (if it exists) + from the row + :returns: + yields a tuple of (``row_ix`` (1-based), ``row_data``, + ``deserialized_row_data``, ``validation_errors``) + :raises MaxRowsExceeded: + number of data rows processed exceeded ``max_n_rows`` + + """ + if not n_rows_offset >= 0: + raise ValueError("Param 'n_rows_offset' must be an integer >= 0.") + + fields_to_remove_names = fields_to_remove_names or () + + for row_ix, row_data in enumerate(rows_iterator, start=1): + if max_n_rows is not None and row_ix > max_n_rows + n_rows_offset: + raise MaxRowsExceeded(f"Exceeded 'max_n_rows' limit: {max_n_rows}.") + + if row_ix <= n_rows_offset: + continue + + for _field_name in fields_to_remove_names: + row_data.pop(_field_name, None) + + try: + mm_result: marshmallow.UnmarshalResult = row_schema.load(row_data) + deserialized_row_data: dict = mm_result.data + raised_validation_errors: dict = {} + returned_validation_errors: dict = mm_result.errors + except marshmallow.ValidationError as exc: + deserialized_row_data = {} + raised_validation_errors = dict(exc.normalized_messages()) + returned_validation_errors = {} + + validation_errors = raised_validation_errors + if returned_validation_errors: + if row_schema.strict: + # 'marshmallow.schema.BaseSchema': + # > :param bool strict: If `True`, raise errors if invalid data are passed in + # > instead of failing silently and storing the errors. 
+ logger.error( + "Marshmallow schema is 'strict' but validation errors were returned by " + "method 'load' ('UnmarshalResult.errors') instead of being raised. " + "Errors: %s", + repr(returned_validation_errors)) + if raised_validation_errors: + logger.fatal( + "Programming error: either returned or raised validation errors " + "(depending on 'strict') but never both. " + "Returned errors: %s. Raised errors: %s", + repr(returned_validation_errors), repr(raised_validation_errors)) + + validation_errors.update(returned_validation_errors) + + yield row_ix, row_data, deserialized_row_data, validation_errors diff --git a/tests/test_libs_rows_processing.py b/tests/test_libs_rows_processing.py new file mode 100644 index 00000000..7cd9d2a0 --- /dev/null +++ b/tests/test_libs_rows_processing.py @@ -0,0 +1,16 @@ +import unittest + +from cl_sii.libs.rows_processing import ( # noqa: F401 + csv_rows_mm_deserialization_iterator, rows_mm_deserialization_iterator, +) + + +class FunctionsTest(unittest.TestCase): + + def test_csv_rows_mm_deserialization_iterator(self): + # TODO: implement for 'csv_rows_mm_deserialization_iterator'. + pass + + def test_rows_mm_deserialization_iterator(self): + # TODO: implement for 'rows_mm_deserialization_iterator'. + pass From c4fbfc132c47cd7b5569541a75271116ab9779e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:15:55 -0400 Subject: [PATCH 6/8] rcv: add module `parse_csv` Includes function `parse_rcv_venta_csv_file`. Tests have not been implemented. 
--- cl_sii/rcv/parse_csv.py | 406 ++++++++++++++++++++++++++++++++++++ tests/test_rcv_parse_csv.py | 22 ++ 2 files changed, 428 insertions(+) create mode 100644 cl_sii/rcv/parse_csv.py create mode 100644 tests/test_rcv_parse_csv.py diff --git a/cl_sii/rcv/parse_csv.py b/cl_sii/rcv/parse_csv.py new file mode 100644 index 00000000..392c4e7a --- /dev/null +++ b/cl_sii/rcv/parse_csv.py @@ -0,0 +1,406 @@ +""" +Parse RCV files (CSV) +===================== + + +""" +import csv +from datetime import date +import logging +from typing import Dict, Iterable, Optional, Sequence, Tuple + +import marshmallow +import marshmallow.fields +import marshmallow.validate + +from cl_sii.dte.data_models import DteDataL2 +from cl_sii.extras import mm_fields +from cl_sii.libs import csv_utils +from cl_sii.libs import mm_utils +from cl_sii.libs import rows_processing +from cl_sii.libs import tz_utils +from cl_sii.rut import Rut + + +logger = logging.getLogger(__name__) + + +def parse_rcv_venta_csv_file( + emisor_rut: Rut, + emisor_razon_social: str, + input_file_path: str, + n_rows_offset: int = 0, + max_n_rows: int = None, +) -> Iterable[Tuple[Optional[DteDataL2], int, Dict[str, object], Dict[str, object]]]: + """ + Parse DTE data objects from a RCV "Venta" file (CSV). 
+ + """ + schema_context = dict( + emisor_rut=emisor_rut, + emisor_razon_social=emisor_razon_social, + ) + input_csv_row_schema = RcvVentaCsvRowSchema(context=schema_context) + + expected_input_field_names = ( + 'Nro', + 'Tipo Doc', # 'tipo_dte' + 'Tipo Venta', + 'Rut cliente', # 'receptor_rut' + 'Razon Social', # 'receptor_razon_social' + 'Folio', # 'folio' + 'Fecha Docto', # 'fecha_emision_date' + 'Fecha Recepcion', # 'fecha_recepcion_dt' + # 'Fecha Acuse Recibo', # 'fecha_acuse_recibo_dt' + 'Fecha Acuse Recibo', + # 'Fecha Reclamo', # 'fecha_reclamo_dt' + 'Fecha Reclamo', + 'Monto Exento', + 'Monto Neto', + 'Monto IVA', + 'Monto total', # 'monto_total' + 'IVA Retenido Total', + 'IVA Retenido Parcial', + 'IVA no retenido', + 'IVA propio', + 'IVA Terceros', + 'RUT Emisor Liquid. Factura', + 'Neto Comision Liquid. Factura', + 'Exento Comision Liquid. Factura', + 'IVA Comision Liquid. Factura', + 'IVA fuera de plazo', + 'Tipo Docto. Referencia', + 'Folio Docto. Referencia', + 'Num. Ident. Receptor Extranjero', + 'Nacionalidad Receptor Extranjero', + 'Credito empresa constructora', + 'Impto. Zona Franca (Ley 18211)', + 'Garantia Dep. Envases', + 'Indicador Venta sin Costo', + 'Indicador Servicio Periodico', + 'Monto No facturable', + 'Total Monto Periodo', + 'Venta Pasajes Transporte Nacional', + 'Venta Pasajes Transporte Internacional', + 'Numero Interno', + 'Codigo Sucursal', + 'NCE o NDE sobre Fact. de Compra', + 'Codigo Otro Imp.', + 'Valor Otro Imp.', + 'Tasa Otro Imp.', + ) + + fields_to_remove_names = ( + 'Nro', + 'Tipo Venta', + 'Fecha Acuse Recibo', + 'Fecha Reclamo', + 'Monto Exento', + 'Monto Neto', + 'Monto IVA', + 'IVA Retenido Total', + 'IVA Retenido Parcial', + 'IVA no retenido', + 'IVA propio', + 'IVA Terceros', + 'RUT Emisor Liquid. Factura', + 'Neto Comision Liquid. Factura', + 'Exento Comision Liquid. Factura', + 'IVA Comision Liquid. Factura', + 'IVA fuera de plazo', + 'Tipo Docto. Referencia', + 'Folio Docto. Referencia', + 'Num. Ident. 
Receptor Extranjero', + 'Nacionalidad Receptor Extranjero', + 'Credito empresa constructora', + 'Impto. Zona Franca (Ley 18211)', + 'Garantia Dep. Envases', + 'Indicador Venta sin Costo', + 'Indicador Servicio Periodico', + 'Monto No facturable', + 'Total Monto Periodo', + 'Venta Pasajes Transporte Nacional', + 'Venta Pasajes Transporte Internacional', + 'Numero Interno', + 'Codigo Sucursal', + 'NCE o NDE sobre Fact. de Compra', + 'Codigo Otro Imp.', + 'Valor Otro Imp.', + 'Tasa Otro Imp.', + ) + + yield from _parse_rcv_csv_file( + input_csv_row_schema, + expected_input_field_names, + fields_to_remove_names, + input_file_path, + n_rows_offset, + max_n_rows, + ) + + +############################################################################### +# schemas +############################################################################### + +class _RcvCsvRowSchemaBase(marshmallow.Schema): + + @marshmallow.validates_schema(pass_original=True) + def validate_schema(self, data: dict, original_data: dict) -> None: + mm_utils.validate_no_unexpected_input_fields(self, data, original_data) + + # @marshmallow.validates('field_x') + # def validate_field_x(self, value): + # pass + + def to_dte_data_l2(self, data: dict) -> DteDataL2: + # note: the data of some serializer fields may not be included in the returned struct. 
+ + try: + emisor_rut: Rut = data['emisor_rut'] # type: ignore + receptor_rut: Rut = data['receptor_rut'] # type: ignore + tipo_dte = data['tipo_dte'] # type: ignore + folio: int = data['folio'] # type: ignore + fecha_emision_date: date = data['fecha_emision_date'] # type: ignore + monto_total: int = data['monto_total'] # type: ignore + emisor_razon_social: str = data['emisor_razon_social'] # type: ignore + receptor_razon_social: str = data['receptor_razon_social'] # type: ignore + except KeyError as exc: + raise ValueError("Programming error: a referenced field is missing.") from exc + + try: + dte_data = DteDataL2( + emisor_rut=emisor_rut, + tipo_dte=tipo_dte, + folio=folio, + fecha_emision_date=fecha_emision_date, + receptor_rut=receptor_rut, + monto_total=monto_total, + emisor_razon_social=emisor_razon_social, + receptor_razon_social=receptor_razon_social, + # fecha_vencimiento_date='', + # firma_documento_dt='', + # signature_value='', + # signature_x509_cert_der='', + # emisor_giro='', + # emisor_email='', + # receptor_email='', + ) + except (TypeError, ValueError): + raise + + return dte_data + + +class RcvVentaCsvRowSchema(_RcvCsvRowSchemaBase): + + FIELD_FECHA_RECEPCION_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + FIELD_FECHA_ACUSE_RECIBO_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + FIELD_FECHA_RECLAMO_DT_TZ = DteDataL2.DATETIME_FIELDS_TZ + + class Meta: + strict = True + + ########################################################################### + # basic fields + ########################################################################### + + tipo_dte = mm_fields.TipoDteField( + required=True, + load_from='Tipo Doc', + ) + folio = marshmallow.fields.Integer( + required=True, + load_from='Folio', + ) + fecha_emision_date = mm_utils.CustomMarshmallowDateField( + format='%d/%m/%Y', # e.g. 
'22/10/2018' + required=True, + load_from='Fecha Docto', + ) + receptor_rut = mm_fields.RutField( + required=True, + load_from='Rut cliente', + ) + monto_total = marshmallow.fields.Integer( + required=True, + load_from='Monto total', + ) + receptor_razon_social = marshmallow.fields.String( + required=True, + load_from='Razon Social', + ) + + ########################################################################### + # fields whose value is set using data passed in the schema context + ########################################################################### + + emisor_rut = mm_fields.RutField( + required=True, + ) + emisor_razon_social = marshmallow.fields.String( + required=True, + ) + + ########################################################################### + # extra fields: not included in the returned struct + ########################################################################### + + fecha_recepcion_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=True, + load_from='Fecha Recepcion', + ) + fecha_acuse_recibo_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=False, + allow_none=True, + load_from='Fecha Acuse Recibo', + ) + fecha_reclamo_dt = marshmallow.fields.DateTime( + format='%d/%m/%Y %H:%M:%S', # e.g. '23/10/2018 01:54:13' + required=False, + allow_none=True, + load_from='Fecha Reclamo', + ) + + @marshmallow.pre_load + def preprocess(self, in_data: dict) -> dict: + # note: required fields checks are run later on automatically thus we may not assume that + # values of required fields (`required=True`) exist. + + # Set field value only if it was not in the input data. + in_data.setdefault('emisor_rut', self.context['emisor_rut']) + in_data.setdefault('emisor_razon_social', self.context['emisor_razon_social']) + + # Fix missing/default values. 
+ if 'Fecha Acuse Recibo' in in_data: + if in_data['Fecha Acuse Recibo'] == '': + in_data['Fecha Acuse Recibo'] = None + if 'Fecha Reclamo' in in_data: + if in_data['Fecha Reclamo'] == '': + in_data['Fecha Reclamo'] = None + + return in_data + + @marshmallow.post_load + def postprocess(self, data: dict) -> dict: + # >>> data['fecha_recepcion_dt'].isoformat() + # '2018-10-23T01:54:13' + data['fecha_recepcion_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_recepcion_dt'], tz=self.FIELD_FECHA_RECEPCION_DT_TZ) + # >>> data['fecha_recepcion_dt'].isoformat() + # '2018-10-23T01:54:13-03:00' + # >>> data['fecha_recepcion_dt'].astimezone(pytz.UTC).isoformat() + # '2018-10-23T04:54:13+00:00' + + # note: to express this value in another timezone (but the value does not change), do + # `dt_obj.astimezone(pytz.timezone('some timezone'))` + + if 'fecha_acuse_recibo_dt' in data and data['fecha_acuse_recibo_dt']: + data['fecha_acuse_recibo_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_acuse_recibo_dt'], tz=self.FIELD_FECHA_ACUSE_RECIBO_DT_TZ) + if 'fecha_reclamo_dt' in data and data['fecha_reclamo_dt']: + data['fecha_reclamo_dt'] = tz_utils.convert_naive_dt_to_tz_aware( + dt=data['fecha_reclamo_dt'], tz=self.FIELD_FECHA_RECLAMO_DT_TZ) + + return data + + +############################################################################### +# helpers +############################################################################### + +class _RcvCsvDialect(csv.Dialect): + + """ + CSV dialect of RCV CSV files. + + The properties of this dialect were determined with the help of + :class:`csv.Sniffer`. + + >>> import gzip + >>> filename = 'SII-download-RCV-file-http-body-response.csv.gz' + >>> with gzip.open(filename, 'rt', encoding='utf-8') as f: + ... 
dialect = csv.Sniffer().sniff(f.read(50 * 1024)) + + """ + + delimiter = ';' + quotechar = '"' + escapechar = None + doublequote = False + skipinitialspace = False + lineterminator = '\r\n' + quoting = csv.QUOTE_MINIMAL + + +def _parse_rcv_csv_file( + input_csv_row_schema: _RcvCsvRowSchemaBase, + expected_input_field_names: Sequence[str], + fields_to_remove_names: Sequence[str], + input_file_path: str, + n_rows_offset: int, + max_n_rows: int = None, +) -> Iterable[Tuple[Optional[DteDataL2], int, Dict[str, object], Dict[str, object]]]: + """ + Parse DTE data objects from a RCV file (CSV). + + Common implementation for the different kinds of RCV files (CSV). + + """ + for field_to_remove_name in fields_to_remove_names: + if field_to_remove_name not in expected_input_field_names: + raise Exception( + "Programming error: field to remove is not one of the expected ones.", + field_to_remove_name) + + _CSV_ROW_DICT_EXTRA_FIELDS_KEY = '_extra_csv_fields_data' + + fields_to_remove_names += (_CSV_ROW_DICT_EXTRA_FIELDS_KEY, ) # type: ignore + + input_data_enc = 'utf-8' + # note: + # > If csvfile is a file object, it should be opened with newline='' + # https://docs.python.org/3/library/csv.html#csv.reader + with open(input_file_path, mode='rt', encoding=input_data_enc, newline='') as input_f: + # Create a CSV reader, with auto-detection of header names (first row). + csv_reader = csv_utils.create_csv_dict_reader( + input_f, + csv_dialect=_RcvCsvDialect, + row_dict_extra_fields_key=_CSV_ROW_DICT_EXTRA_FIELDS_KEY, + expected_fields_strict=True, + expected_field_names=expected_input_field_names, + ) + + g = rows_processing.csv_rows_mm_deserialization_iterator( + csv_reader, + row_schema=input_csv_row_schema, + n_rows_offset=n_rows_offset, + max_n_rows=max_n_rows, + fields_to_remove_names=fields_to_remove_names, + ) + + for row_ix, row_data, deserialized_row_data, validation_errors in g: + logger.debug("Processing row %s. 
Content: %s", row_ix, repr(row_data)) + + dte_data = None + row_errors: Dict[str, object] = {} + conversion_error = None + + if not validation_errors: + try: + dte_data = input_csv_row_schema.to_dte_data_l2(deserialized_row_data) + except Exception as exc: + conversion_error = str(exc) + logger.exception( + "Deserialized data to data model instance conversion failed " + "(probably a programming error).") + + # Instead of empty dicts, lists, str, etc, we want to have None. + if validation_errors: + row_errors['validation'] = validation_errors + if conversion_error: + row_errors['other'] = conversion_error + + yield dte_data, row_ix, row_data, row_errors diff --git a/tests/test_rcv_parse_csv.py b/tests/test_rcv_parse_csv.py new file mode 100644 index 00000000..6dc7741c --- /dev/null +++ b/tests/test_rcv_parse_csv.py @@ -0,0 +1,22 @@ +import unittest + +from cl_sii.rcv.parse_csv import ( # noqa: F401 + RcvVentaCsvRowSchema, parse_rcv_venta_csv_file, _parse_rcv_csv_file, +) + + +class RcvVentaCsvRowSchemaTest(unittest.TestCase): + + # TODO: implement for 'RcvVentaCsvRowSchema'. + pass + + +class FunctionsTest(unittest.TestCase): + + def test_parse_rcv_venta_csv_file(self) -> None: + # TODO: implement for 'parse_rcv_venta_csv_file'. + pass + + def test__parse_rcv_csv_file(self) -> None: + # TODO: implement for '_parse_rcv_csv_file'. 
+ pass From 009e310745af01c7d2cd02c36532374ea663692c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:21:19 -0400 Subject: [PATCH 7/8] HISTORY: update for new version --- HISTORY.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index ec01202d..c4b30a84 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,15 @@ History ------- +0.6.3 (2019-05-24) ++++++++++++++++++++++++ + +* (PR #52, 2019-05-24) rcv: add module ``parse_csv`` +* (PR #51, 2019-05-24) libs: add module ``rows_processing`` +* (PR #50, 2019-05-24) libs: add module ``csv_utils`` +* (PR #49, 2019-05-24) libs.mm_utils: add ``validate_no_unexpected_input_fields`` +* (PR #48, 2019-05-24) dte.data_models: add ``DteDataL2.as_dte_data_l1`` + 0.6.2 (2019-05-15) +++++++++++++++++++++++ From 4bea00322ede92e660b2372ca6560a22d36bba77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?= Date: Fri, 24 May 2019 20:21:51 -0400 Subject: [PATCH 8/8] =?UTF-8?q?Bump=20version:=200.6.2=20=E2=86=92=200.6.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- cl_sii/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1e57eee7..cae36e13 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.2 +current_version = 0.6.3 commit = True tag = True diff --git a/cl_sii/__init__.py b/cl_sii/__init__.py index d90243c9..6a73fc58 100644 --- a/cl_sii/__init__.py +++ b/cl_sii/__init__.py @@ -5,4 +5,4 @@ """ -__version__ = '0.6.2' +__version__ = '0.6.3'