From e17bd40b2d630437da5edd3e2fa7a41cbda3ba3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Thu, 4 Apr 2019 17:31:23 -0300
Subject: [PATCH 01/15] add sub-package `contribuyente`

For code related to "contribuyente" (tax payer).

Just constants for the moment.
---
 cl_sii/contribuyente/__init__.py  |  0
 cl_sii/contribuyente/constants.py | 17 +++++++++++++++++
 tests/test_contribuyente.py       |  1 +
 3 files changed, 18 insertions(+)
 create mode 100644 cl_sii/contribuyente/__init__.py
 create mode 100644 cl_sii/contribuyente/constants.py
 create mode 100644 tests/test_contribuyente.py

diff --git a/cl_sii/contribuyente/__init__.py b/cl_sii/contribuyente/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/cl_sii/contribuyente/constants.py b/cl_sii/contribuyente/constants.py
new file mode 100644
index 00000000..1062a936
--- /dev/null
+++ b/cl_sii/contribuyente/constants.py
@@ -0,0 +1,17 @@
+"""
+Contribuyente-related constants.
+
+Source: XML types 'RznSocLargaType' and 'RznSocCortaType' in official schema
+'SiiTypes_v10.xsd'.
+https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/SiiTypes_v10.xsd#L635-L651
+
+"""
+
+
+# TODO: RAZON_SOCIAL_LONG_REGEX = re.compile(r'^...$')
+
+RAZON_SOCIAL_LONG_MAX_LENGTH = 100
+""""Razón Social" max length ("long version")."""
+
+RAZON_SOCIAL_SHORT_MAX_LENGTH = 40
+""""Razón Social" max length ("short version")."""
diff --git a/tests/test_contribuyente.py b/tests/test_contribuyente.py
new file mode 100644
index 00000000..30ed389a
--- /dev/null
+++ b/tests/test_contribuyente.py
@@ -0,0 +1 @@
+from cl_sii.contribuyente import constants  # noqa: F401

From e1a0cbf8b42e845ab06eab3909e177695b741101 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Thu, 4 Apr 2019 17:33:17 -0300
Subject: [PATCH 02/15] add sub-package `dte`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For code related to "dte" (Documento Tributario Electrónico).

Just an enum and constants for the moment.
---
 cl_sii/dte/__init__.py  |   0
 cl_sii/dte/constants.py | 100 ++++++++++++++++++++++++++++++++++++++++
 tests/test_dte.py       |   1 +
 3 files changed, 101 insertions(+)
 create mode 100644 cl_sii/dte/__init__.py
 create mode 100644 cl_sii/dte/constants.py
 create mode 100644 tests/test_dte.py

diff --git a/cl_sii/dte/__init__.py b/cl_sii/dte/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/cl_sii/dte/constants.py b/cl_sii/dte/constants.py
new file mode 100644
index 00000000..f52ec186
--- /dev/null
+++ b/cl_sii/dte/constants.py
@@ -0,0 +1,100 @@
+"""
+DTE-related constants.
+
+Sources: official XML schemas 'SiiTypes_v10.xsd' and 'DTE_v10.xsd'.
+https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/
+
+"""
+import enum
+
+
+###############################################################################
+# DTE fields / Folio
+###############################################################################
+
+# XML element 'DTEDefType/Documento/Encabezado/IdDoc/Folio'
+# - description: "Folio del Documento Electronico"
+# - XML type: 'FolioType'
+# - source:
+#   https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/DTE_v10.xsd#L52-L56
+# XML type 'FolioType' in official schema 'SiiTypes_v10.xsd'.
+# - source:
+#   https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/SiiTypes_v10.xsd#L153-L160
+
+DTE_FOLIO_FIELD_TYPE = int
+"""DTE field 'Folio' type."""
+DTE_FOLIO_FIELD_MIN_VALUE = 1
+"""DTE field 'Folio' min value."""
+DTE_FOLIO_FIELD_MAX_VALUE = 10 ** 10
+"""DTE field 'Folio' max value."""
+
+
+###############################################################################
+# DTE fields / Monto Total
+###############################################################################
+
+# XML element 'DTEDefType/Documento/Encabezado/Totales/MntTotal'
+# - description: "Monto Total del DTE"
+# - XML type: 'MontoType'
+# - source:
+#   https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/DTE_v10.xsd#L1160-L1164
+# XML type 'MontoType' in official schema 'SiiTypes_v10.xsd'
+# - source:
+#   https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/SiiTypes_v10.xsd#L563-L570
+
+DTE_MONTO_TOTAL_FIELD_TYPE = int
+"""DTE field 'Monto Total' type."""
+DTE_MONTO_TOTAL_FIELD_MIN_VALUE = 0
+"""DTE field 'Monto Total' min value."""
+DTE_MONTO_TOTAL_FIELD_MAX_VALUE = 10 ** 18
+"""DTE field 'Monto Total' max value."""
+
+
+###############################################################################
+# DTE fields / Tipo de DTE
+###############################################################################
+
+# XML element 'DTEDefType/Documento/Encabezado/IdDoc/TipoDTE'
+# - description: "Tipo de DTE"
+# - XML type: 'DTEType'
+# - source:
+#   https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/DTE_v10.xsd#L47-L51
+
+DTE_TIPO_DTE_FIELD_TYPE = int
+"""DTE field 'Tipo de DTE' type."""
+DTE_TIPO_DTE_FIELD_MIN_VALUE = 1
+"""DTE field 'Tipo de DTE' min value."""
+# DTE_TIPO_DTE_FIELD_MAX_VALUE = ?
+# """DTE field 'Tipo de DTE' max value."""
+
+
+@enum.unique
+class TipoDteEnum(enum.IntEnum):
+
+    """
+    Enum of Tipo de DTE.
+
+    Source: XML type ``DTEType`` (enum) in official schema ``SiiTypes_v10.xsd``.
+    https://github.com/fyndata/lib-cl-sii-python/blob/8b51350/cl_sii/data/ref/factura_electronica/schema_dte/SiiTypes_v10.xsd#L63-L99
+
+    """
+
+    FACTURA_ELECTRONICA = 33
+    """Factura Electrónica."""
+
+    FACTURA_NO_AFECTA_O_EXENTA_ELECTRONICA = 34
+    """Factura no Afecta o Exenta Electrónica."""
+    # aka 'Factura Electrónica de Venta de Bienes y Servicios No afectos o Exento de IVA'
+
+    FACTURA_COMPRA_ELECTRONICA = 46
+    """Factura de Compra Electrónica."""
+    # Name should have been 'Factura Electrónica de Compra'.
+
+    GUIA_DESPACHO_ELECTRONICA = 52
+    """Guía de Despacho Electrónica."""
+
+    NOTA_DEBITO_ELECTRONICA = 56
+    """Nota de Débito Electrónica."""
+
+    NOTA_CREDITO_ELECTRONICA = 61
+    """Nota de Crédito Electrónica."""
diff --git a/tests/test_dte.py b/tests/test_dte.py
new file mode 100644
index 00000000..1d66c11b
--- /dev/null
+++ b/tests/test_dte.py
@@ -0,0 +1 @@
+from cl_sii.dte import constants  # noqa: F401

From c2ec77b7ec552805e8d699ffa7cf0b72e959e1a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Wed, 5 Dec 2018 11:48:20 -0300
Subject: [PATCH 03/15] requirements: add 'pytz'

"World timezone definitions, modern and historical".

https://pythonhosted.org/pytz/
https://launchpad.net/pytz
https://github.com/stub42/pytz
---
 requirements/base.txt | 1 +
 setup.py              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements/base.txt b/requirements/base.txt
index f74d5c86..1d19d0e6 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -3,6 +3,7 @@
 
 # Required packages:
 marshmallow==2.16.3
+pytz==2018.9
 
 # Packages dependencies:
 #none
diff --git a/setup.py b/setup.py
index 7635ceab..6e939b35 100755
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,7 @@ def get_version(*file_paths: Sequence[str]) -> str:
 # TODO: add reasonable upper-bound for some of these packages?
 requirements = [
     'marshmallow>=2.16.3',
+    'pytz>=2018.7',
 ]
 
 extras_requirements = {

From e15d66c5ca9469141751c0b9d0bec811223870b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Thu, 4 Apr 2019 17:41:01 -0300
Subject: [PATCH 04/15] libs: add module `tz_utils`

Utils for dealing with timezone-aware datetime objects.

Tests have not been implemented.
---
 cl_sii/libs/__init__.py     |  4 +++
 cl_sii/libs/tz_utils.py     | 66 +++++++++++++++++++++++++++++++++++++
 setup.cfg                   |  3 ++
 tests/test_libs_tz_utils.py | 16 +++++++++
 4 files changed, 89 insertions(+)
 create mode 100644 cl_sii/libs/__init__.py
 create mode 100644 cl_sii/libs/tz_utils.py
 create mode 100644 tests/test_libs_tz_utils.py

diff --git a/cl_sii/libs/__init__.py b/cl_sii/libs/__init__.py
new file mode 100644
index 00000000..ba1ab83c
--- /dev/null
+++ b/cl_sii/libs/__init__.py
@@ -0,0 +1,4 @@
+"""
+Package that contains code useful for :mod:`cl_sii` but not particular to it.
+
+"""
diff --git a/cl_sii/libs/tz_utils.py b/cl_sii/libs/tz_utils.py
new file mode 100644
index 00000000..8c6ec286
--- /dev/null
+++ b/cl_sii/libs/tz_utils.py
@@ -0,0 +1,66 @@
+from datetime import datetime
+from typing import Union
+
+import pytz
+import pytz.tzinfo
+
+
+# note: pytz does some magic with its timezone classes so we need to "invent" a parent class.
+PytzTimezone = Union[
+    pytz.tzinfo.BaseTzInfo,
+    pytz.tzinfo.StaticTzInfo,
+    pytz.tzinfo.DstTzInfo,
+    pytz._FixedOffset,  # type: ignore
+]
+
+
+UTC = pytz.UTC  # type: PytzTimezone
+TIMEZONE_CL_SANTIAGO = pytz.timezone('America/Santiago')  # type: PytzTimezone
+
+
+def get_now_tz_aware() -> datetime:
+    """
+    Return the current UTC date and time as a timezone-aware object.
+
+    >>> get_now_tz_aware()
+    datetime.datetime(2018, 10, 23, 1, 54, 13, tzinfo=<UTC>)
+
+    """
+    # The following implementation alternatives look prettier but are less-performant:
+    #   - `convert_naive_dt_to_tz_aware(dt=datetime.utcnow(), tz=pytz.UTC)`
+    #   - `pytz.UTC.localize(datetime.utcnow())`
+
+    # source: 'django.utils.timezone.now' @ Django 2.1.3
+    return datetime.utcnow().replace(tzinfo=UTC)
+
+
+def convert_naive_dt_to_tz_aware(dt: datetime, tz: PytzTimezone) -> datetime:
+    """
+    Convert an offset-naive datetime object to a timezone-aware one.
+
+    >>> dt_naive = datetime(2018, 10, 23, 1, 54, 13)
+    >>> dt_naive
+    datetime.datetime(2018, 10, 23, 1, 54, 13)
+    >>> dt_naive.isoformat()
+    '2018-10-23T01:54:13'
+
+    >>> dt_tz_aware_1 = convert_naive_dt_to_tz_aware(dt_naive, UTC)
+    >>> dt_tz_aware_1
+    datetime.datetime(2018, 10, 23, 1, 54, 13, tzinfo=<UTC>)
+    >>> dt_tz_aware_1.isoformat()
+    '2018-10-23T01:54:13+00:00'
+
+    >>> dt_tz_aware_2 = convert_naive_dt_to_tz_aware(dt_naive, TIMEZONE_CL_SANTIAGO)
+    >>> dt_tz_aware_2
+    datetime.datetime(2018, 10, 23, 1, 54, 13, tzinfo=<DstTzInfo 'America/Santiago'
+    -03-1 day, 21:00:00 DST>)
+    >>> dt_tz_aware_2.isoformat()
+    '2018-10-23T01:54:13-03:00'
+
+    :param dt: offset-naive datetime
+    :param tz: timezone e.g. ``pytz.timezone('America/Santiago')``
+    :raises ValueError: if ``dt`` is already timezone-aware
+
+    """
+    dt_tz_aware = tz.localize(dt)  # type: datetime
+    return dt_tz_aware
diff --git a/setup.cfg b/setup.cfg
index 5eeb1deb..93144517 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,9 @@ ignore_missing_imports = True
 [mypy-rest_framework.*]
 ignore_missing_imports = True
 
+[mypy-pytz.*]
+ignore_missing_imports = True
+
 [flake8]
 ignore =
     # W503 line break before binary operator
diff --git a/tests/test_libs_tz_utils.py b/tests/test_libs_tz_utils.py
new file mode 100644
index 00000000..8d2bcb32
--- /dev/null
+++ b/tests/test_libs_tz_utils.py
@@ -0,0 +1,16 @@
+import unittest
+
+from cl_sii.libs.tz_utils import convert_naive_dt_to_tz_aware, get_now_tz_aware  # noqa: F401
+
+
+class FunctionsTest(unittest.TestCase):
+
+    def test_get_now_tz_aware(self) -> None:
+        # TODO: implement!
+        # Reuse doctests/examples in function docstring.
+        pass
+
+    def test_convert_naive_dt_to_tz_aware(self) -> None:
+        # TODO: implement!
+        # Reuse doctests/examples in function docstring.
+        pass

From e5f3fd658a6f6575215e27ac871378d59c9b3695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Thu, 4 Apr 2019 17:43:38 -0300
Subject: [PATCH 05/15] libs: add module `mm_utils`

Custom marshmallow field `CustomMarshmallowDateField`.

Tests have not been implemented.
---
 cl_sii/libs/mm_utils.py     | 92 +++++++++++++++++++++++++++++++++++++
 tests/test_libs_mm_utils.py | 10 ++++
 2 files changed, 102 insertions(+)
 create mode 100644 cl_sii/libs/mm_utils.py
 create mode 100644 tests/test_libs_mm_utils.py

diff --git a/cl_sii/libs/mm_utils.py b/cl_sii/libs/mm_utils.py
new file mode 100644
index 00000000..4c25ebeb
--- /dev/null
+++ b/cl_sii/libs/mm_utils.py
@@ -0,0 +1,92 @@
+from datetime import date, datetime
+from typing import Any, Union
+
+import marshmallow
+import marshmallow.fields
+import marshmallow.utils
+
+
+class CustomMarshmallowDateField(marshmallow.fields.Field):
+    """
+    A formatted date string.
+
+    Customized alternative to :class:`marshmallow.fields.Date` that allows
+    setting a date format string (like :class:`marshmallow.fields.DateTime`
+    does).
+
+    Implementation largely based on ``marshmallow`` version 2.16.3, classes
+    :class:`marshmallow.fields.Date` and :class:`marshmallow.fields.DateTime`.
+
+    """
+
+    # note: function's return type must be 'str'.
+    DATEFORMAT_SERIALIZATION_FUNCS = {
+        'iso': date.isoformat,
+        'iso8601': date.isoformat,
+    }
+
+    # note: function's return type must be 'datetime.date'.
+    DATEFORMAT_DESERIALIZATION_FUNCS = {
+        'iso': marshmallow.utils.from_iso_date,
+        'iso8601': marshmallow.utils.from_iso_date,
+    }
+
+    DEFAULT_FORMAT = 'iso'
+
+    default_error_messages = {
+        'invalid': 'Not a valid date.',
+        'format': '"{input}" cannot be formatted as a date.',
+    }
+
+    def __init__(self, format: str = None, **kwargs: Any) -> None:
+        """Constructor.
+
+        :param format: Either ``"iso"`` (for ISO-8601) or a date format str.
+            If `None`, defaults to "iso".
+        :param kwargs: the same ones that :class:`Field` receives.
+
+        """
+        super().__init__(**kwargs)
+        # Allow this to be None. It may be set later in the ``_serialize``
+        # or ``_deserialize`` methods. This allows a Schema to dynamically set the
+        # dateformat, e.g. from a Meta option
+        self.dateformat = format
+
+    def _add_to_schema(self, field_name: str, schema: marshmallow.Schema) -> None:
+        super()._add_to_schema(field_name, schema)
+        self.dateformat = self.dateformat or schema.opts.dateformat
+
+    def _serialize(self, value: date, attr: str, obj: object) -> Union[str, None]:
+        if value is None:
+            return None
+        self.dateformat = self.dateformat or self.DEFAULT_FORMAT
+        format_func = self.DATEFORMAT_SERIALIZATION_FUNCS.get(self.dateformat, None)
+        if format_func:
+            try:
+                date_str = format_func(value)
+            except (AttributeError, ValueError):
+                self.fail('format', input=value)
+        else:
+            date_str = value.strftime(self.dateformat)
+
+        return date_str
+
+    def _deserialize(self, value: str, attr: str, data: dict) -> date:
+        if not value:  # Falsy values, e.g. '', None, [] are not valid
+            self.fail('invalid')
+        self.dateformat = self.dateformat or self.DEFAULT_FORMAT
+        func = self.DATEFORMAT_DESERIALIZATION_FUNCS.get(self.dateformat)
+        if func:
+            try:
+                date_value = func(value)  # type: date
+            except (TypeError, AttributeError, ValueError):
+                self.fail('invalid')
+        elif self.dateformat:
+            try:
+                date_value = datetime.strptime(value, self.dateformat).date()
+            except (TypeError, AttributeError, ValueError):
+                self.fail('invalid')
+        else:
+            self.fail('invalid')
+
+        return date_value
diff --git a/tests/test_libs_mm_utils.py b/tests/test_libs_mm_utils.py
new file mode 100644
index 00000000..d1a0c8b3
--- /dev/null
+++ b/tests/test_libs_mm_utils.py
@@ -0,0 +1,10 @@
+import unittest
+
+from cl_sii.libs.mm_utils import CustomMarshmallowDateField  # noqa: F401
+
+
+class CustomMarshmallowDateFieldTest(unittest.TestCase):
+
+    def test_x(self) -> None:
+        # TODO: implement!
+        pass

From 1736a88252b91749b8fde0948f95ce5970a54fac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Tue, 11 Dec 2018 12:18:23 -0300
Subject: [PATCH 06/15] add sub-package `rcv`

Useful for dealing with SII RCV ("Registro de Compras y Ventas").
Its main feature is the function `process_rcv_csv_file`.

Tests have not been implemented.
---
 cl_sii/rcv/__init__.py  |  72 +++++++++++++++
 cl_sii/rcv/parse.py     | 196 ++++++++++++++++++++++++++++++++++++++++
 tests/test_rcv.py       |  10 ++
 tests/test_rcv_parse.py |  16 ++++
 4 files changed, 294 insertions(+)
 create mode 100644 cl_sii/rcv/__init__.py
 create mode 100644 cl_sii/rcv/parse.py
 create mode 100644 tests/test_rcv.py
 create mode 100644 tests/test_rcv_parse.py

diff --git a/cl_sii/rcv/__init__.py b/cl_sii/rcv/__init__.py
new file mode 100644
index 00000000..03ca583b
--- /dev/null
+++ b/cl_sii/rcv/__init__.py
@@ -0,0 +1,72 @@
+"""
+SII RCV ("Registro de Compras y Ventas").
+
+.. note::
+    The RCV ("Registro de Compras y Ventas") is composed of 2 "registros":
+    RC ("Registro de Compras") and RV ("Registro de Ventas").
+
+.. seealso::
+    http://www.sii.cl/preguntas_frecuentes/catastro/001_012_6971.htm
+
+"""
+import csv
+import io
+from typing import Callable
+
+from . import parse
+
+
+def process_rcv_csv_file(
+    text_stream: io.TextIOBase,
+    rcv_owner_rut: str,
+    row_data_handler: Callable,
+    max_data_rows: int = None,
+) -> int:
+    """
+    Process a RCV CSV file.
+
+    Processing steps:
+    - Create a CSV reader, with auto-detection of header names (first row).
+    - Instantiate a schema to parse and deserialize each row.
+    - For each data row:
+        - Using an appropriate schema, deserialize the raw data.
+        - Apply ``row_data_handler`` to the deserialization output.
+
+    :param text_stream: a file-like object, not necessarily a real file
+    :param rcv_owner_rut: RCV file owner's RUT
+    :param row_data_handler: function to be called with the row number and its parsed data
+    :param max_data_rows: max number of data rows to process (raise exception if exceeded);
+        ``None`` means no limit
+    :return: number of data rows processed
+
+    """
+    # TODO: convert to iterator. That way we do not need the 'row_data_handler' and we can also use
+    #   the same function to retrieve the collection of deserialized rows.
+
+    csv_reader = parse.create_rcv_csv_reader(text_stream, expected_fields_strict=True)
+    schema = parse.RcvCsvRowSchema(context=dict(receptor_rut=rcv_owner_rut))
+
+    try:
+        for row_ix, row_data in enumerate(csv_reader, start=1):
+            if max_data_rows is not None and row_ix > max_data_rows:
+                # TODO: custom exception
+                raise Exception("Exceeded 'max_data_rows' value: {}.".format(max_data_rows))
+
+            try:
+                deserialized_row_data = schema.deserialize_csv_row(row_data)
+            except Exception as exc:
+                exc_msg = "Error deserializing row {} of CSV file: {}".format(row_ix, exc)
+                raise Exception(exc_msg) from exc
+            try:
+                row_data_handler(row_ix, deserialized_row_data)
+            except Exception as exc:
+                exc_msg = "Error in row_data_handler for row {} of CSV file: {}".format(row_ix, exc)
+                raise Exception(exc_msg) from exc
+
+        # The first row in the CSV file is not a data row; it is the headers row.
+        rows_processed = csv_reader.line_num - 1
+    except csv.Error as exc:
+        exc_msg = "CSV error for line {} of CSV file: {}".format(csv_reader.line_num, exc)
+        raise Exception(exc_msg) from exc
+
+    return rows_processed
diff --git a/cl_sii/rcv/parse.py b/cl_sii/rcv/parse.py
new file mode 100644
index 00000000..8ad1ca5d
--- /dev/null
+++ b/cl_sii/rcv/parse.py
@@ -0,0 +1,196 @@
+import csv
+import io
+from collections import OrderedDict
+
+import marshmallow
+import marshmallow.fields
+import marshmallow.validate
+
+from cl_sii.extras import mm_fields
+
+from ..libs.mm_utils import CustomMarshmallowDateField
+from ..libs.tz_utils import convert_naive_dt_to_tz_aware, TIMEZONE_CL_SANTIAGO
+
+
+_CSV_ROW_DICT_EXTRA_FIELDS_KEY = None
+"""CSV row dict key under which the extra data in the row will be saved."""
+
+_RCV_CSV_EXPECTED_FIELD_NAMES = (
+    'Nro',
+    'Tipo Doc',
+    'Tipo Compra',
+    'RUT Proveedor',
+    'Razon Social',
+    'Folio',
+    'Fecha Docto',
+    'Fecha Recepcion',
+    'Fecha Acuse',
+    'Monto Exento',
+    'Monto Neto',
+    'Monto IVA Recuperable',
+    'Monto Iva No Recuperable',
+    'Codigo IVA No Rec.',
+    'Monto Total',
+    'Monto Neto Activo Fijo',
+    'IVA Activo Fijo',
+    'IVA uso Comun',
+    'Impto. Sin Derecho a Credito',
+    'IVA No Retenido',
+    'Tabacos Puros',
+    'Tabacos Cigarrillos',
+    'Tabacos Elaborados',
+    'NCE o NDE sobre Fact. de Compra',
+    'Codigo Otro Impuesto',
+    'Valor Otro Impuesto',
+    'Tasa Otro Impuesto',
+)
+_RCV_CSV_DIALECT_KEY = 'sii_rcv'
+
+
+class _RcvCsvDialect(csv.Dialect):
+
+    """
+    CSV dialect of RCV CSV files.
+
+    The properties of this dialect were determined with the help of
+    :class:`csv.Sniffer`.
+
+    >>> import gzip
+    >>> filename = 'SII-download-RCV-file-http-body-response.csv.gz'
+    >>> with gzip.open(filename, 'rt', encoding='utf-8') as f:
+    ...     dialect = csv.Sniffer().sniff(f.read(50 * 1024))
+
+    """
+
+    delimiter = ';'
+    quotechar = '"'
+    escapechar = None
+    doublequote = False
+    skipinitialspace = False
+    lineterminator = '\r\n'
+    quoting = csv.QUOTE_MINIMAL
+
+
+csv.register_dialect(_RCV_CSV_DIALECT_KEY, _RcvCsvDialect)
+
+
+class RcvCsvRowSchema(marshmallow.Schema):
+
+    EXPECTED_INPUT_FIELDS = tuple(_RCV_CSV_EXPECTED_FIELD_NAMES) + (_CSV_ROW_DICT_EXTRA_FIELDS_KEY, )  # type: ignore  # noqa: E501
+    FIELD_FECHA_RECEPCION_DATETIME_TZ = TIMEZONE_CL_SANTIAGO
+
+    class Meta:
+        strict = True
+
+    emisor_rut = mm_fields.RutField(
+        required=True,
+        load_from='RUT Proveedor',
+    )
+    tipo_dte = marshmallow.fields.Integer(
+        required=True,
+        load_from='Tipo Doc',
+    )
+    folio = marshmallow.fields.Integer(
+        required=True,
+        load_from='Folio',
+    )
+    fecha_emision_date = CustomMarshmallowDateField(
+        format='%d/%m/%Y',  # e.g. '22/10/2018'
+        required=True,
+        load_from='Fecha Docto',
+    )
+    fecha_recepcion_datetime = marshmallow.fields.DateTime(
+        format='%d/%m/%Y %H:%M:%S',  # e.g. '23/10/2018 01:54:13'
+        required=True,
+        load_from='Fecha Recepcion',
+    )
+    # note: this field value is set using data passed in the schema context.
+    receptor_rut = mm_fields.RutField(
+        required=True,
+    )
+    monto_total = marshmallow.fields.Integer(
+        required=True,
+        load_from='Monto Total',
+    )
+
+    @marshmallow.pre_load
+    def preprocess(self, in_data: dict) -> dict:
+        # note: required fields checks are run later on automatically thus we may not assume that
+        #   values of required fields (`required=True`) exist.
+
+        # Set field value only if it was not in the input data.
+        in_data.setdefault('receptor_rut', self.context['receptor_rut'])
+
+        return in_data
+
+    @marshmallow.post_load
+    def postprocess(self, data: dict) -> dict:
+        # >>> data['fecha_recepcion_datetime'].isoformat()
+        # '2018-10-23T01:54:13'
+        data['fecha_recepcion_datetime'] = convert_naive_dt_to_tz_aware(
+            dt=data['fecha_recepcion_datetime'], tz=self.FIELD_FECHA_RECEPCION_DATETIME_TZ)
+        # >>> data['fecha_recepcion_datetime'].isoformat()
+        # '2018-10-23T01:54:13-03:00'
+        # >>> data['fecha_recepcion_datetime'].astimezone(pytz.UTC).isoformat()
+        # '2018-10-23T04:54:13+00:00'
+
+        # note: to express this value in another timezone (but the value does not change), do
+        #   `datetime_obj.astimezone(pytz.timezone('some timezone'))`
+
+        return data
+
+    @marshmallow.validates_schema(pass_original=True)
+    def validate_schema(self, data: dict, original_data: dict) -> None:
+        # Fail validation if there was an unexpected input field.
+        unexpected_input_fields = (
+            set(original_data)
+            - set(self.fields)
+            - set(self.EXPECTED_INPUT_FIELDS)
+        )
+        if unexpected_input_fields:
+            raise marshmallow.ValidationError(
+                'Unexpected input field', field_names=list(unexpected_input_fields))
+
+    # @marshmallow.validates('field_x')
+    # def validate_field_x(self, value):
+    #     pass
+
+    ###########################################################################
+    # non-marshmallow-related methods
+    ###########################################################################
+
+    def deserialize_csv_row(self, row: OrderedDict) -> dict:
+        try:
+            result = self.load(row)  # type: marshmallow.UnmarshalResult
+        except marshmallow.ValidationError as exc:
+            exc_msg = "Validation errors during deserialization."
+            validation_error_msgs = dict(exc.normalized_messages())
+            raise ValueError(exc_msg, validation_error_msgs) from exc
+
+        result_data = result.data  # type: dict
+        result_errors = result.errors  # type: dict
+        if result_errors:
+            raise Exception("Deserialization errors: %s", result_errors)
+        return result_data
+
+
+def create_rcv_csv_reader(
+    text_stream: io.TextIOBase,
+    expected_fields_strict: bool = True,
+) -> csv.DictReader:
+    # note: mypy wrongly complains: it does not accept 'fieldnames' to be None but that value
+    #   is completely acceptable, and it even is the default!
+    #   > error: Argument "fieldnames" to "DictReader" has incompatible type "None"; expected
+    #   > "Sequence[str]"
+    csv_reader = csv.DictReader(  # type: ignore
+        text_stream,
+        fieldnames=None,  # the values of the first row will be used as the fieldnames
+        restkey=_CSV_ROW_DICT_EXTRA_FIELDS_KEY,
+        dialect=_RCV_CSV_DIALECT_KEY,
+    )
+    if expected_fields_strict and tuple(csv_reader.fieldnames) != _RCV_CSV_EXPECTED_FIELD_NAMES:
+        raise Exception(
+            "CSV file field names do not match those expected, or their order.",
+            csv_reader.fieldnames)
+
+    return csv_reader
diff --git a/tests/test_rcv.py b/tests/test_rcv.py
new file mode 100644
index 00000000..81f0d3f0
--- /dev/null
+++ b/tests/test_rcv.py
@@ -0,0 +1,10 @@
+import unittest
+
+from cl_sii.rcv import process_rcv_csv_file  # noqa: F401
+
+
+class FunctionsTest(unittest.TestCase):
+
+    def test_process_rcv_csv_file(self) -> None:
+        # TODO: implement!
+        pass
diff --git a/tests/test_rcv_parse.py b/tests/test_rcv_parse.py
new file mode 100644
index 00000000..8cf1812b
--- /dev/null
+++ b/tests/test_rcv_parse.py
@@ -0,0 +1,16 @@
+import unittest
+
+from cl_sii.rcv.parse import RcvCsvRowSchema, create_rcv_csv_reader  # noqa: F401
+
+
+class RcvCsvRowSchemaTest(unittest.TestCase):
+
+    # TODO: implement!
+    pass
+
+
+class FunctionsTest(unittest.TestCase):
+
+    def test_create_rcv_csv_reader(self) -> None:
+        # TODO: implement!
+        pass

From fc952669c038323d86c623bfb66a1490298acaf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Fri, 5 Apr 2019 11:05:53 -0300
Subject: [PATCH 07/15] README: misc improvements

---
 README.rst | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/README.rst b/README.rst
index 654a6cd0..6061226f 100644
--- a/README.rst
+++ b/README.rst
@@ -2,8 +2,25 @@
 cl-sii Python lib
 =================
 
+.. image:: https://img.shields.io/pypi/v/cl-sii.svg
+    :target: https://pypi.python.org/pypi/cl-sii
+    :alt: PyPI package version
+
+.. image:: https://img.shields.io/pypi/pyversions/cl-sii.svg
+    :target: https://pypi.python.org/pypi/cl-sii
+    :alt: Python versions
+
+.. image:: https://img.shields.io/pypi/l/cl-sii.svg
+    :target: https://pypi.python.org/pypi/cl-sii
+    :alt: License
+
 Python library for Servicio de Impuestos Internos (SII) of Chile.
 
+Documentation
+-------------
+
+The full documentation is at https://lib-cl-sii-python.readthedocs.io.
+
 Status
 -------------
 
@@ -11,9 +28,52 @@ Status
     :target: https://circleci.com/gh/fyndata/lib-cl-sii-python/tree/develop
     :alt: CI status
 
+.. image:: https://codecov.io/gh/fyndata/lib-cl-sii-python/branch/develop/graph/badge.svg
+    :target: https://codecov.io/gh/fyndata/lib-cl-sii-python
+    :alt: Code coverage
+
+.. image:: https://api.codeclimate.com/v1/badges/74408e5f8811f750ff3f/maintainability
+    :target: https://codeclimate.com/github/fyndata/lib-cl-sii-python/maintainability
+    :alt: Code Climate maintainability
+
+.. image:: https://readthedocs.org/projects/lib-cl-sii-python/badge/?version=latest
+    :target: https://lib-cl-sii-python.readthedocs.io/en/latest/?badge=latest
+    :alt: Documentation
 
 Supported Python versions
 -------------------------
 
 Only Python 3.7. Python 3.6 and below will not work because we use some features introduced in
 Python 3.7.
+
+Quickstart
+----------
+
+Install package::
+
+    pip install cl-sii
+
+And TODO
+
+Features
+--------
+
+* TODO
+
+Tests
++++++
+
+Requirements::
+
+    pip install -r requirements/test.txt
+
+Run test suite for all supported Python versions and run tools for
+code style analysis, static type check, etc::
+
+    make test-all
+    make lint
+
+Check code coverage of tests::
+
+    make test-coverage
+    make test-coverage-report-console

From 3987fb38f7cae2fb675ebe44f5aa645559f5ce2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Fri, 30 Nov 2018 17:45:05 -0300
Subject: [PATCH 08/15] requirements: add 'lxml'

"lxml is a Pythonic, mature binding for the libxml2 and libxslt
libraries. It provides safe and convenient access to these libraries
using the ElementTree API."

https://lxml.de/
https://github.com/lxml/lxml
---
 requirements/base.txt | 1 +
 setup.py              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements/base.txt b/requirements/base.txt
index 1d19d0e6..aec4d6d9 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -2,6 +2,7 @@
 # note: it is mandatory to register all dependencies of the required packages.
 
 # Required packages:
+lxml==4.2.5
 marshmallow==2.16.3
 pytz==2018.9
 
diff --git a/setup.py b/setup.py
index 6e939b35..2cabda76 100755
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,7 @@ def get_version(*file_paths: Sequence[str]) -> str:
 
 # TODO: add reasonable upper-bound for some of these packages?
 requirements = [
+    'lxml>=4.2.5',
     'marshmallow>=2.16.3',
     'pytz>=2018.7',
 ]

From 6bac1df59852b8cb519628a76b7adbd6b12e5efe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Wed, 5 Dec 2018 11:39:14 -0300
Subject: [PATCH 09/15] requirements: add 'defusedxml'

"XML bomb protection for Python stdlib modules".

https://github.com/tiran/defusedxml
---
 requirements/base.txt | 1 +
 setup.py              | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements/base.txt b/requirements/base.txt
index aec4d6d9..0cb7cfe3 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -2,6 +2,7 @@
 # note: it is mandatory to register all dependencies of the required packages.
 
 # Required packages:
+defusedxml==0.5.0
 lxml==4.2.5
 marshmallow==2.16.3
 pytz==2018.9
diff --git a/setup.py b/setup.py
index 2cabda76..da24544f 100755
--- a/setup.py
+++ b/setup.py
@@ -23,6 +23,7 @@ def get_version(*file_paths: Sequence[str]) -> str:
 
 # TODO: add reasonable upper-bound for some of these packages?
 requirements = [
+    'defusedxml>=0.5.0',
     'lxml>=4.2.5',
     'marshmallow>=2.16.3',
     'pytz>=2018.7',

From 2bdb40973a7ffa6ee97bad4683fdfd49e9cdb44a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Fri, 5 Apr 2019 11:14:50 -0300
Subject: [PATCH 10/15] rcv.parse: minor import changes

---
 cl_sii/rcv/parse.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/cl_sii/rcv/parse.py b/cl_sii/rcv/parse.py
index 8ad1ca5d..0e5547d9 100644
--- a/cl_sii/rcv/parse.py
+++ b/cl_sii/rcv/parse.py
@@ -7,9 +7,8 @@
 import marshmallow.validate
 
 from cl_sii.extras import mm_fields
-
-from ..libs.mm_utils import CustomMarshmallowDateField
-from ..libs.tz_utils import convert_naive_dt_to_tz_aware, TIMEZONE_CL_SANTIAGO
+from cl_sii.libs import mm_utils
+from cl_sii.libs import tz_utils
 
 
 _CSV_ROW_DICT_EXTRA_FIELDS_KEY = None
@@ -77,7 +76,7 @@ class _RcvCsvDialect(csv.Dialect):
 class RcvCsvRowSchema(marshmallow.Schema):
 
     EXPECTED_INPUT_FIELDS = tuple(_RCV_CSV_EXPECTED_FIELD_NAMES) + (_CSV_ROW_DICT_EXTRA_FIELDS_KEY, )  # type: ignore  # noqa: E501
-    FIELD_FECHA_RECEPCION_DATETIME_TZ = TIMEZONE_CL_SANTIAGO
+    FIELD_FECHA_RECEPCION_DATETIME_TZ = tz_utils.TIMEZONE_CL_SANTIAGO
 
     class Meta:
         strict = True
@@ -94,7 +93,7 @@ class Meta:
         required=True,
         load_from='Folio',
     )
-    fecha_emision_date = CustomMarshmallowDateField(
+    fecha_emision_date = mm_utils.CustomMarshmallowDateField(
         format='%d/%m/%Y',  # e.g. '22/10/2018'
         required=True,
         load_from='Fecha Docto',
@@ -127,7 +126,7 @@ def preprocess(self, in_data: dict) -> dict:
     def postprocess(self, data: dict) -> dict:
         # >>> data['fecha_recepcion_datetime'].isoformat()
         # '2018-10-23T01:54:13'
-        data['fecha_recepcion_datetime'] = convert_naive_dt_to_tz_aware(
+        data['fecha_recepcion_datetime'] = tz_utils.convert_naive_dt_to_tz_aware(
             dt=data['fecha_recepcion_datetime'], tz=self.FIELD_FECHA_RECEPCION_DATETIME_TZ)
         # >>> data['fecha_recepcion_datetime'].isoformat()
         # '2018-10-23T01:54:13-03:00'

From 93de7f9b27983bfcd9ae07c1b18e2bb235cabdd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Tue, 18 Dec 2018 15:44:14 -0300
Subject: [PATCH 11/15] libs: add module `xml_utils`

Add helper functions (plus some related exception classes).

- `parse_untrusted_xml`: handles the numerous issues and concerns
  related to parsing untrusted XML data.
- `read_xml_schema`: instantiate an XML schema object from a file.
- `validate_xml_doc`: validate an XML document against an XML schema.

Sources of files in 'test_data/xml/attacks':
- https://en.wikipedia.org/wiki/Billion_laughs_attack#Code_example
- https://pypi.org/project/defusedxml/#billion-laughs-exponential-entity-expansion
- https://pypi.org/project/defusedxml/#external-entity-expansion-remote
- https://pypi.org/project/defusedxml/#quadratic-blowup-entity-expansion

Some tests have not been implemented.
---
 cl_sii/libs/xml_utils.py                      | 239 ++++++++++++++++++
 setup.cfg                                     |   6 +
 .../xml/attacks/billion-laughs-1.xml          |  16 ++
 .../xml/attacks/billion-laughs-2.xml          |   8 +
 .../external-entity-expansion-remote.xml      |   5 +
 .../quadratic-blowup-entity-expansion.xml     |   5 +
 tests/test_data/xml/trivial.xml               |   5 +
 tests/test_xml_utils.py                       | 101 ++++++++
 tests/utils.py                                |  15 ++
 9 files changed, 400 insertions(+)
 create mode 100644 cl_sii/libs/xml_utils.py
 create mode 100644 tests/test_data/xml/attacks/billion-laughs-1.xml
 create mode 100644 tests/test_data/xml/attacks/billion-laughs-2.xml
 create mode 100644 tests/test_data/xml/attacks/external-entity-expansion-remote.xml
 create mode 100644 tests/test_data/xml/attacks/quadratic-blowup-entity-expansion.xml
 create mode 100644 tests/test_data/xml/trivial.xml
 create mode 100644 tests/test_xml_utils.py
 create mode 100644 tests/utils.py

diff --git a/cl_sii/libs/xml_utils.py b/cl_sii/libs/xml_utils.py
new file mode 100644
index 00000000..1a9fd0a3
--- /dev/null
+++ b/cl_sii/libs/xml_utils.py
@@ -0,0 +1,239 @@
+import logging
+import os
+
+import defusedxml
+import defusedxml.lxml
+import lxml.etree
+import xml.parsers.expat
+import xml.parsers.expat.errors
+
+
+logger = logging.getLogger(__name__)
+
+
+###############################################################################
+# exceptions
+###############################################################################
+
+class BaseXmlParsingError(Exception):
+
+    """
+    Base class for all XML parsing errors.
+    """
+
+
+class XmlSyntaxError(BaseXmlParsingError):
+
+    """
+    The value to be parsed is syntactically invalid XML.
+
+    It is also possible that some cases of maliciously constructed data are
+    reported as syntactically invalid XML e.g. a "billion laughs" attack.
+
+    """
+
+
+class XmlFeatureForbidden(BaseXmlParsingError):
+
+    """
+    The parsed XML contains/uses a feature that is forbidden.
+
+    Usually an XML feature is forbidden for security reasons, to prevent
+    some attack vectors.
+
+    .. seealso::
+        https://docs.python.org/3/library/xml.html#xml-vulnerabilities
+
+    """
+
+
+class UnknownXmlParsingError(BaseXmlParsingError):
+
+    """
+    An unknown XML parsing error, or one for which there is no handling implementation.
+
+    It is useful because the XML parsing process indirectly uses many
+    (standard and 3rd party) libraries, some of them with native
+    implementations and/or with a lot of obscure Python magic.
+
+    """
+
+
+class XmlSchemaDocValidationError(Exception):
+
+    """
+    XML document did not validate against an XML schema.
+
+    """
+
+
+###############################################################################
+# functions
+###############################################################################
+
+def parse_untrusted_xml(value: bytes) -> lxml.etree.ElementBase:
+    """
+    Parse XML-encoded content in value.
+
+    .. note::
+        It is ok to use it for parsing untrusted or unauthenticated data.
+        See https://docs.python.org/3/library/xml.html#xml-vulnerabilities
+
+    .. warning::
+        It is possible that for some cases of maliciously constructed data an
+        ``XmlSyntaxError`` will be raised instead of a ``XmlFeatureForbidden``
+        exception.
+
+    :raises TypeError:
+    :raises XmlSyntaxError: if it is not syntactically valid XML
+    :raises XmlFeatureForbidden: if the parsed XML document contains/uses a
+        feature that is forbidden
+    :raises UnknownXmlParsingError: unknown XML parsing error, or one for which
+        there is no handling implementation
+
+    """
+    # TODO: limit input max size (it might not be straightforward if value is a generator, which
+    #   would be desirable).
+
+    if not isinstance(value, bytes):
+        raise TypeError("Value to be parsed as XML must be bytes.")
+
+    # note: with this call, 'defusedxml' will
+    # - create a custom parser (instance of 'lxml.etree.XMLParser'), which is what will
+    #   fundamentally add safety to the parsing (e.g. using 'defusedxml.lxml.RestrictedElement'
+    #   as a custom version of 'lxml.etree.ElementBase'),
+    # - call the original 'lxml.etree.fromstring' (binary code),
+    # - run 'defusedxml.lxml.check_docinfo'.
+
+    # warning: do NOT change the exception handling order.
+    try:
+
+        xml_root_em = defusedxml.lxml.fromstring(
+            text=value,
+            parser=None,           # default: None (a custom one will be created)
+            base_url=None,         # default: None
+            forbid_dtd=False,      # default: False (allow Document Type Definition)
+            forbid_entities=True,  # default: True (forbid Entity definitions/declarations)
+        )  # type: lxml.etree.ElementBase
+
+    except (defusedxml.DTDForbidden,
+            defusedxml.EntitiesForbidden,
+            defusedxml.ExternalReferenceForbidden) as exc:
+        # note: we'd rather use 'defusedxml.DefusedXmlException' but that would catch
+        #   'defusedxml.NotSupportedError' as well
+
+        raise XmlFeatureForbidden("XML uses or contains a forbidden feature.") from exc
+
+    except lxml.etree.XMLSyntaxError as exc:
+        # note: the MRO of this exception class is:
+        # - XMLSyntaxError: "Syntax error while parsing an XML document."
+        # - ParseError: "Syntax error while parsing an XML document."
+        #   note: do not confuse it with the almost identically named 'lxml.etree.ParserError'
+        #   ("Internal lxml parser error"), whose parent class *is not* 'LxmlSyntaxError'.
+        # - LxmlSyntaxError: "Base class for all syntax errors."
+        # - LxmlError: "Main exception base class for lxml. All other exceptions inherit from
+        #   this one."
+        # - lxml.etree.Error: "Common base class for all non-exit exceptions."
+
+        # 'exc.msg' is a user-friendly error msg and includes the reference to line and column
+        #   e.g. "Detected an entity reference loop, line 1, column 7".
+        # Thus we do not need these attributes: (exc.position, exc.lineno, exc.offset)
+        exc_msg = "XML syntax error. {}.".format(exc.msg)
+        raise XmlSyntaxError(exc_msg) from exc
+
+    except xml.parsers.expat.ExpatError as exc:
+        # TODO: if this is reached it means we should improve this exception handler (even if
+        #   it is just to raise the same exception with a different message) because
+        #   it is a good idea to determine whether the source of the problem really is the
+        #   XML-encoded content.
+
+        # https://docs.python.org/3/library/pyexpat.html#expaterror-exceptions
+        # https://docs.python.org/3/library/pyexpat.html#xml.parsers.expat.errors.messages
+        # e.g.
+        #   "unknown encoding"
+        #   "mismatched tag"
+        #   "parsing aborted"
+        #   "out of memory"
+
+        # For sanity crop the XML-encoded content to max 1 KiB (arbitrary value).
+        log_msg = "Unexpected XML 'ExpatError' at line {} offset {}: {}. Content: %s".format(
+            exc.lineno, exc.offset, xml.parsers.expat.errors.messages[exc.code])
+        logger.exception(log_msg, str(value[:1024]))
+
+        exc_msg = "Unexpected error while parsing value as XML. Line {}, offset {}.".format(
+            exc.lineno, exc.offset)
+        raise UnknownXmlParsingError(exc_msg) from exc
+
+    except lxml.etree.LxmlError as exc:
+        # TODO: if this is reached it means we should add another exception handler (even if
+        #   it is just to raise the same exception with the same message) because it is a good
+        #   idea to determine whether the source of the problem really is the XML-encoded content.
+
+        # For sanity crop the XML-encoded content to max 1 KiB (arbitrary value).
+        log_msg = "Unexpected 'LxmlError' that is not an 'XMLSyntaxError'. Content: %s"
+        logger.exception(log_msg, str(value[:1024]))
+
+        exc_msg = "Unexpected error while parsing value as XML."
+        raise UnknownXmlParsingError(exc_msg) from exc
+
+    except ValueError as exc:
+        # TODO: if this is reached it means we should add another exception handler (even if
+        #   it is just to raise the same exception with the same message) because it is a good
+        #   idea to determine whether the source of the problem really is the XML-encoded content.
+
+        # For sanity crop the XML-encoded content to max 1 KiB (arbitrary value).
+        log_msg = "Unexpected error while parsing value as XML. Content: %s"
+        logger.exception(log_msg, str(value[:1024]))
+
+        exc_msg = "Unexpected error while parsing value as XML."
+        raise UnknownXmlParsingError(exc_msg) from exc
+
+    return xml_root_em
+
+
+def read_xml_schema(filename: str) -> lxml.etree.XMLSchema:
+    """
+    Instantiate an XML schema object from a file.
+
+    :raises ValueError: if there is no file at ``filename``
+
+    """
+    if os.path.exists(filename) and os.path.isfile(filename):
+        return lxml.etree.XMLSchema(file=filename)
+    raise ValueError("XML schema file not found.", filename)
+
+
+def validate_xml_doc(xml_schema: lxml.etree.XMLSchema, xml_doc: lxml.etree.ElementBase) -> None:
+    """
+    Validate ``xml_doc`` against XML schema ``xml_schema``.
+
+    :raises XmlSchemaDocValidationError: if ``xml_doc`` did not validate
+        against ``xml_schema``
+
+    """
+    # There are several ways to validate 'xml_doc' according to an 'xml_schema'.
+    #   Different calls and what happens if validation passes or fails:
+    #   - xml_schema.assert_(xml_doc): nothing / raises 'AssertionError'
+    #   - xml_schema.assertValid(xml_doc): nothing / raises 'DocumentInvalid'
+    #   - xml_schema.validate(xml_doc): returns True / returns False
+
+    try:
+        xml_schema.assertValid(xml_doc)
+    except lxml.etree.DocumentInvalid as exc:
+        # note: 'exc.error_log' and 'xml_schema.error_log' are the same object
+        #   (type 'lxml.etree._ListErrorLog').
+
+        # TODO: advanced error details parsing, without leaking too much information.
+        # xml_error_log = exc.error_log  # type: lxml.etree._ListErrorLog
+        # last_xml_error = exc.error_log.last_error  # type: lxml.etree._LogEntry
+        # last_xml_error_xml_doc_line = last_xml_error.line
+
+        # TODO: does 'xml_schema.error_log' persist? is it necessary to clear it afterwards?
+        #   `xml_schema._clear_error_log()`
+
+        # Simplest and safest way to get the error message.
+        # Error example:
+        #   "Element 'DTE': No matching global declaration available for the validation root., line 2"  # noqa: E501
+        validation_error_msg = str(exc)
+
+        raise XmlSchemaDocValidationError(validation_error_msg) from exc
diff --git a/setup.cfg b/setup.cfg
index 93144517..79258364 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,9 +27,15 @@ disallow_untyped_defs = True
 check_untyped_defs = True
 warn_return_any = True
 
+[mypy-defusedxml.*]
+ignore_missing_imports = True
+
 [mypy-django.*]
 ignore_missing_imports = True
 
+[mypy-lxml.*]
+ignore_missing_imports = True
+
 [mypy-marshmallow.*]
 ignore_missing_imports = True
 
diff --git a/tests/test_data/xml/attacks/billion-laughs-1.xml b/tests/test_data/xml/attacks/billion-laughs-1.xml
new file mode 100644
index 00000000..0cb7b935
--- /dev/null
+++ b/tests/test_data/xml/attacks/billion-laughs-1.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!-- Source: https://en.wikipedia.org/wiki/Billion_laughs_attack#Code_example -->
+<!DOCTYPE lolz [
+ <!ENTITY lol "lol">
+ <!ELEMENT lolz (#PCDATA)>
+ <!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
+ <!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
+ <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
+ <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
+ <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
+ <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
+ <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
+ <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
+ <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
+]>
+<lolz>&lol9;</lolz>
diff --git a/tests/test_data/xml/attacks/billion-laughs-2.xml b/tests/test_data/xml/attacks/billion-laughs-2.xml
new file mode 100644
index 00000000..14c8ae80
--- /dev/null
+++ b/tests/test_data/xml/attacks/billion-laughs-2.xml
@@ -0,0 +1,8 @@
+<!-- Source: https://pypi.org/project/defusedxml/#billion-laughs-exponential-entity-expansion -->
+<!DOCTYPE xmlbomb [
+<!ENTITY a "1234567890" >
+<!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;">
+<!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;">
+<!ENTITY d "&c;&c;&c;&c;&c;&c;&c;&c;">
+]>
+<bomb>&d;</bomb>
diff --git a/tests/test_data/xml/attacks/external-entity-expansion-remote.xml b/tests/test_data/xml/attacks/external-entity-expansion-remote.xml
new file mode 100644
index 00000000..b5eb9a10
--- /dev/null
+++ b/tests/test_data/xml/attacks/external-entity-expansion-remote.xml
@@ -0,0 +1,5 @@
+<!-- Source: https://pypi.org/project/defusedxml/#external-entity-expansion-remote -->
+<!DOCTYPE external [
+<!ENTITY ee SYSTEM "http://www.python.org/some.xml">
+]>
+<root>&ee;</root>
diff --git a/tests/test_data/xml/attacks/quadratic-blowup-entity-expansion.xml b/tests/test_data/xml/attacks/quadratic-blowup-entity-expansion.xml
new file mode 100644
index 00000000..84bd11c1
--- /dev/null
+++ b/tests/test_data/xml/attacks/quadratic-blowup-entity-expansion.xml
@@ -0,0 +1,5 @@
+<!-- Source: https://pypi.org/project/defusedxml/#quadratic-blowup-entity-expansion -->
+<!DOCTYPE bomb [
+<!ENTITY a "xxxxxxx... a couple of ten thousand chars">
+]>
+<bomb>&a;&a;&a;... repeat</bomb>
diff --git a/tests/test_data/xml/trivial.xml b/tests/test_data/xml/trivial.xml
new file mode 100644
index 00000000..69a940b8
--- /dev/null
+++ b/tests/test_data/xml/trivial.xml
@@ -0,0 +1,5 @@
+<root>
+   <element key='value'>text</element>
+   <element>text</element>tail
+   <empty-element/>
+</root>
diff --git a/tests/test_xml_utils.py b/tests/test_xml_utils.py
new file mode 100644
index 00000000..bb04316e
--- /dev/null
+++ b/tests/test_xml_utils.py
@@ -0,0 +1,101 @@
+import unittest
+
+import lxml.etree
+
+from cl_sii.libs.xml_utils import (  # noqa: F401
+    XmlSyntaxError, XmlFeatureForbidden,
+    parse_untrusted_xml, read_xml_schema, validate_xml_doc,
+)
+
+from .utils import read_test_file_bytes
+
+
+class FunctionParseUntrustedXmlTests(unittest.TestCase):
+
+    def test_parse_untrusted_xml_valid(self) -> None:
+        value = (
+            b'<root>\n'
+            b'   <element key="value">text</element>\n'
+            b'   <element>text</element>tail\n'
+            b'   <empty-element/>\n'
+            b'</root>')
+        xml = parse_untrusted_xml(value)
+        self.assertIsInstance(xml, lxml.etree.ElementBase)
+        # print(xml)
+        self.assertEqual(
+            lxml.etree.tostring(xml, pretty_print=False),
+            value)
+
+    def test_bytes_text(self) -> None:
+        value = b'not xml'  # type: ignore
+        with self.assertRaises(XmlSyntaxError) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("XML syntax error. Start tag expected, '<' not found, line 1, column 1.", )
+        )
+
+    def test_attack_billion_laughs_1(self) -> None:
+        value = read_test_file_bytes('test_data/xml/attacks/billion-laughs-1.xml')
+        with self.assertRaises(XmlSyntaxError) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("XML syntax error. Detected an entity reference loop, line 1, column 7.", )
+        )
+
+    def test_attack_billion_laughs_2(self) -> None:
+        value = read_test_file_bytes('test_data/xml/attacks/billion-laughs-2.xml')
+        with self.assertRaises(XmlSyntaxError) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("XML syntax error. Detected an entity reference loop, line 1, column 4.", )
+        )
+
+    def test_attack_quadratic_blowup(self) -> None:
+        value = read_test_file_bytes('test_data/xml/attacks/quadratic-blowup-entity-expansion.xml')
+        with self.assertRaises(XmlFeatureForbidden) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("XML uses or contains a forbidden feature.", )
+        )
+
+    def test_attack_external_entity_expansion_remote(self) -> None:
+        value = read_test_file_bytes('test_data/xml/attacks/external-entity-expansion-remote.xml')
+        with self.assertRaises(XmlFeatureForbidden) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("XML uses or contains a forbidden feature.", )
+        )
+
+    def test_type_error(self) -> None:
+        value = 1  # type: ignore
+        with self.assertRaises(TypeError) as cm:
+            parse_untrusted_xml(value)
+
+        self.assertSequenceEqual(
+            cm.exception.args,
+            ("Value to be parsed as XML must be bytes.", )
+        )
+
+
+class FunctionReadXmlSchemaTest(unittest.TestCase):
+
+    # TODO: implement
+
+    pass
+
+
+class FunctionValidateXmlDocTest(unittest.TestCase):
+
+    # TODO: implement
+
+    pass
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 00000000..ae424d5d
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,15 @@
+import os
+
+
+_TESTS_DIR_PATH = os.path.dirname(__file__)
+
+
+def read_test_file_bytes(path: str) -> bytes:
+    filepath = os.path.join(
+        _TESTS_DIR_PATH,
+        path,
+    )
+    with open(filepath, mode='rb') as file:
+        content = file.read()
+
+    return content

From 136eb7abfb984e09a98b1e83c6ce535f086cc767 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Tue, 15 Jan 2019 21:25:45 -0300
Subject: [PATCH 12/15] dte: add module `data_models`

Include the following 'dataclasses' (and some related validators):
- `DteNaturalKey`
- `DteDataL0`
- `DteDataL1`
- `DteDataL2`

Tests have not been implemented.
---
 cl_sii/dte/data_models.py     | 287 ++++++++++++++++++++++++++++++++++
 tests/test_dte_data_models.py |  48 ++++++
 2 files changed, 335 insertions(+)
 create mode 100644 cl_sii/dte/data_models.py
 create mode 100644 tests/test_dte_data_models.py

diff --git a/cl_sii/dte/data_models.py b/cl_sii/dte/data_models.py
new file mode 100644
index 00000000..14a32553
--- /dev/null
+++ b/cl_sii/dte/data_models.py
@@ -0,0 +1,287 @@
+import dataclasses
+from dataclasses import field as dc_field
+from datetime import date
+from typing import Dict, Optional
+
+import cl_sii.contribuyente.constants
+import cl_sii.rut.constants
+from cl_sii.rut import Rut
+
+from . import constants
+from .constants import TipoDteEnum
+
+
+def validate_dte_folio(value: int) -> None:
+    """
+    Validate value for DTE field ``folio``.
+
+    :raises ValueError:
+    :raises TypeError:
+
+    """
+    # note: mypy gets confused and complains about "Unsupported operand types for >/<".
+    if (value < constants.DTE_FOLIO_FIELD_MIN_VALUE  # type: ignore
+            or value > constants.DTE_FOLIO_FIELD_MAX_VALUE):  # type: ignore
+        raise ValueError("Value is out of the valid range for 'folio'.")
+
+
+def validate_dte_monto_total(value: int) -> None:
+    """
+    Validate value for DTE field ``monto_total``.
+
+    :raises ValueError:
+    :raises TypeError:
+
+    """
+    # note: mypy gets confused and complains about "Unsupported operand types for >/<".
+    if (value < constants.DTE_MONTO_TOTAL_FIELD_MIN_VALUE  # type: ignore
+            or value > constants.DTE_MONTO_TOTAL_FIELD_MAX_VALUE):  # type: ignore
+        raise ValueError("Value is out of the valid range for 'monto_total'.")
+
+
+def validate_contribuyente_razon_social(value: str) -> None:
+    """
+    Validate value for the "razón social" of a "contribuyente".
+
+    :raises ValueError:
+    :raises TypeError:
+
+    """
+    if len(value) > len(value.strip()):
+        raise ValueError("Value must not have leading or trailing whitespace.")
+
+    if len(value) < 1:
+        raise ValueError("Value must not be empty.")
+
+    if len(value) > cl_sii.contribuyente.constants.RAZON_SOCIAL_LONG_MAX_LENGTH:
+        raise ValueError("Value exceeds max allowed length.")
+
+
+@dataclasses.dataclass(frozen=True)
+class DteNaturalKey:
+
+    """
+    Natural key of a DTE.
+
+    The class instances are immutable.
+
+    This group of fields uniquely identifies a DTE.
+
+    >>> instance = DteNaturalKey(Rut('60910000-1'), TipoDteEnum.FACTURA_ELECTRONICA, 2093465)
+
+    >>> str(instance)
+    "DteNaturalKey(" \
+    "emisor_rut=Rut('60910000-1'), tipo_dte=<TipoDteEnum.FACTURA_ELECTRONICA: 33>, folio=2093465)"
+    >>> str(instance) == repr(instance)
+    True
+    >>> instance.slug
+    '60910000-1--33--2093465'
+
+    """
+
+    emisor_rut: Rut = dc_field()
+    """
+    RUT of the "emisor" of the DTE.
+    """
+
+    tipo_dte: TipoDteEnum = dc_field()
+    """
+    The kind of DTE.
+    """
+
+    folio: int = dc_field()
+    """
+    The sequential number of a DTE of given kind issued by 'emisor_rut'.
+    """
+
+    def __post_init__(self) -> None:
+        """
+        Run validation automatically after setting the fields values.
+
+        :raises TypeError, ValueError:
+
+        """
+
+        if not isinstance(self.emisor_rut, Rut):
+            raise TypeError("Inappropriate type of 'emisor_rut'.")
+
+        if not isinstance(self.tipo_dte, TipoDteEnum):
+            raise TypeError("Inappropriate type of 'tipo_dte'.")
+
+        if not isinstance(self.folio, int):
+            raise TypeError("Inappropriate type of 'folio'.")
+
+        validate_dte_folio(self.folio)
+
+    def as_dict(self) -> Dict[str, object]:
+        return dataclasses.asdict(self)
+
+    @property
+    def slug(self) -> str:
+        """
+        Return a slug representation (that preserves uniqueness) of the instance.
+        """
+        # note: many alternatives were considered and discarded such as:
+        #   f'{self.emisor_rut}-{self.tipo_dte}-{self.folio}'
+        #   f'{self.emisor_rut}.{self.tipo_dte}.{self.folio}'
+        #   f'{self.emisor_rut}/{self.tipo_dte}/{self.folio}'
+        #   f'R-{self.emisor_rut}-T-{self.tipo_dte}-F-{self.folio}'
+        #   f'rut-{self.emisor_rut}-tipo-{self.tipo_dte}-folio-{self.folio}'
+
+        return f'{self.emisor_rut}--{self.tipo_dte}--{self.folio}'
+
+
+@dataclasses.dataclass(frozen=True)
+class DteDataL0(DteNaturalKey):
+
+    """
+    DTE data level 0.
+
+    Its fields are enough to uniquely identify a DTE but nothing more.
+
+    The class instances are immutable.
+
+    >>> instance = DteDataL0(
+    ...     Rut('60910000-1'), TipoDteEnum.FACTURA_ELECTRONICA, 2093465,
+    ... )
+
+    >>> str(instance)
+    "DteDataL0(" \
+    "emisor_rut=Rut('60910000-1'), tipo_dte=<TipoDteEnum.FACTURA_ELECTRONICA: 33>, " \
+    "folio=2093465)"
+    >>> str(instance) == repr(instance)
+    True
+    >>> instance.slug
+    '60910000-1--33--2093465'
+    >>> instance.natural_key
+    "DteNaturalKey(" \
+    "emisor_rut=Rut('60910000-1'), tipo_dte=<TipoDteEnum.FACTURA_ELECTRONICA: 33>, folio=2093465)"
+
+    """
+
+    @property
+    def natural_key(self) -> DteNaturalKey:
+        return DteNaturalKey(emisor_rut=self.emisor_rut, tipo_dte=self.tipo_dte, folio=self.folio)
+
+
+@dataclasses.dataclass(frozen=True)
+class DteDataL1(DteDataL0):
+
+    """
+    DTE data level 1.
+
+    It is the minimal set of DTE data fields that are useful.
+    For example, SII has an endpoint that confirms that a given DTE exists,
+    and the data that it requires can be obtained from this struct.
+
+    The class instances are immutable.
+
+    >>> instance = DteDataL1(
+    ...     Rut('60910000-1'), TipoDteEnum.FACTURA_ELECTRONICA, 2093465, date(2018, 5, 7),
+    ...     Rut('60910000-1'), 10403)
+
+    >>> str(instance)
+    "DteDataL1(" \
+    "emisor_rut=Rut('60910000-1'), tipo_dte=<TipoDteEnum.FACTURA_ELECTRONICA: 33>, " \
+    "folio=2093465, fecha_emision_date=datetime.date(2018, 5, 7), " \
+    "receptor_rut=Rut('60910000-1'), monto_total=10403)"
+    >>> str(instance) == repr(instance)
+    True
+
+    """
+
+    fecha_emision_date: date = dc_field()
+    """
+    Field 'fecha_emision' of the DTE.
+
+    .. warning:: It may not match the **real date** on which the DTE was issued
+        or received/processed by SII.
+
+    """
+
+    receptor_rut: Rut = dc_field()
+    """
+    RUT of the "receptor" of the DTE.
+    """
+
+    monto_total: int = dc_field()
+    """
+    Total amount of the DTE.
+    """
+
+    def __post_init__(self) -> None:
+        """
+        Run validation automatically after setting the fields values.
+
+        :raises TypeError, ValueError:
+
+        """
+        super().__post_init__()
+
+        if not isinstance(self.fecha_emision_date, date):
+            raise TypeError("Inappropriate type of 'fecha_emision_date'.")
+
+        if not isinstance(self.receptor_rut, Rut):
+            raise TypeError("Inappropriate type of 'receptor_rut'.")
+
+        if not isinstance(self.monto_total, int):
+            raise TypeError("Inappropriate type of 'monto_total'.")
+
+        validate_dte_monto_total(self.monto_total)
+
+    @property
+    def natural_key(self) -> DteNaturalKey:
+        return DteNaturalKey(emisor_rut=self.emisor_rut, tipo_dte=self.tipo_dte, folio=self.folio)
+
+
+@dataclasses.dataclass(frozen=True)
+class DteDataL2(DteDataL1):
+
+    """
+    DTE data level 2.
+
+    About fields
+    - ``emisor_razon_social``: redundant but required by the DTE XML schema.
+    - ``receptor_razon_social``: redundant but required by the DTE XML schema.
+    - ``fecha_vencimiento_date``: important for some business logic
+      but it is not required by the DTE XML schema.
+
+    The class instances are immutable.
+
+    """
+
+    emisor_razon_social: str = dc_field()
+    """
+    "Razón social" (legal name) of the "emisor" of the DTE.
+    """
+
+    receptor_razon_social: str = dc_field()
+    """
+    "Razón social" (legal name) of the "receptor" of the DTE.
+    """
+
+    fecha_vencimiento_date: Optional[date] = dc_field(default=None)
+    """
+    "Fecha de vencimiento (pago)" of the DTE.
+    """
+
+    def __post_init__(self) -> None:
+        """
+        Run validation automatically after setting the fields values.
+
+        :raises TypeError, ValueError:
+
+        """
+        super().__post_init__()
+
+        if not isinstance(self.emisor_razon_social, str):
+            raise TypeError("Inappropriate type of 'emisor_razon_social'.")
+        validate_contribuyente_razon_social(self.emisor_razon_social)
+
+        if not isinstance(self.receptor_razon_social, str):
+            raise TypeError("Inappropriate type of 'receptor_razon_social'.")
+        validate_contribuyente_razon_social(self.receptor_razon_social)
+
+        if self.fecha_vencimiento_date is not None:
+            if not isinstance(self.fecha_vencimiento_date, date):
+                raise TypeError("Inappropriate type of 'fecha_vencimiento_date'.")
diff --git a/tests/test_dte_data_models.py b/tests/test_dte_data_models.py
new file mode 100644
index 00000000..648c7251
--- /dev/null
+++ b/tests/test_dte_data_models.py
@@ -0,0 +1,48 @@
+import unittest
+
+from cl_sii.rut import Rut  # noqa: F401
+
+from cl_sii.dte.constants import TipoDteEnum  # noqa: F401
+from cl_sii.dte.data_models import (  # noqa: F401
+    DteDataL0, DteDataL1, DteDataL2, DteNaturalKey,
+    validate_contribuyente_razon_social, validate_dte_folio, validate_dte_monto_total,
+)
+
+
+class DteNaturalKeyTest(unittest.TestCase):
+
+    # TODO: implement!
+    pass
+
+
+class DteDataL0Test(unittest.TestCase):
+
+    # TODO: implement!
+    pass
+
+
+class DteDataL1Test(unittest.TestCase):
+
+    # TODO: implement!
+    pass
+
+
+class DteDataL2Test(unittest.TestCase):
+
+    # TODO: implement!
+    pass
+
+
+class FunctionsTest(unittest.TestCase):
+
+    def test_validate_contribuyente_razon_social(self) -> None:
+        # TODO: implement!
+        pass
+
+    def test_validate_dte_folio(self) -> None:
+        # TODO: implement!
+        pass
+
+    def test_validate_dte_monto_total(self) -> None:
+        # TODO: implement!
+        pass

From ceb260e37af89150ca87c70199d66e25d33cb895 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Thu, 17 Jan 2019 18:39:02 -0300
Subject: [PATCH 13/15] dte: add module `parse`

Code for parsing DTE data from representation as XML document
(support for other representations may be added later on).

Main function is `parse_dte_xml`, and the helpers are `clean_dte_xml`
and `validate_dte_xml`.

Tests have not been implemented.
---
 cl_sii/dte/parse.py     | 218 ++++++++++++++++++++++++++++++++++++++++
 tests/test_dte_parse.py |  34 +++++++
 2 files changed, 252 insertions(+)
 create mode 100644 cl_sii/dte/parse.py
 create mode 100644 tests/test_dte_parse.py

diff --git a/cl_sii/dte/parse.py b/cl_sii/dte/parse.py
new file mode 100644
index 00000000..8da7e5ff
--- /dev/null
+++ b/cl_sii/dte/parse.py
@@ -0,0 +1,218 @@
+"""
+Helpers for parsing DTE data from representations such as XML documents.
+
+
+Usage:
+
+>>> from cl_sii.dte import parse
+>>> from cl_sii.libs import xml_utils
+
+>>> with open('/dir/my_file.xml', mode='rb') as f:
+...     xml_doc = xml_utils.parse_untrusted_xml(f.read())
+
+>>> parse.clean_dte_xml(xml_doc)
+True
+>>> parse.validate_dte_xml(xml_doc)
+>>> dte_struct = parse.parse_dte_xml(xml_doc)
+
+"""
+import logging
+import os
+from dataclasses import MISSING, _MISSING_TYPE
+from datetime import date
+from typing import Optional, Union
+
+import lxml.etree
+
+from cl_sii.libs import xml_utils
+from cl_sii.rut import Rut
+from . import constants
+from . import data_models
+
+
+logger = logging.getLogger(__name__)
+
+
+DTE_XMLNS_MAP = {
+    'sii-dte': 'http://www.sii.cl/SiiDte',
+}
+"""
+Mapping from XML namespace prefix to full name, for DTE processing.
+"""
+
+
+_DTE_XML_SCHEMA_PATH = os.path.abspath(
+    os.path.join(
+        os.path.dirname(os.path.dirname(__file__)),
+        'data/ref/factura_electronica/schema_dte/EnvioDTE_v10.xsd',
+    )
+)
+DTE_XML_SCHEMA_OBJ = xml_utils.read_xml_schema(_DTE_XML_SCHEMA_PATH)
+"""
+XML schema obj for DTE XML document validation.
+
+It is read from a file at import time to avoid unnecessary reads afterwards.
+"""
+
+
+###############################################################################
+# main functions
+###############################################################################
+
+def clean_dte_xml(xml_doc: lxml.etree.ElementBase) -> bool:
+    """
+    Remove some non-compliant (DTE XML schema) data from the document of ``xml_doc``.
+
+    Only the popular but non-standard element 'DocPersonalizado' (searched as
+    a child of the document root) is removed; other non-compliant data is kept.
+
+    The document is modified in-place.
+
+    :returns: whether the document was modified or not
+
+    """
+    modified = False
+
+    xml_etree = xml_doc.getroottree()
+
+    # Detach from the actual parent: it is the root, which may differ from 'xml_doc'.
+    xml_em = xml_etree.find('sii-dte:DocPersonalizado', namespaces=DTE_XMLNS_MAP)
+    if xml_em is not None:
+        modified = True
+        xml_em.getparent().remove(xml_em)
+
+    return modified
+
+
+def validate_dte_xml(xml_doc: lxml.etree.ElementBase) -> None:
+    """
+    Validate ``xml_doc`` against DTE's XML schema.
+
+    :raises xml_utils.XmlSchemaDocValidationError:
+
+    """
+    # TODO: add better and more precise exception handling.
+    xml_utils.validate_xml_doc(DTE_XML_SCHEMA_OBJ, xml_doc)
+
+
+def parse_dte_xml(xml_doc: lxml.etree.ElementBase) -> data_models.DteDataL2:
+    """
+    Parse and deserialize DTE data from ``xml_doc``.
+
+    """
+    # TODO: separate the XML parsing stage from the deserialization stage, which could be
+    #   performed by XML-agnostic code (perhaps using Marshmallow or data classes?).
+    #   See :class:`cl_sii.rcv.parse.RcvCsvRowSchema`.
+
+    xml_element_root_tree = xml_doc.getroottree()
+
+    obj_struct = data_models.DteDataL2(
+        emisor_rut=_get_emisor_rut(xml_element_root_tree),
+        tipo_dte=_get_tipo_dte(xml_element_root_tree),
+        folio=_get_folio(xml_element_root_tree),
+        fecha_emision_date=_get_fecha_emision(xml_element_root_tree),
+        receptor_rut=_get_receptor_rut(xml_element_root_tree),
+        monto_total=_get_monto_total(xml_element_root_tree),
+        emisor_razon_social=_get_emisor_razon_social(xml_element_root_tree),
+        receptor_razon_social=_get_receptor_razon_social(xml_element_root_tree),
+        fecha_vencimiento_date=_get_fecha_vencimiento(xml_element_root_tree, default=None),
+    )
+
+    return obj_struct
+
+
+###############################################################################
+# helpers
+###############################################################################
+
+def _get_tipo_dte(xml_etree: lxml.etree.ElementTree) -> constants.TipoDteEnum:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:IdDoc/sii-dte:TipoDTE'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'TipoDTE' was not found in the XML document.")
+    return constants.TipoDteEnum(int(value_str))
+
+
+def _get_folio(xml_etree: lxml.etree.ElementTree) -> int:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:IdDoc/sii-dte:Folio'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'Folio' was not found in the XML document.")
+    return int(value_str)
+
+
+def _get_fecha_emision(xml_etree: lxml.etree.ElementTree) -> date:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:IdDoc/sii-dte:FchEmis'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'FchEmis' was not found in the XML document.")
+    return date.fromisoformat(value_str)
+
+
+def _get_fecha_vencimiento(
+    xml_etree: lxml.etree.ElementTree,
+    default: Union[date, None, _MISSING_TYPE] = MISSING,
+) -> Optional[date]:
+
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:IdDoc/sii-dte:FchVenc'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        if default is None or isinstance(default, date):
+            value = default
+        elif default is MISSING:
+            raise Exception("Element 'FchVenc' was not found in the XML document.")
+        else:
+            raise TypeError("Invalid type of 'default'.")
+    else:
+        value = date.fromisoformat(value_str)
+
+    return value
+
+
+def _get_emisor_rut(xml_etree: lxml.etree.ElementTree) -> Rut:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:Emisor/sii-dte:RUTEmisor'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'RUTEmisor' was not found in the XML document.")
+    return Rut(value_str)
+
+
+def _get_emisor_razon_social(xml_etree: lxml.etree.ElementTree) -> str:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:Emisor/sii-dte:RznSoc'
+    # 'findtext' yields None when no element matches, hence the Optional annotation.
+    value_str: Optional[str] = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'RznSoc' was not found in the XML document.")
+    return value_str
+
+
+def _get_receptor_rut(xml_etree: lxml.etree.ElementTree) -> Rut:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:Receptor/sii-dte:RUTRecep'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'RUTRecep' was not found in the XML document.")
+    return Rut(value_str)
+
+
+def _get_receptor_razon_social(xml_etree: lxml.etree.ElementTree) -> str:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:Receptor/sii-dte:RznSocRecep'
+    # 'findtext' yields None when no element matches, hence the Optional annotation.
+    value_str: Optional[str] = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'RznSocRecep' was not found in the XML document.")
+    return value_str
+
+
+def _get_monto_total(xml_etree: lxml.etree.ElementTree) -> int:
+    em_path = 'sii-dte:Documento/sii-dte:Encabezado/sii-dte:Totales/sii-dte:MntTotal'
+
+    value_str = xml_etree.findtext(em_path, namespaces=DTE_XMLNS_MAP)
+    if value_str is None:
+        raise Exception("Element 'MntTotal' was not found in the XML document.")
+    return int(value_str)
diff --git a/tests/test_dte_parse.py b/tests/test_dte_parse.py
new file mode 100644
index 00000000..1e416018
--- /dev/null
+++ b/tests/test_dte_parse.py
@@ -0,0 +1,34 @@
+import unittest
+
+from cl_sii.dte.parse import (  # noqa: F401
+    clean_dte_xml, parse_dte_xml, validate_dte_xml,
+    DTE_XML_SCHEMA_OBJ, DTE_XMLNS_MAP,
+)
+
+
+# TODO: add a real DTE XML file in 'tests/test_data/dte/'.
+
+
+class OthersTest(unittest.TestCase):
+
+    def test_DTE_XML_SCHEMA_OBJ(self) -> None:
+        # TODO: implement
+        pass
+
+
+class FunctionCleanDteXmlTest(unittest.TestCase):
+
+    # TODO: implement
+    pass
+
+
+class FunctionParseDteXmlTest(unittest.TestCase):
+
+    # TODO: implement
+    pass
+
+
+class FunctionValidateDteXmlTest(unittest.TestCase):
+
+    # TODO: implement
+    pass

From de73bd8ec86a935b61bd00af7662c270a83261ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Fri, 5 Apr 2019 12:05:52 -0300
Subject: [PATCH 14/15] HISTORY: update for new version

---
 HISTORY.rst | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 61295351..156f2111 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,15 +3,20 @@
 History
 -------
 
-unreleased (YYYY-MM-DD)
+0.3.0 (2019-04-05)
 +++++++++++++++++++++++
 
+* (PR #11, 2019-04-05) dte: add module ``parse``
+* (PR #10, 2019-04-05) dte: add module ``data_models``
+* (PR #9, 2019-04-05) libs: add module ``xml_utils``
+* (PR #8, 2019-04-05) add sub-package ``rcv``
+
 0.2.0 (2019-04-04)
 +++++++++++++++++++++++
 
-- (PR #6, 2019-04-04) data.ref: add XML schemas of "factura electrónica"
-- (PR #5, 2019-04-04) extras: add 'RutField' for Django models, DRF and MM
-- (PR #4, 2019-04-04) Config CircleCI
+* (PR #6, 2019-04-04) data.ref: add XML schemas of "factura electrónica"
+* (PR #5, 2019-04-04) extras: add 'RutField' for Django models, DRF and MM
+* (PR #4, 2019-04-04) Config CircleCI
 
 0.1.0 (2019-04-04)
 +++++++++++++++++++++++

From 44329ddeb1ac4cdfb2580edb00e405f10e4e5537 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Germ=C3=A1n=20Larra=C3=ADn?=
 <glarrain@users.noreply.github.com>
Date: Fri, 5 Apr 2019 12:06:50 -0300
Subject: [PATCH 15/15] =?UTF-8?q?Bump=20version:=200.2.0=20=E2=86=92=200.3?=
 =?UTF-8?q?.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg   | 2 +-
 cl_sii/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 8bb38d7a..3cc5aae5 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.0
+current_version = 0.3.0
 commit = True
 tag = True
 
diff --git a/cl_sii/__init__.py b/cl_sii/__init__.py
index e015403b..dd17c5a4 100644
--- a/cl_sii/__init__.py
+++ b/cl_sii/__init__.py
@@ -5,4 +5,4 @@
 """
 
 
-__version__ = '0.2.0'
+__version__ = '0.3.0'