Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.2
current_version = 0.6.3
commit = True
tag = True

Expand Down
9 changes: 9 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
History
-------

0.6.3 (2019-05-24)
+++++++++++++++++++++++

* (PR #52, 2019-05-24) rcv: add module ``parse_csv``
* (PR #51, 2019-05-24) libs: add module ``rows_processing``
* (PR #50, 2019-05-24) libs: add module ``csv_utils``
* (PR #49, 2019-05-24) libs.mm_utils: add ``validate_no_unexpected_input_fields``
* (PR #48, 2019-05-24) dte.data_models: add ``DteDataL2.as_dte_data_l1``

0.6.2 (2019-05-15)
+++++++++++++++++++++++

Expand Down
2 changes: 1 addition & 1 deletion cl_sii/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
"""


__version__ = '0.6.2'
__version__ = '0.6.3'
9 changes: 9 additions & 0 deletions cl_sii/dte/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,12 @@ def __post_init__(self) -> None:
raise TypeError("Inappropriate type of 'receptor_email'.")
validate_clean_str(self.receptor_email)
validate_non_empty_str(self.receptor_email)

def as_dte_data_l1(self) -> DteDataL1:
    """
    Return a :class:`DteDataL1` built from this object's common fields.

    Only the fields that both data levels share are carried over; the
    extra "level 2" fields of this object are dropped.
    """
    shared_field_values = dict(
        emisor_rut=self.emisor_rut,
        tipo_dte=self.tipo_dte,
        folio=self.folio,
        fecha_emision_date=self.fecha_emision_date,
        receptor_rut=self.receptor_rut,
        monto_total=self.monto_total,
    )
    return DteDataL1(**shared_field_values)
49 changes: 49 additions & 0 deletions cl_sii/libs/csv_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import csv
from typing import IO, Sequence, Type, Union


def create_csv_dict_reader(
    text_stream: IO[str],
    csv_dialect: Type[csv.Dialect],
    row_dict_extra_fields_key: Union[str, None] = None,
    expected_fields_strict: bool = True,
    expected_field_names: Union[Sequence[str], None] = None,
) -> csv.DictReader:
    """
    Create a CSV dict reader with custom options.

    :param text_stream: text stream from which the CSV content will be read
    :param csv_dialect: CSV dialect class e.g. ``csv.excel``
    :param row_dict_extra_fields_key:
        CSV row dict key under which the extra data in the row will be saved
    :param expected_fields_strict:
        whether to verify that the CSV file's field names exactly match
        ``expected_field_names`` (same values, same order)
    :param expected_field_names:
        (required if ``expected_fields_strict`` is True)
        the field names expected in the CSV header row
    :return: a CSV DictReader
    :raises ValueError:
        if ``expected_fields_strict`` is True and either
        ``expected_field_names`` was not provided or the file's field names
        do not match it

    """
    # note: mypy wrongly complains: it does not accept 'fieldnames' to be None but that value
    # is completely acceptable, and it even is the default!
    # > error: Argument "fieldnames" to "DictReader" has incompatible type "None"; expected
    # > "Sequence[str]"
    # note: mypy wrongly complains:
    # > Argument "dialect" to "DictReader" has incompatible type "Type[Dialect]";
    # > expected "Union[str, Dialect]"
    csv_reader = csv.DictReader(  # type: ignore
        text_stream,
        fieldnames=None,  # the values of the first row will be used as the fieldnames
        restkey=row_dict_extra_fields_key,
        dialect=csv_dialect,
    )

    if expected_fields_strict:
        if expected_field_names:
            # Normalize BOTH sides to tuples before comparing. In Python a tuple
            # never compares equal to a list (or other sequence types), so comparing
            # 'tuple(...)' directly against the caller-supplied sequence made the
            # check spuriously fail whenever callers passed e.g. a list.
            if tuple(csv_reader.fieldnames) != tuple(expected_field_names):
                raise ValueError(
                    "CSV file field names do not match those expected, or their order.",
                    csv_reader.fieldnames)
        else:
            raise ValueError(
                "Param 'expected_field_names' is required if 'expected_fields_strict' is True.")

    return csv_reader
42 changes: 42 additions & 0 deletions cl_sii/libs/mm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,48 @@
import marshmallow.utils


###############################################################################
# validators
###############################################################################

def validate_no_unexpected_input_fields(
    schema: marshmallow.Schema,
    data: dict,
    original_data: dict,
) -> None:
    """
    Fail validation if there was an unexpected input field.

    An input field is "expected" when the schema declares a field whose
    name (or ``load_from`` alias, if set) matches the input key.

    Usage::

        class MySchema(marshmallow.Schema):

            class Meta:
                strict = True

            folio = marshmallow.fields.Integer()

            @marshmallow.validates_schema(pass_original=True)
            def validate_schema(self, data: dict, original_data: dict) -> None:
                validate_no_unexpected_input_fields(self, data, original_data)

    :raises marshmallow.ValidationError: if any input field is not declared
        by the schema

    """
    # Original inspiration from
    # https://marshmallow.readthedocs.io/en/2.x-line/extending.html#validating-original-input-data
    # note: iterate over the field objects directly; the dict keys are not needed.
    fields_name_or_load_from = {
        field.name if field.load_from is None else field.load_from
        for field in schema.fields.values()
    }
    unexpected_input_fields = set(original_data) - fields_name_or_load_from
    if unexpected_input_fields:
        raise marshmallow.ValidationError(
            "Unexpected input field.", field_names=list(unexpected_input_fields))


###############################################################################
# fields
###############################################################################

class CustomMarshmallowDateField(marshmallow.fields.Field):
"""
A formatted date string.
Expand Down
153 changes: 153 additions & 0 deletions cl_sii/libs/rows_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import csv
import logging

from typing import Dict, Iterable, Sequence, Tuple

import marshmallow


logger = logging.getLogger(__name__)


class MaxRowsExceeded(RuntimeError):
    """The maximum number of rows has been exceeded."""


###############################################################################
# iterators
###############################################################################

def csv_rows_mm_deserialization_iterator(
    csv_reader: csv.DictReader,
    row_schema: marshmallow.Schema,
    n_rows_offset: int = 0,
    max_n_rows: int = None,
    fields_to_remove_names: Sequence[str] = None,
) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]:
    """
    Marshmallow deserialization iterator over CSV rows.

    Iterate over ``csv_reader``, deserialize each row using ``row_schema``
    and yield the data before and after deserialization, plus any
    validation/deserialization errors.

    .. note:: The CSV header row is omitted, obviously.

    :param csv_reader:
    :param row_schema:
        Marshmallow schema for deserializing each CSV row
    :param n_rows_offset:
        (optional) number of rows to skip (and not deserialize)
    :param max_n_rows:
        (optional) max number of rows to deserialize (raise exception
        if exceeded); ``None`` means no limit
    :param fields_to_remove_names:
        (optional) the name of each field that must be removed (if it exists)
        from the row
    :returns:
        yields a tuple of (``row_ix`` (1-based), ``row_data``,
        ``deserialized_row_data``, ``validation_errors``)
    :raises MaxRowsExceeded:
        number of data rows processed exceeded ``max_n_rows``
    :raises RuntimeError:
        on CSV error when iterating over ``csv_reader``

    """
    # note: mypy complaint is wrong because a 'csv.DictReader' object can be iterated over
    # and yields instances of 'Dict[str, object]'.
    # > Incompatible types in assignment (expression has type "DictReader", variable has type
    # > "Iterable[Dict[str, object]]")
    dict_rows: Iterable[Dict[str, object]] = csv_reader  # type: ignore
    results = rows_mm_deserialization_iterator(
        dict_rows, row_schema, n_rows_offset, max_n_rows, fields_to_remove_names)

    # The try/except wraps the iteration itself so that CSV parse errors raised
    # while advancing the reader are converted as well.
    try:
        for result_tuple in results:
            # each item is (row_ix, row_data, deserialized_row_data, validation_errors)
            yield result_tuple
    except csv.Error as exc:
        raise RuntimeError(
            f"CSV error for line {csv_reader.line_num} of CSV file.") from exc


def rows_mm_deserialization_iterator(
    rows_iterator: Iterable[Dict[str, object]],
    row_schema: marshmallow.Schema,
    n_rows_offset: int = 0,
    max_n_rows: int = None,
    fields_to_remove_names: Sequence[str] = None,
) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]:
    """
    Marshmallow deserialization iterator.

    Iterate over ``rows_iterator``, deserialize each row using ``row_schema``
    and yield the data before and after deserialization, plus any
    validation/deserialization errors.

    :param rows_iterator:
    :param row_schema:
        Marshmallow schema for deserializing each row
    :param n_rows_offset:
        (optional) number of rows to skip (and not deserialize)
    :param max_n_rows:
        (optional) max number of rows to deserialize (raise exception
        if exceeded); ``None`` means no limit
    :param fields_to_remove_names:
        (optional) the name of each field that must be removed (if it exists)
        from the row
    :returns:
        yields a tuple of (``row_ix`` (1-based), ``row_data``,
        ``deserialized_row_data``, ``validation_errors``)
    :raises MaxRowsExceeded:
        number of data rows processed exceeded ``max_n_rows``

    """
    if not n_rows_offset >= 0:
        raise ValueError("Param 'n_rows_offset' must be an integer >= 0.")

    # Empty tuple as a safe immutable default (avoids a mutable default argument).
    fields_to_remove_names = fields_to_remove_names or ()

    # 'row_ix' is 1-based and counts ALL rows consumed, including skipped ones.
    for row_ix, row_data in enumerate(rows_iterator, start=1):
        # The limit applies to rows after the offset, hence the shifted threshold.
        if max_n_rows is not None and row_ix > max_n_rows + n_rows_offset:
            raise MaxRowsExceeded(f"Exceeded 'max_n_rows' limit: {max_n_rows}.")

        if row_ix <= n_rows_offset:
            continue

        # note: mutates the row dict in place; the same (modified) dict is
        # the 'row_data' yielded below.
        for _field_name in fields_to_remove_names:
            row_data.pop(_field_name, None)

        try:
            # marshmallow 2.x: 'load' returns an 'UnmarshalResult' of (data, errors);
            # validation errors may alternatively be RAISED (see 'strict' note below).
            mm_result: marshmallow.UnmarshalResult = row_schema.load(row_data)
            deserialized_row_data: dict = mm_result.data
            raised_validation_errors: dict = {}
            returned_validation_errors: dict = mm_result.errors
        except marshmallow.ValidationError as exc:
            deserialized_row_data = {}
            raised_validation_errors = dict(exc.normalized_messages())
            returned_validation_errors = {}

        # Merge both error channels; in practice only one of the two should be
        # non-empty for a given row (logged below if that invariant breaks).
        validation_errors = raised_validation_errors
        if returned_validation_errors:
            if row_schema.strict:
                # 'marshmallow.schema.BaseSchema':
                # > :param bool strict: If `True`, raise errors if invalid data are passed in
                # > instead of failing silently and storing the errors.
                logger.error(
                    "Marshmallow schema is 'strict' but validation errors were returned by "
                    "method 'load' ('UnmarshalResult.errors') instead of being raised. "
                    "Errors: %s",
                    repr(returned_validation_errors))
            if raised_validation_errors:
                logger.fatal(
                    "Programming error: either returned or raised validation errors "
                    "(depending on 'strict') but never both. "
                    "Returned errors: %s. Raised errors: %s",
                    repr(returned_validation_errors), repr(raised_validation_errors))

            validation_errors.update(returned_validation_errors)

        yield row_ix, row_data, deserialized_row_data, validation_errors
Loading