Merge pull request #53 from fyndata/develop
Release v0.6.3
glarrain committed May 25, 2019
2 parents dba26db + 4bea003 commit 7aad935
Showing 14 changed files with 795 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.6.2
+current_version = 0.6.3
commit = True
tag = True

9 changes: 9 additions & 0 deletions HISTORY.rst
@@ -3,6 +3,15 @@
History
-------

0.6.3 (2019-05-24)
+++++++++++++++++++++++

* (PR #52, 2019-05-24) rcv: add module ``parse_csv``
* (PR #51, 2019-05-24) libs: add module ``rows_processing``
* (PR #50, 2019-05-24) libs: add module ``csv_utils``
* (PR #49, 2019-05-24) libs.mm_utils: add ``validate_no_unexpected_input_fields``
* (PR #48, 2019-05-24) dte.data_models: add ``DteDataL2.as_dte_data_l1``

0.6.2 (2019-05-15)
+++++++++++++++++++++++

2 changes: 1 addition & 1 deletion cl_sii/__init__.py
@@ -5,4 +5,4 @@
"""


-__version__ = '0.6.2'
+__version__ = '0.6.3'
9 changes: 9 additions & 0 deletions cl_sii/dte/data_models.py
@@ -439,3 +439,12 @@ def __post_init__(self) -> None:
raise TypeError("Inappropriate type of 'receptor_email'.")
validate_clean_str(self.receptor_email)
validate_non_empty_str(self.receptor_email)

def as_dte_data_l1(self) -> DteDataL1:
return DteDataL1(
emisor_rut=self.emisor_rut,
tipo_dte=self.tipo_dte,
folio=self.folio,
fecha_emision_date=self.fecha_emision_date,
receptor_rut=self.receptor_rut,
monto_total=self.monto_total)
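
A minimal sketch of how the new method might be used; ``downcast_to_l1`` is a hypothetical wrapper written for illustration, not part of the library:

from cl_sii.dte.data_models import DteDataL1, DteDataL2

def downcast_to_l1(dte: DteDataL2) -> DteDataL1:
    # Hypothetical helper: it simply delegates to the method added in this PR.
    dte_l1 = dte.as_dte_data_l1()
    # The L1 subset of fields is carried over unchanged; L2-only fields are dropped.
    assert dte_l1.folio == dte.folio
    return dte_l1
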
49 changes: 49 additions & 0 deletions cl_sii/libs/csv_utils.py
@@ -0,0 +1,49 @@
import csv
from typing import IO, Sequence, Type, Union


def create_csv_dict_reader(
text_stream: IO[str],
csv_dialect: Type[csv.Dialect],
row_dict_extra_fields_key: Union[str, None] = None,
expected_fields_strict: bool = True,
expected_field_names: Sequence[str] = None,
) -> csv.DictReader:
"""
Create a CSV dict reader with custom options.
:param text_stream:
:param row_dict_extra_fields_key:
CSV row dict key under which the extra data in the row will be saved
:param csv_dialect:
:param expected_fields_strict:
:param expected_field_names:
(required if ``expected_field_names`` is True)
:return: a CSV DictReader
"""
# note: mypy wrongly complains: it does not accept 'fieldnames' to be None but that value
# is completely acceptable, and it even is the default!
# > error: Argument "fieldnames" to "DictReader" has incompatible type "None"; expected
# > "Sequence[str]"
# note: mypy wrongly complains:
# > Argument "dialect" to "DictReader" has incompatible type "Type[Dialect]";
# > expected "Union[str, Dialect]"
csv_reader = csv.DictReader( # type: ignore
text_stream,
fieldnames=None, # the values of the first row will be used as the fieldnames
restkey=row_dict_extra_fields_key,
dialect=csv_dialect,
)

if expected_fields_strict:
if expected_field_names:
            if tuple(csv_reader.fieldnames) != tuple(expected_field_names):
raise ValueError(
"CSV file field names do not match those expected, or their order.",
csv_reader.fieldnames)
else:
raise ValueError(
"Param 'expected_field_names' is required if 'expected_fields_strict' is True.")

return csv_reader
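
A usage sketch for ``create_csv_dict_reader``, assuming an in-memory CSV and the standard ``csv.excel`` dialect; the column names and data below are made up:

import csv
import io

from cl_sii.libs.csv_utils import create_csv_dict_reader

# Hypothetical CSV content; the first row supplies the field names.
text_stream = io.StringIO("rut,monto\n60910000-1,1000\n")

csv_reader = create_csv_dict_reader(
    text_stream,
    csv_dialect=csv.excel,
    row_dict_extra_fields_key=None,
    expected_fields_strict=True,
    expected_field_names=('rut', 'monto'),  # a tuple, in the same order as the header
)
for row in csv_reader:
    print(row['rut'], row['monto'])
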
42 changes: 42 additions & 0 deletions cl_sii/libs/mm_utils.py
@@ -6,6 +6,48 @@
import marshmallow.utils


###############################################################################
# validators
###############################################################################

def validate_no_unexpected_input_fields(
schema: marshmallow.Schema,
data: dict,
original_data: dict,
) -> None:
"""
Fail validation if there was an unexpected input field.
Usage::
class MySchema(marshmallow.Schema):
class Meta:
strict = True
folio = marshmallow.fields.Integer()
@marshmallow.validates_schema(pass_original=True)
def validate_schema(self, data: dict, original_data: dict) -> None:
validate_no_unexpected_input_fields(self, data, original_data)
"""
# Original inspiration from
# https://marshmallow.readthedocs.io/en/2.x-line/extending.html#validating-original-input-data
fields_name_or_load_from = {
field.name if field.load_from is None else field.load_from
for field_key, field in schema.fields.items()
}
unexpected_input_fields = set(original_data) - fields_name_or_load_from
if unexpected_input_fields:
raise marshmallow.ValidationError(
"Unexpected input field.", field_names=list(unexpected_input_fields))


###############################################################################
# fields
###############################################################################

class CustomMarshmallowDateField(marshmallow.fields.Field):
"""
A formatted date string.
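
A sketch of the new validator in action, assuming marshmallow 2.x (consistent with the ``strict`` and ``load_from`` usage in this diff); ``RowSchema`` is a made-up schema:

import marshmallow

from cl_sii.libs.mm_utils import validate_no_unexpected_input_fields

class RowSchema(marshmallow.Schema):
    class Meta:
        strict = True  # raise on validation errors instead of returning them

    folio = marshmallow.fields.Integer()

    @marshmallow.validates_schema(pass_original=True)
    def validate_schema(self, data: dict, original_data: dict) -> None:
        validate_no_unexpected_input_fields(self, data, original_data)

RowSchema().load({'folio': 1})  # deserializes normally
try:
    RowSchema().load({'folio': 1, 'extra': 'x'})  # 'extra' is not a declared field
except marshmallow.ValidationError as exc:
    print(exc.messages)  # e.g. {'extra': ['Unexpected input field.']}
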
153 changes: 153 additions & 0 deletions cl_sii/libs/rows_processing.py
@@ -0,0 +1,153 @@
import csv
import logging

from typing import Dict, Iterable, Sequence, Tuple

import marshmallow


logger = logging.getLogger(__name__)


class MaxRowsExceeded(RuntimeError):

"""
The maximum number of rows has been exceeded.
"""


###############################################################################
# iterators
###############################################################################

def csv_rows_mm_deserialization_iterator(
csv_reader: csv.DictReader,
row_schema: marshmallow.Schema,
n_rows_offset: int = 0,
max_n_rows: int = None,
fields_to_remove_names: Sequence[str] = None,
) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]:
"""
Marshmallow deserialization iterator over CSV rows.
Iterate over ``csv_reader``, deserialize each row using ``row_schema``
and yield the data before and after deserialization, plus any
validation/deserialization errors.
.. note:: The CSV header row is omitted, obviously.
:param csv_reader:
:param row_schema:
Marshmallow schema for deserializing each CSV row
:param n_rows_offset:
(optional) number of rows to skip (and not deserialize)
:param max_n_rows:
(optional) max number of rows to deserialize (raise exception
if exceeded); ``None`` means no limit
:param fields_to_remove_names:
(optional) the name of each field that must be removed (if it exists)
from the row
:returns:
yields a tuple of (``row_ix`` (1-based), ``row_data``,
``deserialized_row_data``, ``validation_errors``)
:raises MaxRowsExceeded:
number of data rows processed exceeded ``max_n_rows``
:raises RuntimeError:
on CSV error when iterating over ``csv_reader``
"""
# note: mypy complaint is wrong because a 'csv.DictReader' object can be iterated over
# and yields instances of 'Dict[str, object]'.
# > Incompatible types in assignment (expression has type "DictReader", variable has type
# > "Iterable[Dict[str, object]]")
rows_iterator: Iterable[Dict[str, object]] = csv_reader # type: ignore
iterator = rows_mm_deserialization_iterator(
rows_iterator, row_schema, n_rows_offset, max_n_rows, fields_to_remove_names)

try:
# note: we chose not to use 'yield from' to be explicit about what we are yielding.
for row_ix, row_data, deserialized_row_data, validation_errors in iterator:
yield row_ix, row_data, deserialized_row_data, validation_errors
except csv.Error as exc:
exc_msg = f"CSV error for line {csv_reader.line_num} of CSV file."
raise RuntimeError(exc_msg) from exc


def rows_mm_deserialization_iterator(
rows_iterator: Iterable[Dict[str, object]],
row_schema: marshmallow.Schema,
n_rows_offset: int = 0,
max_n_rows: int = None,
fields_to_remove_names: Sequence[str] = None,
) -> Iterable[Tuple[int, Dict[str, object], Dict[str, object], dict]]:
"""
Marshmallow deserialization iterator.
Iterate over ``rows_iterator``, deserialize each row using ``row_schema``
and yield the data before and after deserialization, plus any
validation/deserialization errors.
:param rows_iterator:
:param row_schema:
Marshmallow schema for deserializing each row
:param n_rows_offset:
(optional) number of rows to skip (and not deserialize)
:param max_n_rows:
(optional) max number of rows to deserialize (raise exception
if exceeded); ``None`` means no limit
:param fields_to_remove_names:
(optional) the name of each field that must be removed (if it exists)
from the row
:returns:
yields a tuple of (``row_ix`` (1-based), ``row_data``,
``deserialized_row_data``, ``validation_errors``)
:raises MaxRowsExceeded:
number of data rows processed exceeded ``max_n_rows``
"""
if not n_rows_offset >= 0:
raise ValueError("Param 'n_rows_offset' must be an integer >= 0.")

fields_to_remove_names = fields_to_remove_names or ()

for row_ix, row_data in enumerate(rows_iterator, start=1):
if max_n_rows is not None and row_ix > max_n_rows + n_rows_offset:
raise MaxRowsExceeded(f"Exceeded 'max_n_rows' limit: {max_n_rows}.")

if row_ix <= n_rows_offset:
continue

for _field_name in fields_to_remove_names:
row_data.pop(_field_name, None)

try:
mm_result: marshmallow.UnmarshalResult = row_schema.load(row_data)
deserialized_row_data: dict = mm_result.data
raised_validation_errors: dict = {}
returned_validation_errors: dict = mm_result.errors
except marshmallow.ValidationError as exc:
deserialized_row_data = {}
raised_validation_errors = dict(exc.normalized_messages())
returned_validation_errors = {}

validation_errors = raised_validation_errors
if returned_validation_errors:
if row_schema.strict:
# 'marshmallow.schema.BaseSchema':
# > :param bool strict: If `True`, raise errors if invalid data are passed in
# > instead of failing silently and storing the errors.
logger.error(
"Marshmallow schema is 'strict' but validation errors were returned by "
"method 'load' ('UnmarshalResult.errors') instead of being raised. "
"Errors: %s",
repr(returned_validation_errors))
if raised_validation_errors:
logger.fatal(
"Programming error: either returned or raised validation errors "
"(depending on 'strict') but never both. "
"Returned errors: %s. Raised errors: %s",
repr(returned_validation_errors), repr(raised_validation_errors))

validation_errors.update(returned_validation_errors)

yield row_ix, row_data, deserialized_row_data, validation_errors
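
An end-to-end sketch combining the two new modules: build a reader with ``create_csv_dict_reader`` and feed it to ``csv_rows_mm_deserialization_iterator``. It assumes marshmallow 2.x and uses made-up data and a made-up ``RowSchema``:

import csv
import io

import marshmallow

from cl_sii.libs.csv_utils import create_csv_dict_reader
from cl_sii.libs.rows_processing import csv_rows_mm_deserialization_iterator

class RowSchema(marshmallow.Schema):
    class Meta:
        strict = True

    folio = marshmallow.fields.Integer()

csv_reader = create_csv_dict_reader(
    io.StringIO("folio\n170\nnot-a-number\n"),  # hypothetical 2-row CSV
    csv_dialect=csv.excel,
    expected_field_names=('folio',),
)
iterator = csv_rows_mm_deserialization_iterator(csv_reader, RowSchema(), max_n_rows=100)
for row_ix, row_data, deserialized_row_data, validation_errors in iterator:
    if validation_errors:
        print(f"row {row_ix}: invalid: {validation_errors}")  # the second row fails to parse
    else:
        print(f"row {row_ix}: {deserialized_row_data}")
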
