diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b0541859..1e57eee7 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.1 +current_version = 0.6.2 commit = True tag = True diff --git a/HISTORY.rst b/HISTORY.rst index c296e605..ec01202d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,12 @@ History ------- +0.6.2 (2019-05-15) ++++++++++++++++++++++++ + +* (PR #45, 2019-05-15) libs.encoding_utils: improve ``clean_base64`` +* (PR #44, 2019-05-15) dte.parse: fix edge case in ``parse_dte_xml`` + 0.6.1 (2019-05-08) +++++++++++++++++++++++ diff --git a/cl_sii/__init__.py b/cl_sii/__init__.py index 6301d193..d90243c9 100644 --- a/cl_sii/__init__.py +++ b/cl_sii/__init__.py @@ -5,4 +5,4 @@ """ -__version__ = '0.6.1' +__version__ = '0.6.2' diff --git a/cl_sii/dte/parse.py b/cl_sii/dte/parse.py index d7a5f0ed..00b6077d 100644 --- a/cl_sii/dte/parse.py +++ b/cl_sii/dte/parse.py @@ -21,7 +21,7 @@ import logging import os from datetime import date, datetime -from typing import Tuple +from typing import Optional, Tuple from cl_sii.libs import encoding_utils from cl_sii.libs import tz_utils @@ -430,32 +430,37 @@ def parse_dte_xml(xml_doc: XmlElement) -> data_models.DteDataL2: # values parsing ########################################################################### - tipo_dte_value = constants.TipoDteEnum(int(tipo_dte_em.text.strip())) - folio_value = int(folio_em.text.strip()) - fecha_emision_value = date.fromisoformat(fecha_emision_em.text.strip()) + tipo_dte_value = constants.TipoDteEnum(int(_text_strip_or_raise(tipo_dte_em))) + folio_value = int(_text_strip_or_raise(folio_em)) + fecha_emision_value = date.fromisoformat(_text_strip_or_raise(fecha_emision_em)) fecha_vencimiento_value = None if fecha_vencimiento_em is not None: - fecha_vencimiento_value = date.fromisoformat(fecha_vencimiento_em.text.strip()) + fecha_vencimiento_value = date.fromisoformat( + _text_strip_or_raise(fecha_vencimiento_em)) - emisor_rut_value = Rut(emisor_rut_em.text.strip()) - emisor_razon_social_value = emisor_razon_social_em.text.strip() - emisor_giro_value = emisor_giro_em.text.strip() - emisor_email_value = emisor_email_em.text.strip() if emisor_email_em is not None else None + emisor_rut_value = Rut(_text_strip_or_raise(emisor_rut_em)) + emisor_razon_social_value = _text_strip_or_raise(emisor_razon_social_em) + emisor_giro_value = _text_strip_or_raise(emisor_giro_em) + emisor_email_value = None + if emisor_email_em is not None: + emisor_email_value = _text_strip_or_none(emisor_email_em) - receptor_rut_value = Rut(receptor_rut_em.text.strip()) - receptor_razon_social_value = receptor_razon_social_em.text.strip() - receptor_email_value = receptor_email_em.text.strip() if receptor_email_em is not None else None + receptor_rut_value = Rut(_text_strip_or_raise(receptor_rut_em)) + receptor_razon_social_value = _text_strip_or_raise(receptor_razon_social_em) + receptor_email_value = None + if receptor_email_em is not None: + receptor_email_value = _text_strip_or_none(receptor_email_em) - monto_total_value = int(monto_total_em.text.strip()) + monto_total_value = int(_text_strip_or_raise(monto_total_em)) tmst_firma_value = tz_utils.convert_naive_dt_to_tz_aware( - dt=datetime.fromisoformat(tmst_firma_em.text), + dt=datetime.fromisoformat(_text_strip_or_raise(tmst_firma_em)), tz=data_models.DteDataL2.DATETIME_FIELDS_TZ) signature_signature_value = encoding_utils.decode_base64_strict( - signature_signature_value_em.text.strip()) + _text_strip_or_raise(signature_signature_value_em)) signature_key_info_x509_cert_der = encoding_utils.decode_base64_strict( - signature_key_info_x509_cert_em.text.strip()) + _text_strip_or_raise(signature_key_info_x509_cert_em)) return data_models.DteDataL2( emisor_rut=emisor_rut_value, @@ -476,6 +481,39 @@ def parse_dte_xml(xml_doc: XmlElement) -> data_models.DteDataL2: ) +def _text_strip_or_none(xml_em: XmlElement) -> Optional[str]: + # note: we need the pair of functions '_text_strip_or_none' and '_text_strip_or_raise' + # because, under certain circumstances, an XML tag: + # - with no content -> `xml_em.text` is None instead of '' + # - with leading and/or trailing whitespace -> `xml_em.text` may or may not include that + + if xml_em is None: + raise ValueError("Value must be an XML element, not None.") + + stripped_text: Optional[str] = None + if xml_em.text is not None: + stripped_text = xml_em.text.strip() + + return stripped_text + + +def _text_strip_or_raise(xml_em: XmlElement) -> str: + # note: we need the pair of functions '_text_strip_or_none' and '_text_strip_or_raise' + # because, under certain circumstances, an XML tag: + # - with no content -> `xml_em.text` is None instead of '' + # - with leading and/or trailing whitespace -> `xml_em.text` may or may not include that + + if xml_em is None: + raise ValueError("Value must be an XML element, not None.") + + if xml_em.text is None: + raise ValueError("Text of XML element is None.") + else: + stripped_text: str = xml_em.text.strip() + + return stripped_text + + ############################################################################### # helpers ############################################################################### diff --git a/cl_sii/libs/encoding_utils.py b/cl_sii/libs/encoding_utils.py index 9a6adb14..fbfa1897 100644 --- a/cl_sii/libs/encoding_utils.py +++ b/cl_sii/libs/encoding_utils.py @@ -24,7 +24,13 @@ def clean_base64(value: Union[str, bytes]) -> bytes: raise TypeError("Value must be str or bytes.") # remove line breaks and spaces - value_base64_bytes_cleaned = value_base64_bytes.replace(b'\n', b'').replace(b' ', b'') + # warning: we may only remove characters that are not part of the standard base-64 alphabet + # (or any of its popular alternatives). + value_base64_bytes_cleaned = value_base64_bytes \ + .replace(b'\n', b'') \ + .replace(b'\r', b'') \ + .replace(b'\t', b'') \ + .replace(b' ', b'') return value_base64_bytes_cleaned diff --git a/tests/test_data/sii-dte/DTE--76354771-K--33--170--cleaned-mod-empty-emails.xml b/tests/test_data/sii-dte/DTE--76354771-K--33--170--cleaned-mod-empty-emails.xml new file mode 100644 index 00000000..16e8e9da --- /dev/null +++ b/tests/test_data/sii-dte/DTE--76354771-K--33--170--cleaned-mod-empty-emails.xml @@ -0,0 +1,123 @@ + + + + + + + 33 + 170 + 2019-04-01 + 1 + 1 + 2 + + + 76354771-K + INGENIERIA ENACON SPA + Ingenieria y Construccion + + 421000 + 078525666 + MERCED 753 16 ARBOLEDA DE QUIILOTA + QUILLOTA + QUILLOTA + + + 96790240-3 + MINERA LOS PELAMBRES + EXTRACCION Y PROCESAMIENTO DE COBRE + + Felipe Barria + Av. Apoquindo 4001 1802 + LAS CONDES + SANTIAGO + + + 2517900 + 19.00 + 478401 + 2996301 + + + + 1 + Tableros electricos 3 tom + as 3p + t; 380v; 50 hz; 32a; 3 tomas monofasicas 2p + t; 240v; 50 hz; 16a; proteccion ip, segun orden de compra de la referencia.- + 2.00 + Unid + 1258950.00 + 2517900 + + + 1 + 801 + 4510083633 + 2019-03-22 + +
76354771-K331702019-04-0196790240-3MINERA LOS PELAMBRES2996301Tableros electricos 3 tom76354771-KINGENIERIA ENACON SPA331701702019-04-01uv7BUO3yg/7RoMjh1mPXXG/8YIwjtXsu7kcOq7dZQj66QCiY4FVz2fIhF1jaU0GSikq/jq26IFGylGus92OnPQ==Aw==300PI7bw8y0RNUJrGxyhb2gr6BjFtv/Ikyo/6g69wycoXTHSoRML3xvZvOBytreN7REw9JF0Ldoj91RRtaZbH38bA==2019-04-01T01:36:40
DKFS7bNYRpVYLNEII+eyLcBHmNwQIHVkbqgR96wKcnDEcU6NsHQUMUyXpr7ql7xD9iuGkZDmNxHuY+Mq913oSA==
+ 2019-04-01T01:36:40 + +
+ + + + + + + + + +ij2Qn6xOc2eRx3hwyO/GrzptoBk= + + + +fsYP5p/lNfofAz8POShrJjqXdBTNNtvv4/TWCxbvwTIAXr7BLrlvX3C/Hpfo4viqaxSu1OGFgPnk +ddDIFwj/ZsVdbdB+MhpKkyha83RxhJpYBVBY3c+y9J6oMfdIdMAYXhEkFw8w63KHyhdf2E9dnbKi +wqSxDcYjTT6vXsLPrZk= + + + + + +pB4Bs0Op+L0za/zpFQYBiCrVlIOKgULo4uvRLCI5picuxI6X4rE7f3g9XBIZrqtmTUSshmifKLXl +9T/ScdkuLyIcsHj0QHkbe0LCHSzw1+pH1yTT/dn5NeFVR2InIkL/PzHkjmVJR/M0R50lGJ1W+nqN +Uavs/9J+gR9BBMs/eYE= + +AQAB + + + + +MIIGVDCCBTygAwIBAgIKMUWmvgAAAAjUHTANBgkqhkiG9w0BAQUFADCB0jELMAkGA1UEBhMCQ0wx +HTAbBgNVBAgTFFJlZ2lvbiBNZXRyb3BvbGl0YW5hMREwDwYDVQQHEwhTYW50aWFnbzEUMBIGA1UE +ChMLRS1DRVJUQ0hJTEUxIDAeBgNVBAsTF0F1dG9yaWRhZCBDZXJ0aWZpY2Fkb3JhMTAwLgYDVQQD +EydFLUNFUlRDSElMRSBDQSBGSVJNQSBFTEVDVFJPTklDQSBTSU1QTEUxJzAlBgkqhkiG9w0BCQEW +GHNjbGllbnRlc0BlLWNlcnRjaGlsZS5jbDAeFw0xNzA5MDQyMTExMTJaFw0yMDA5MDMyMTExMTJa +MIHXMQswCQYDVQQGEwJDTDEUMBIGA1UECBMLVkFMUEFSQUlTTyAxETAPBgNVBAcTCFF1aWxsb3Rh +MS8wLQYDVQQKEyZTZXJ2aWNpb3MgQm9uaWxsYSB5IExvcGV6IHkgQ2lhLiBMdGRhLjEkMCIGA1UE +CwwbSW5nZW5pZXLDrWEgeSBDb25zdHJ1Y2Npw7NuMSMwIQYDVQQDExpSYW1vbiBodW1iZXJ0byBM +b3BleiAgSmFyYTEjMCEGCSqGSIb3DQEJARYUZW5hY29ubHRkYUBnbWFpbC5jb20wgZ8wDQYJKoZI +hvcNAQEBBQADgY0AMIGJAoGBAKQeAbNDqfi9M2v86RUGAYgq1ZSDioFC6OLr0SwiOaYnLsSOl+Kx +O394PVwSGa6rZk1ErIZonyi15fU/0nHZLi8iHLB49EB5G3tCwh0s8NfqR9ck0/3Z+TXhVUdiJyJC +/z8x5I5lSUfzNEedJRidVvp6jVGr7P/SfoEfQQTLP3mBAgMBAAGjggKnMIICozA9BgkrBgEEAYI3 +FQcEMDAuBiYrBgEEAYI3FQiC3IMvhZOMZoXVnReC4twnge/sPGGBy54UhqiCWAIBZAIBBDAdBgNV +HQ4EFgQU1dVHhF0UVe7RXIz4cjl3/Vew+qowCwYDVR0PBAQDAgTwMB8GA1UdIwQYMBaAFHjhPp/S +ErN6PI3NMA5Ts0MpB7NVMD4GA1UdHwQ3MDUwM6AxoC+GLWh0dHA6Ly9jcmwuZS1jZXJ0Y2hpbGUu +Y2wvZWNlcnRjaGlsZWNhRkVTLmNybDA6BggrBgEFBQcBAQQuMCwwKgYIKwYBBQUHMAGGHmh0dHA6 +Ly9vY3NwLmVjZXJ0Y2hpbGUuY2wvb2NzcDAjBgNVHREEHDAaoBgGCCsGAQQBwQEBoAwWCjEzMTg1 +MDk1LTYwIwYDVR0SBBwwGqAYBggrBgEEAcEBAqAMFgo5NjkyODE4MC01MIIBTQYDVR0gBIIBRDCC +AUAwggE8BggrBgEEAcNSBTCCAS4wLQYIKwYBBQUHAgEWIWh0dHA6Ly93d3cuZS1jZXJ0Y2hpbGUu +Y2wvQ1BTLmh0bTCB/AYIKwYBBQUHAgIwge8egewAQwBlAHIAdABpAGYAaQBjAGEAZABvACAARgBp +AHIAbQBhACAAUwBpAG0AcABsAGUALgAgAEgAYQAgAHMAaQBkAG8AIAB2AGEAbABpAGQAYQBkAG8A +IABlAG4AIABmAG8AcgBtAGEAIABwAHIAZQBzAGUAbgBjAGkAYQBsACwAIABxAHUAZQBkAGEAbgBk +AG8AIABoAGEAYgBpAGwAaQB0AGEAZABvACAAZQBsACAAQwBlAHIAdABpAGYAaQBjAGEAZABvACAA +cABhAHIAYQAgAHUAcwBvACAAdAByAGkAYgB1AHQAYQByAGkAbzANBgkqhkiG9w0BAQUFAAOCAQEA +mxtPpXWslwI0+uJbyuS9s/S3/Vs0imn758xMU8t4BHUd+OlMdNAMQI1G2+q/OugdLQ/a9Sg3clKD +qXR4lHGl8d/Yq4yoJzDD3Ceez8qenY3JwGUhPzw9oDpg4mXWvxQDXSFeW/u/BgdadhfGnpwx61Un ++/fU24ZgU1dDJ4GKj5oIPHUIjmoSBhnstEhIr6GJWSTcDKTyzRdqBlaVhenH2Qs6Mw6FrOvRPuud +B7lo1+OgxMb/Gjyu6XnEaPu7Vq4XlLYMoCD2xrV7WEADaDTm7KcNLczVAYqWSF1WUqYSxmPoQDFY ++kMTThJyCXBlE0NADInrkwWgLLygkKI7zXkwaw== + + + +
\ No newline at end of file diff --git a/tests/test_dte_parse.py b/tests/test_dte_parse.py index d8f4198e..23870a52 100644 --- a/tests/test_dte_parse.py +++ b/tests/test_dte_parse.py @@ -291,6 +291,8 @@ def setUpClass(cls) -> None: 'test_data/sii-dte/DTE--76354771-K--33--170--cleaned.xml') cls.dte_clean_xml_2_xml_bytes = read_test_file_bytes( 'test_data/sii-dte/DTE--76399752-9--33--25568--cleaned.xml') + cls.dte_clean_xml_1b_xml_bytes = read_test_file_bytes( + 'test_data/sii-dte/DTE--76354771-K--33--170--cleaned-mod-empty-emails.xml') cls.dte_clean_xml_1_cert_pem_bytes = encoding_utils.clean_base64( crypto_utils.remove_pem_cert_header_footer( @@ -364,6 +366,32 @@ def test_parse_dte_xml_ok_1(self) -> None: receptor_email=None, )) + def test_parse_dte_xml_ok_1b(self) -> None: + xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_1b_xml_bytes) + + parsed_dte = parse_dte_xml(xml_doc) + self.assertDictEqual( + dict(parsed_dte.as_dict()), + dict( + emisor_rut=Rut('76354771-K'), + tipo_dte=cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA, + folio=170, + fecha_emision_date=date(2019, 4, 1), + receptor_rut=Rut('96790240-3'), + monto_total=2996301, + emisor_razon_social='INGENIERIA ENACON SPA', + receptor_razon_social='MINERA LOS PELAMBRES', + fecha_vencimiento_date=None, + firma_documento_dt=tz_utils.convert_naive_dt_to_tz_aware( + dt=datetime(2019, 4, 1, 1, 36, 40), + tz=DteDataL2.DATETIME_FIELDS_TZ), + signature_value=self._TEST_DTE_1_SIGNATURE_VALUE, + signature_x509_cert_der=self.dte_clean_xml_1_cert_der, + emisor_giro='Ingenieria y Construccion', + emisor_email=None, + receptor_email=None, + )) + def test_parse_dte_xml_ok_2(self) -> None: xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_2_xml_bytes)