From c357286fe1f60879d2a98ec51f9c241ba3e32f48 Mon Sep 17 00:00:00 2001
From: Samuel Villegas
Date: Tue, 16 Sep 2025 12:57:32 -0300
Subject: [PATCH] =?UTF-8?q?feat(cte):=20Add=20parser=20for=20"Propiedades?=
 =?UTF-8?q?=20y=20Bienes=20Ra=C3=ADces"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Implemented `parse_taxpayer_properties` to parse property table from CTE HTML.
- Added `Property` and `TaxpayerProperties` data models.
- Created tests to validate parser functionality with sample HTML input.

Ref: https://app.shortcut.com/cordada/story/16536/
---
 src/cl_sii/cte/data_models.py                 | 112 +++++++++++++++++++
 src/cl_sii/cte/parsers.py                     |  62 ++++++++++-
 src/tests/test_cte_parsers.py                 |  82 +++++++++++++-
 .../test_data/sii-cte/cte_empty_f29.html      |  33 +++++-
 4 files changed, 283 insertions(+), 6 deletions(-)

diff --git a/src/cl_sii/cte/data_models.py b/src/cl_sii/cte/data_models.py
index c38b1809..fb69782a 100644
--- a/src/cl_sii/cte/data_models.py
+++ b/src/cl_sii/cte/data_models.py
@@ -2,6 +2,7 @@
 
 from collections.abc import Sequence
 from datetime import date
+from decimal import Decimal, InvalidOperation
 from typing import Optional
 
 import pydantic
@@ -86,3 +87,114 @@ class TaxpayerData:
 class LastFiledDocument:
     name: str
     date: date
+
+
+_SI_NO_FLAGS = {'SI': True, 'SÍ': True, 'NO': False}
+
+
+@pydantic.dataclasses.dataclass(
+    frozen=True,
+)
+class Property:
+    """
+    A row of the table "Propiedades y Bienes Raíces (3)".
+    """
+
+    commune: Optional[str]
+    """
+    Comuna
+    """
+    role: Optional[str]
+    """
+    Rol
+    """
+    address: Optional[str]
+    """
+    Dirección
+    """
+    purpose: Optional[str]
+    """
+    Destino
+    """
+    fiscal_valuation: Optional[Decimal]
+    """
+    Avalúo Fiscal
+    """
+    overdue_installments: Optional[bool]
+    """
+    Cuotas vencidas por pagar
+    """
+    current_installments: Optional[bool]
+    """
+    Cuotas vigentes por pagar
+    """
+    condition: Optional[str]
+    """
+    Condición
+    """
+
+    ###########################################################################
+    # Validators
+    ###########################################################################
+
+    @pydantic.field_validator('fiscal_valuation', mode='before')
+    @classmethod
+    def parse_fiscal_valuation(cls, v: Optional[str]) -> Optional[Decimal]:
+        """
+        Convert an amount such as ``'46.550.332'`` to ``Decimal``.
+
+        Dots are thousands separators and a comma is the decimal separator.
+        Blank text becomes ``None``; non-string values are returned unchanged.
+
+        Raises:
+            ValueError: If the text is not a valid number.
+        """
+        if not isinstance(v, str):
+            return v
+        if not v.strip():
+            return None
+        try:
+            return Decimal(v.replace('.', '').replace(',', '.'))
+        except InvalidOperation as exc:
+            # `InvalidOperation` is not a `ValueError`, and Pydantic only turns
+            # `ValueError`/`AssertionError` into validation errors.
+            raise ValueError(f"Invalid fiscal valuation: {v!r}") from exc
+
+    @pydantic.field_validator('commune', 'role', 'address', 'purpose', 'condition')
+    @classmethod
+    def parse_str_fields(cls, v: Optional[str]) -> Optional[str]:
+        """
+        Replace blank (empty or whitespace-only) text with ``None``.
+        """
+        if isinstance(v, str) and not v.strip():
+            return None
+        return v
+
+    @pydantic.field_validator('current_installments', 'overdue_installments', mode='before')
+    @classmethod
+    def parse_boolean_fields(cls, v: Optional[str | bool]) -> Optional[bool]:
+        """
+        Map "SI"/"NO" text to ``bool``, ignoring case and surrounding whitespace.
+
+        Booleans are returned unchanged; any other value becomes ``None``.
+        """
+        if isinstance(v, bool):
+            return v
+        if isinstance(v, str):
+            return _SI_NO_FLAGS.get(v.strip().upper())
+        return None
+
+
+@pydantic.dataclasses.dataclass(
+    frozen=True,
+    config=pydantic.ConfigDict(
+        arbitrary_types_allowed=True,
+        extra='forbid',
+    ),
+)
+class TaxpayerProperties:
+    """
+    Propiedades y Bienes Raíces (3)
+    """
+
+    properties: Sequence[Property]
diff --git a/src/cl_sii/cte/parsers.py b/src/cl_sii/cte/parsers.py
index 5242cfef..e49a214d 100644
--- a/src/cl_sii/cte/parsers.py
+++ b/src/cl_sii/cte/parsers.py
@@ -4,7 +4,14 @@
 
 from bs4 import BeautifulSoup
 
-from .data_models import LastFiledDocument, LegalRepresentative, TaxpayerData, TaxpayerProvidedInfo
+from .data_models import (
+    LastFiledDocument,
+    LegalRepresentative,
+    Property,
+    TaxpayerData,
+    TaxpayerProperties,
+    TaxpayerProvidedInfo,
+)
 
 
 def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
@@ -176,3 +183,56 @@ def parse_taxpayer_data(html_content: str) -> TaxpayerData:
         last_filed_documents=last_filed_documents,
         tax_observations=tax_observations,
     )
+
+
+def parse_taxpayer_properties(html_content: str) -> TaxpayerProperties:
+    """
+    Parse the CTE HTML content to extract the content of the section:
+    "Propiedades y Bienes Raíces (3)"
+
+    Args:
+        html_content: HTML string containing the taxpayer properties table
+
+    Returns:
+        TaxpayerProperties instance with the parsed data
+
+    Raises:
+        ValueError: If the table with id "tbl_propiedades" is not found
+    """
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Find the main table with id="tbl_propiedades"
+    table = soup.find('table', id='tbl_propiedades')
+    if not table:
+        raise ValueError("Could not find taxpayer properties table in HTML")
+
+    properties = []
+    rows = table.find_all('tr')  # type: ignore[attr-defined]
+    for row in rows[2:]:  # Skip header rows
+        # Skip rows without useful data
+        cells = row.find_all('td')
+        if len(cells) < 8:
+            continue
+
+        commune = cells[0].get_text(strip=True) or None
+        role = cells[1].get_text(strip=True) or None
+        address = cells[2].get_text(strip=True) or None
+        purpose = cells[3].get_text(strip=True) or None
+        fiscal_valuation = cells[4].get_text(strip=True) or None
+        overdue_installments = cells[5].get_text(strip=True) or None
+        current_installments = cells[6].get_text(strip=True) or None
+        condition = cells[7].get_text(strip=True) or None
+
+        properties.append(
+            Property(
+                commune=commune,
+                role=role,
+                address=address,
+                purpose=purpose,
+                fiscal_valuation=fiscal_valuation,
+                overdue_installments=overdue_installments,
+                current_installments=current_installments,
+                condition=condition,
+            )
+        )
+    return TaxpayerProperties(properties=properties)
diff --git a/src/tests/test_cte_parsers.py b/src/tests/test_cte_parsers.py
index ab1c5d4c..bdd09c02 100644
--- a/src/tests/test_cte_parsers.py
+++ b/src/tests/test_cte_parsers.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 from datetime import date
+from decimal import Decimal
+from typing import Any
 from unittest import TestCase
 
 from cl_sii.cte import data_models, parsers
@@ -42,7 +43,7 @@ def test_parse_taxpayer_provided_info(self) -> None:
             )
             self.assertEqual(result, expected_obj)
 
-        with self.subTest("Parsing emtpy content"):
+        with self.subTest("Parsing empty content"):
             with self.assertRaises(ValueError) as assert_raises_cm:
                 parsers.parse_taxpayer_provided_info("")
 
@@ -105,3 +106,81 @@ def test_parse_taxpayer_data(self) -> None:
                 tax_observations=None,
             )
             self.assertEqual(result, expected_obj)
+
+    def test_parse_taxpayer_properties(self) -> None:
+        html_content = read_test_file_str_utf8('test_data/sii-cte/cte_empty_f29.html')
+
+        with self.subTest("Parsing ok"):
+            result = parsers.parse_taxpayer_properties(html_content)
+            expected_obj = data_models.TaxpayerProperties(
+                properties=[
+                    data_models.Property(
+                        commune="LAS CONDES",
+                        role="123-4",
+                        address="Av. Apoquindo 1234",
+                        purpose="HABITACIONAL",
+                        fiscal_valuation=Decimal('46550332'),
+                        overdue_installments=True,
+                        current_installments=True,
+                        condition="AFECTO",
+                    ),
+                    data_models.Property(
+                        commune="PROVIDENCIA",
+                        role="567-8",
+                        address="Calle 10 #456",
+                        purpose="COMERCIAL",
+                        fiscal_valuation=None,
+                        overdue_installments=False,
+                        current_installments=False,
+                        condition="EXENTO",
+                    ),
+                    data_models.Property(
+                        commune="ÑUÑOA",
+                        role=None,
+                        address=None,
+                        purpose="INDUSTRIAL",
+                        fiscal_valuation=Decimal('78456789'),
+                        overdue_installments=False,
+                        current_installments=False,
+                        condition="AFECTO",
+                    ),
+                ],
+            )
+            self.assertEqual(result, expected_obj)
+
+        with self.subTest("Parsing table without data rows"):
+            result = parsers.parse_taxpayer_properties(
+                '<table id="tbl_propiedades">'
+                '<tr><th>Propiedades y Bienes Raíces</th></tr>'
+                '<tr><th>Comuna</th></tr>'
+                '<tr><td>No se registra información para este RUT</td></tr>'
+                '</table>'
+            )
+            self.assertEqual(result, data_models.TaxpayerProperties(properties=[]))
+
+        with self.subTest("Parsing empty content"):
+            with self.assertRaises(ValueError) as assert_raises_cm:
+                parsers.parse_taxpayer_properties("")
+
+            self.assertEqual(
+                assert_raises_cm.exception.args,
+                ("Could not find taxpayer properties table in HTML",),
+            )
+
+    def test_property_fiscal_valuation(self) -> None:
+        def build(fiscal_valuation: Any) -> data_models.Property:
+            return data_models.Property(
+                commune=None,
+                role=None,
+                address=None,
+                purpose=None,
+                fiscal_valuation=fiscal_valuation,
+                overdue_installments=None,
+                current_installments=None,
+                condition=None,
+            )
+
+        self.assertEqual(build('1.234,5').fiscal_valuation, Decimal('1234.5'))
+        self.assertIsNone(build(' ').fiscal_valuation)
+        with self.assertRaises(ValueError):
+            build('N/A')
diff --git a/src/tests/test_data/sii-cte/cte_empty_f29.html b/src/tests/test_data/sii-cte/cte_empty_f29.html
index e3189952..0f057d65 100644
--- a/src/tests/test_data/sii-cte/cte_empty_f29.html
+++ b/src/tests/test_data/sii-cte/cte_empty_f29.html
@@ -220,11 +220,36 @@ Cuotas vigentes por pagar Condición
(4)
- - + - - No se registra información para este RUT - - + LAS CONDES + 123-4 + Av. Apoquindo 1234 + HABITACIONAL + 46.550.332 + SI + SI + AFECTO + + + PROVIDENCIA + 567-8 + Calle 10 #456 + COMERCIAL + + NO + NO + EXENTO + + + ÑUÑOA + + + INDUSTRIAL + 78.456.789 + NO + NO + AFECTO