Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions src/cl_sii/cte/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from collections.abc import Sequence
from datetime import date
from decimal import Decimal, InvalidOperation
from typing import Optional

import pydantic
Expand Down Expand Up @@ -86,3 +87,89 @@ class TaxpayerData:
class LastFiledDocument:
name: str
date: date


@pydantic.dataclasses.dataclass(
    config=pydantic.ConfigDict(
        extra='forbid',
        arbitrary_types_allowed=True,
    ),
    frozen=True,
)
class TaxpayerProperties:
    """
    Contents of the CTE section "Propiedades y Bienes Raíces (3)"
    (properties and real estate).
    """

    properties: Sequence[Property]
    """
    Properties listed in the section, one item per table row.
    """


@pydantic.dataclasses.dataclass(
    frozen=True,
)
class Property:
    """
    One property row of the CTE section "Propiedades y Bienes Raíces (3)".

    Every field is optional; a missing value is represented as ``None``.
    Raw strings are accepted for all fields and normalized by the validators
    defined below.
    """

    commune: Optional[str]
    """
    Commune ("Comuna").
    """
    role: Optional[str]
    """
    Role identifier of the property ("Rol").
    """
    address: Optional[str]
    """
    Address ("Dirección").
    """
    purpose: Optional[str]
    """
    Purpose ("Destino").
    """
    fiscal_valuation: Optional[Decimal]
    """
    Fiscal valuation ("Avalúo Fiscal").
    """
    overdue_installments: Optional[bool]
    """
    Overdue installments pending payment ("Cuotas vencidas por pagar").
    """
    current_installments: Optional[bool]
    """
    Current installments pending payment ("Cuotas vigentes por pagar").
    """
    condition: Optional[str]
    """
    Condition ("Condición").
    """

    ###########################################################################
    # Validators
    ###########################################################################

    @pydantic.field_validator('fiscal_valuation', mode='before')
    @classmethod
    def parse_fiscal_valuation(cls, v: Optional[str | Decimal]) -> Optional[Decimal]:
        """
        Convert a formatted amount such as ``'46.550.332'`` into a ``Decimal``.

        In string input, ``'.'`` is treated as the thousands separator and
        ``','`` as the decimal separator. Non-string input is returned
        unchanged so that the regular field validation handles it.

        Args:
            v: Raw value received for the field.

        Returns:
            The parsed amount, or ``None`` for a blank string.

        Raises:
            ValueError: If the string is not a valid number.
        """
        if not isinstance(v, str):
            return v

        normalized = v.strip().replace('.', '').replace(',', '.')
        if not normalized:
            # Blank text means "no value", as it does for the string fields.
            return None

        try:
            return Decimal(normalized)
        except InvalidOperation as exc:
            # `InvalidOperation` is not a `ValueError`; Pydantic would let it escape
            # unwrapped instead of reporting a validation error for this field.
            raise ValueError(f"Invalid fiscal valuation: {v!r}") from exc

    @pydantic.field_validator('commune', 'role', 'address', 'purpose', 'condition')
    @classmethod
    def parse_str_fields(cls, v: Optional[str]) -> Optional[str]:
        """
        Replace empty or whitespace-only strings with ``None``.

        Any other value is returned unchanged.
        """
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @pydantic.field_validator('current_installments', 'overdue_installments', mode='before')
    @classmethod
    def parse_boolean_fields(cls, v: Optional[str | bool]) -> Optional[bool]:
        """
        Convert the texts ``'SI'`` / ``'NO'`` into ``True`` / ``False``.

        Strings are matched ignoring surrounding whitespace and letter case.
        Booleans are returned unchanged. Any other value, including an
        unrecognized string, becomes ``None``.
        """
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            return {'SI': True, 'NO': False}.get(v.strip().upper())
        return None
60 changes: 59 additions & 1 deletion src/cl_sii/cte/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@

from bs4 import BeautifulSoup

from .data_models import LastFiledDocument, LegalRepresentative, TaxpayerData, TaxpayerProvidedInfo
from .data_models import (
LastFiledDocument,
LegalRepresentative,
Property,
TaxpayerData,
TaxpayerProperties,
TaxpayerProvidedInfo,
)


def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
Expand Down Expand Up @@ -176,3 +183,54 @@ def parse_taxpayer_data(html_content: str) -> TaxpayerData:
last_filed_documents=last_filed_documents,
tax_observations=tax_observations,
)


def parse_taxpayer_properties(html_content: str) -> TaxpayerProperties:
    """
    Extract the section "Propiedades y Bienes Raíces (3)" from the CTE HTML.

    Args:
        html_content: HTML string containing the taxpayer properties table

    Returns:
        TaxpayerProperties instance with one ``Property`` per data row

    Raises:
        ValueError: If the table with id ``tbl_propiedades`` is not found
    """
    document = BeautifulSoup(html_content, 'html.parser')

    # The section is rendered as the table with id="tbl_propiedades"
    properties_table = document.find('table', id='tbl_propiedades')
    if not properties_table:
        raise ValueError("Could not find taxpayer information table in HTML")

    table_rows = properties_table.find_all('tr')  # type: ignore[attr-defined]

    parsed_properties = []
    # The first two rows are headers
    for table_row in table_rows[2:]:
        data_cells = table_row.find_all('td')

        # A data row has at least 8 cells; anything shorter carries no property
        if len(data_cells) >= 8:
            (
                commune,
                role,
                address,
                purpose,
                fiscal_valuation,
                overdue_installments,
                current_installments,
                condition,
            ) = (cell.get_text(strip=True) or None for cell in data_cells[:8])

            parsed_properties.append(
                Property(
                    commune=commune,
                    role=role,
                    address=address,
                    purpose=purpose,
                    fiscal_valuation=fiscal_valuation,
                    overdue_installments=overdue_installments,
                    current_installments=current_installments,
                    condition=condition,
                )
            )

    return TaxpayerProperties(properties=parsed_properties)
53 changes: 52 additions & 1 deletion src/tests/test_cte_parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from datetime import date
from decimal import Decimal
from unittest import TestCase

from cl_sii.cte import data_models, parsers
Expand Down Expand Up @@ -42,7 +43,7 @@ def test_parse_taxpayer_provided_info(self) -> None:
)
self.assertEqual(result, expected_obj)

with self.subTest("Parsing emtpy content"):
with self.subTest("Parsing empty content"):
with self.assertRaises(ValueError) as assert_raises_cm:
parsers.parse_taxpayer_provided_info("")

Expand Down Expand Up @@ -105,3 +106,53 @@ def test_parse_taxpayer_data(self) -> None:
tax_observations=None,
)
self.assertEqual(result, expected_obj)

def test_parse_taxpayer_properties(self) -> None:
    """
    Check `parse_taxpayer_properties` against the HTML fixture and empty input.
    """
    fixture_html = read_test_file_str_utf8('test_data/sii-cte/cte_empty_f29.html')

    with self.subTest("Parsing ok"):
        actual_obj = parsers.parse_taxpayer_properties(fixture_html)

        # Row with every column filled in
        complete_property = data_models.Property(
            commune="LAS CONDES",
            role="123-4",
            address="Av. Apoquindo 1234",
            purpose="HABITACIONAL",
            fiscal_valuation=Decimal('46550332'),
            overdue_installments=True,
            current_installments=True,
            condition="AFECTO",
        )
        # Row whose fiscal valuation cell is empty
        property_without_valuation = data_models.Property(
            commune="PROVIDENCIA",
            role="567-8",
            address="Calle 10 #456",
            purpose="COMERCIAL",
            fiscal_valuation=None,
            overdue_installments=False,
            current_installments=False,
            condition="EXENTO",
        )
        # Row whose role and address cells are empty
        property_without_role_and_address = data_models.Property(
            commune="ÑUÑOA",
            role=None,
            address=None,
            purpose="INDUSTRIAL",
            fiscal_valuation=Decimal('78456789'),
            overdue_installments=False,
            current_installments=False,
            condition="AFECTO",
        )
        expected_obj = data_models.TaxpayerProperties(
            properties=[
                complete_property,
                property_without_valuation,
                property_without_role_and_address,
            ],
        )
        self.assertEqual(actual_obj, expected_obj)

    with self.subTest("Parsing empty content"):
        with self.assertRaises(ValueError) as raised_cm:
            parsers.parse_taxpayer_properties("")

        expected_args = ("Could not find taxpayer information table in HTML",)
        self.assertEqual(raised_cm.exception.args, expected_args)
33 changes: 29 additions & 4 deletions src/tests/test_data/sii-cte/cte_empty_f29.html
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,36 @@
<td width="10%"><span class="textof">Cuotas vigentes por pagar</span></td>
<td width="13%"><span class="textof">Condición<br>(4)</span></td>
</tr>


<!-- MOCK PROPERTY ROWS -->
<tr>
<td colspan="8" class="centeralign"><span class="textof">- No se registra información para este RUT -</span>
</td>
<td>LAS CONDES</td>
<td>123-4</td>
<td>Av. Apoquindo 1234</td>
<td>HABITACIONAL</td>
<td>46.550.332</td>
<td>SI</td>
<td>SI</td>
<td>AFECTO</td>
</tr>
<tr>
<td>PROVIDENCIA</td>
<td>567-8</td>
<td>Calle 10 #456</td>
<td>COMERCIAL</td>
<td></td>
<td>NO</td>
<td>NO</td>
<td>EXENTO</td>
</tr>
<tr>
<td>ÑUÑOA</td>
<td></td>
<td></td>
<td>INDUSTRIAL</td>
<td>78.456.789</td>
<td>NO</td>
<td>NO</td>
<td>AFECTO</td>
</tr>

<tr>
Expand Down
Loading