Skip to content
Merged
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.57.0
current_version = 0.58.0
commit = True
tag = False
message = chore: Bump version from {current_version} to {new_version}
Expand Down
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# History

## 0.58.0 (2025-09-22)

- (PR #890, 2025-09-16) cte: Add parser for "Datos del Contribuyente"
- (PR #896, 2025-09-16) rut: Add regex for canonical RUT that is compatible with JSON Schema
- (PR #895, 2025-09-16) cte: Add parser for "Propiedades y Bienes Raíces"

## 0.57.0 (2025-09-15)

- (PR #888, 2025-09-10) tests: Refactor and improve constants tests
Expand Down
2 changes: 1 addition & 1 deletion src/cl_sii/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

"""

__version__ = '0.57.0'
__version__ = '0.58.0'
135 changes: 135 additions & 0 deletions src/cl_sii/cte/data_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from collections.abc import Sequence
from datetime import date
from decimal import Decimal
from typing import Optional

import pydantic

Expand Down Expand Up @@ -38,3 +41,135 @@ class LegalRepresentative:
"""
Fecha de incorporación.
"""


@pydantic.dataclasses.dataclass(
    config=pydantic.ConfigDict(
        extra='forbid',
        arbitrary_types_allowed=True,
    ),
    frozen=True,
)
class TaxpayerData:
    # Immutable record of taxpayer data: activity start date, economic activities,
    # tax category, address, branches, last filed documents and tax observations.
    # Unknown constructor arguments are rejected (``extra='forbid'``).

    start_of_activities_date: Optional[date]
    """
    Start of activities date ("Fecha de inicio de actividades").
    """

    economic_activities: str
    """
    Economic activities ("Actividades Económicas").
    """

    tax_category: str
    """
    Tax category ("Categoría Tributaria").
    """

    address: str
    """
    Address ("Domicilio").
    """

    branches: Sequence[str]
    """
    Branches ("Sucursales").
    """

    last_filed_documents: Sequence[LastFiledDocument]
    """
    Most recently stamped documents ("Últimos documentos timbrados").
    """

    tax_observations: Optional[str] = None
    """
    Tax observations ("Observaciones tributarias").
    """


@pydantic.dataclasses.dataclass(frozen=True)
class LastFiledDocument:
    # Immutable (name, date) pair describing a single filed document.

    # Name of the document.
    name: str

    # Date associated with the document.
    date: date


@pydantic.dataclasses.dataclass(
    config=pydantic.ConfigDict(
        extra='forbid',
        arbitrary_types_allowed=True,
    ),
    frozen=True,
)
class TaxpayerProperties:
    """
    Propiedades y Bienes Raíces (3)
    """

    # The docstring above is the literal title of the section ("Properties and Real Estate (3)").
    # Immutable container; unknown constructor arguments are rejected (``extra='forbid'``).

    # Properties listed in the section, one entry per property.
    properties: Sequence[Property]


@pydantic.dataclasses.dataclass(
    frozen=True,
)
class Property:
    # Immutable record describing a single property (real estate) entry.
    #
    # Every field is optional. The validators below accept raw text values and
    # normalize them: blank strings become ``None``, "SI"/"NO" become booleans,
    # and the fiscal valuation text is converted to ``Decimal``.

    commune: Optional[str]
    """
    Commune ("Comuna").
    """
    role: Optional[str]
    """
    Role ("Rol").
    """
    address: Optional[str]
    """
    Address ("Dirección").
    """
    purpose: Optional[str]
    """
    Purpose ("Destino").
    """
    fiscal_valuation: Optional[Decimal]
    """
    Fiscal valuation ("Avalúo Fiscal").
    """
    overdue_installments: Optional[bool]
    """
    Overdue installments pending payment ("Cuotas vencidas por pagar").
    """
    current_installments: Optional[bool]
    """
    Current installments pending payment ("Cuotas vigentes por pagar").
    """
    condition: Optional[str]
    """
    Condition ("Condición").
    """

    ###########################################################################
    # Validators
    ###########################################################################

    @pydantic.field_validator('fiscal_valuation', mode='before')
    @classmethod
    def parse_fiscal_valuation(cls, v: Optional[str]) -> Optional[Decimal]:
        """
        Convert a textual amount that uses '.' as thousands separator and ','
        as decimal separator (e.g. '1.234.567,89') into a ``Decimal``.

        Non-string values are returned untouched so that the regular field
        validation handles them.

        Raises:
            ValueError: If the text is not a valid number.
        """
        if not isinstance(v, str):
            return v

        normalized = v.replace('.', '').replace(',', '.')
        try:
            return Decimal(normalized)
        except ArithmeticError as exc:
            # ``decimal.InvalidOperation`` is an ``ArithmeticError``, not a ``ValueError``.
            # Validators must raise ``ValueError`` for the failure to be reported as a
            # validation error instead of escaping as a raw exception.
            raise ValueError(f"Invalid fiscal valuation: {v!r}") from exc

    @pydantic.field_validator('commune', 'role', 'address', 'purpose', 'condition')
    @classmethod
    def parse_str_fields(cls, v: Optional[str]) -> Optional[str]:
        """
        Normalize empty or whitespace-only strings to ``None``.
        """
        if isinstance(v, str) and not v.strip():
            return None
        return v

    @pydantic.field_validator('current_installments', 'overdue_installments', mode='before')
    @classmethod
    def parse_boolean_fields(cls, v: Optional[str | bool]) -> Optional[bool]:
        """
        Map 'SI' to ``True`` and 'NO' to ``False``; booleans pass through unchanged.

        Any other value (including any other string) results in ``None``.
        """
        if isinstance(v, str):
            if v == 'NO':
                return False
            elif v == 'SI':
                return True
            else:
                return None
        if isinstance(v, bool):
            return v
        return None
147 changes: 146 additions & 1 deletion src/cl_sii/cte/parsers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from __future__ import annotations

from datetime import datetime

from bs4 import BeautifulSoup

from .data_models import LegalRepresentative, TaxpayerProvidedInfo
from .data_models import (
LastFiledDocument,
LegalRepresentative,
Property,
TaxpayerData,
TaxpayerProperties,
TaxpayerProvidedInfo,
)


def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
Expand Down Expand Up @@ -89,3 +98,139 @@ def parse_taxpayer_provided_info(html_content: str) -> TaxpayerProvidedInfo:
company_formation=company_formation,
participation_in_existing_companies=participation_in_companies,
)


def parse_taxpayer_data(html_content: str) -> TaxpayerData:
    """
    Extract the "Datos del Contribuyente" section from the CTE HTML content.

    Args:
        html_content: HTML string containing the taxpayer information table

    Returns:
        TaxpayerData instance with the parsed data

    Raises:
        ValueError: If the table with id ``tbl_dbcontribuyente`` is not found.
    """
    date_format = "%d-%m-%Y"

    document = BeautifulSoup(html_content, 'html.parser')
    table = document.find('table', id='tbl_dbcontribuyente')
    if not table:
        raise ValueError("Could not find 'Datos del Contribuyente' table in HTML")

    # Start of activities: stays None when the cell is missing or empty.
    start_of_activities_date = None
    start_cell = table.find(id='td_fecha_inicio')  # type: ignore[attr-defined]
    if start_cell:
        start_text = start_cell.get_text(strip=True)
        if start_text:
            start_of_activities_date = datetime.strptime(start_text, date_format).date()

    # Plain text cells: fall back to an empty string when the cell is missing.
    activities_cell = table.find(id='td_actividades')  # type: ignore[attr-defined]
    economic_activities = (
        activities_cell.get_text(separator="\n", strip=True) if activities_cell else ""
    )

    category_cell = table.find(id='td_categoria')  # type: ignore[attr-defined]
    tax_category = category_cell.get_text(strip=True) if category_cell else ""

    address_cell = table.find(id='td_domicilio')  # type: ignore[attr-defined]
    address = address_cell.get_text(strip=True) if address_cell else ""

    # Branches: the values live in the cell next to the one labelled "Sucursales:".
    branches = []
    branches_label = table.find(  # type: ignore[attr-defined]
        'td',
        string=lambda text: text and 'Sucursales:' in text,
    )
    if branches_label:
        branches_cell = branches_label.find_next_sibling('td')
        if branches_cell:
            branch_lines = branches_cell.get_text(separator="\n", strip=True).split("\n")
            branches = [line for line in branch_lines if line]

    # Last stamped documents: names and dates come from two parallel cells.
    last_filed_documents = []
    doc_names_cell = table.find(id='td_tim_nombre')  # type: ignore[attr-defined]
    doc_dates_cell = table.find(id='td_tim_fecha')  # type: ignore[attr-defined]
    if doc_names_cell and doc_dates_cell:
        doc_names = doc_names_cell.get_text(separator="\n", strip=True).split("\n")
        doc_dates = doc_dates_cell.get_text(separator="\n", strip=True).split("\n")
        last_filed_documents = [
            LastFiledDocument(
                name=doc_name,
                date=datetime.strptime(doc_date, date_format).date(),
            )
            for doc_name, doc_date in zip(doc_names, doc_dates)
            if doc_name and doc_date
        ]

    # Tax observations: None when the cell is missing.
    observations_cell = table.find(id='td_observaciones')  # type: ignore[attr-defined]
    tax_observations = observations_cell.get_text(strip=True) if observations_cell else None

    return TaxpayerData(
        start_of_activities_date=start_of_activities_date,
        economic_activities=economic_activities,
        tax_category=tax_category,
        address=address,
        branches=branches,
        last_filed_documents=last_filed_documents,
        tax_observations=tax_observations,
    )


def parse_taxpayer_properties(html_content: str) -> TaxpayerProperties:
    """
    Parse the CTE HTML content to extract the content of the section:
    "Propiedades y Bienes Raíces (3)"

    Args:
        html_content: HTML string containing the taxpayer properties table

    Returns:
        TaxpayerProperties instance with one ``Property`` per data row

    Raises:
        ValueError: If the table with id ``tbl_propiedades`` is not found.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Find the main table with id="tbl_propiedades"
    table = soup.find('table', id='tbl_propiedades')
    if not table:
        # Name the section being looked up so the failure is not confused with
        # the other CTE tables.
        raise ValueError("Could not find 'Propiedades y Bienes Raíces' table in HTML")

    properties = []
    rows = table.find_all('tr')  # type: ignore[attr-defined]
    for row in rows[2:]:  # Skip the two header rows
        # Skip rows that do not have all 8 data columns
        cells = row.find_all('td')
        if len(cells) < 8:
            continue

        # Empty cells are represented as None; the remaining text conversion
        # (decimal amount, "SI"/"NO" flags) is left to the ``Property`` model.
        (
            commune,
            role,
            address,
            purpose,
            fiscal_valuation,
            overdue_installments,
            current_installments,
            condition,
        ) = [cell.get_text(strip=True) or None for cell in cells[:8]]

        properties.append(
            Property(
                commune=commune,
                role=role,
                address=address,
                purpose=purpose,
                fiscal_valuation=fiscal_valuation,
                overdue_installments=overdue_installments,
                current_installments=current_installments,
                condition=condition,
            )
        )
    return TaxpayerProperties(properties=properties)
9 changes: 2 additions & 7 deletions src/cl_sii/extras/pydantic_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from __future__ import annotations

import re
import sys
from typing import Any, ClassVar, Pattern

Expand Down Expand Up @@ -79,12 +78,8 @@ class _RutPydanticAnnotation:
>>> example_json_schema = example_type_adapter.json_schema()
"""

RUT_CANONICAL_STRICT_REGEX: ClassVar[Pattern] = re.compile(
re.sub(
pattern=r'\?P<\w+>',
repl='',
string=cl_sii.rut.constants.RUT_CANONICAL_STRICT_REGEX.pattern,
)
RUT_CANONICAL_STRICT_REGEX: ClassVar[Pattern] = (
cl_sii.rut.constants.RUT_CANONICAL_STRICT_JSON_SCHEMA_REGEX
)
"""
RUT (strict) regex for canonical format, without named groups.
Expand Down
12 changes: 12 additions & 0 deletions src/cl_sii/rut/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import re
from typing import Pattern

import cryptography.x509

Expand All @@ -22,6 +23,17 @@
RUT_DIGITS_MIN_VALUE = 1
"""RUT digits min value."""

RUT_CANONICAL_STRICT_JSON_SCHEMA_REGEX: Pattern[str] = re.compile(r"^(\d{1,8})-([\dK])$")
"""
RUT (strict) JSON Schema regex for canonical format.

The pattern uses only unnamed capturing groups because JSON Schema and OpenAPI follow the
JavaScript (ECMA 262) regular expression syntax, which does not support Python's named groups.

.. tip:: When the regex is needed as a string (for example, inside a JSON Schema or
    OpenAPI schema), use ``RUT_CANONICAL_STRICT_JSON_SCHEMA_REGEX.pattern``.
"""

SII_CERT_TITULAR_RUT_OID = cryptography.x509.oid.ObjectIdentifier("1.3.6.1.4.1.8321.1")
"""OID of the RUT of the certificate holder"""
# - Organismo: MINISTERIO DE ECONOMÍA / SUBSECRETARIA DE ECONOMIA
Expand Down
Loading
Loading