From 1738db4864dfc9e3ae3a451d0063088e3939bb5e Mon Sep 17 00:00:00 2001 From: Juro Oravec Date: Wed, 22 Oct 2025 17:46:33 +0200 Subject: [PATCH] refactor: restructure codebase as monorepo --- CHANGELOG.md | 5 ++++ Cargo.lock | 11 +++++++- Cargo.toml | 17 +++++------- README.md | 26 ++++++++++++------- crates/djc-core/Cargo.toml | 14 ++++++++++ crates/djc-core/src/lib.rs | 9 +++++++ crates/djc-html-transformer/Cargo.toml | 8 ++++++ .../djc-html-transformer/src/lib.rs | 0 .../__init__.py | 8 +++--- .../__init__.pyi | 0 {djc_core_html_parser => djc_core}/py.typed | 0 pyproject.toml | 22 +++++++++------- src/lib.rs | 10 ------- tests/benchmark.py | 2 +- ...tml_parser.py => test_html_transformer.py} | 2 +- 15 files changed, 87 insertions(+), 47 deletions(-) create mode 100644 crates/djc-core/Cargo.toml create mode 100644 crates/djc-core/src/lib.rs create mode 100644 crates/djc-html-transformer/Cargo.toml rename src/html_parser.rs => crates/djc-html-transformer/src/lib.rs (100%) rename {djc_core_html_parser => djc_core}/__init__.py (56%) rename {djc_core_html_parser => djc_core}/__init__.pyi (100%) rename {djc_core_html_parser => djc_core}/py.typed (100%) delete mode 100644 src/lib.rs rename tests/{test_html_parser.py => test_html_transformer.py} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4160fc0..2240bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Release notes +## v1.1.0 + +- Renamed package from `djc-core-html-parser` to `djc-core` +- Refactored project into a monorepo + ## v1.0.3 - Update to Python 3.14 diff --git a/Cargo.lock b/Cargo.lock index bc7c6a9..e133bd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] -name = "djc_core_html_parser" +name = "djc-core" +version = "1.1.0" +dependencies = [ + "djc-html-transformer", + "pyo3", + "quick-xml", +] + +[[package]] +name = 
"djc-html-transformer" version = "1.0.3" dependencies = [ "pyo3", diff --git a/Cargo.toml b/Cargo.toml index dae24e3..f054fd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,14 +1,11 @@ -[package] -name = "djc_core_html_parser" -version = "1.0.3" -edition = "2021" +[workspace] +members = [ + "crates/djc-core", + "crates/djc-html-transformer", +] +resolver = "2" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -name = "djc_core_html_parser" -crate-type = ["cdylib"] - -[dependencies] +[workspace.dependencies] pyo3 = { version = "0.27.0", features = ["extension-module"] } quick-xml = "0.38.3" diff --git a/README.md b/README.md index afc6050..b8ebc79 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,27 @@ -# djc-core-html-parser +# djc-core -[![PyPI - Version](https://img.shields.io/pypi/v/djc-core-html-parser)](https://pypi.org/project/djc-core-html-parser/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/djc-core-html-parser)](https://pypi.org/project/djc-core-html-parser/) [![PyPI - License](https://img.shields.io/pypi/l/djc-core-html-parser)](https://github.com/django-components/djc-core-html-parser/blob/master/LICENSE/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/djc-core-html-parser)](https://pypistats.org/packages/djc-core-html-parser) [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/django-components/djc-core-html-parser/tests.yml)](https://github.com/django-components/djc-core-html-parser/actions/workflows/tests.yml) +[![PyPI - Version](https://img.shields.io/pypi/v/djc-core)](https://pypi.org/project/djc-core/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/djc-core)](https://pypi.org/project/djc-core/) [![PyPI - License](https://img.shields.io/pypi/l/djc-core)](https://github.com/django-components/djc-core/blob/master/LICENSE/) [![PyPI - 
Downloads](https://img.shields.io/pypi/dm/djc-core)](https://pypistats.org/packages/djc-core) [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/django-components/djc-core/tests.yml)](https://github.com/django-components/djc-core/actions/workflows/tests.yml) -HTML parser used by [django-components](https://github.com/django-components/django-components). Written in Rust, exposed as a Python package with [maturin](https://www.maturin.rs/). - -This implementation was found to be 40-50x faster than our Python implementation, taking ~90ms to parse 5 MB of HTML. +Rust-based parsers and tooling used by [django-components](https://github.com/django-components/django-components). Exposed as a Python package with [maturin](https://www.maturin.rs/). ## Installation ```sh -pip install djc-core-html-parser +pip install djc-core ``` -## Usage +## Packages + +### HTML transformer + +Transform HTML in a single pass. This is a simple implementation. + +This implementation was found to be 40-50x faster than our Python implementation, taking ~90ms to parse 5 MB of HTML. + +**Usage** ```python -from djc_core_html_parser import set_html_attributes +from djc_core import set_html_attributes html = '

Hello

' result, _ = set_html_attributes( @@ -39,7 +45,7 @@ Then, during the HTML transformation, we check each element for this attribute. 2. Record the attributes that were added to the element, using the value of the watched attribute as the key. ```python -from djc_core_html_parser import set_html_attributes +from djc_core import set_html_attributes html = """
@@ -117,4 +123,4 @@ To publish a new version of the package, you need to: 1. Bump the version in `pyproject.toml` and `Cargo.toml` 2. Open a PR and merge it to `main`. -3. Create a new tag on the `main` branch with the new version number (e.g. `v1.0.0`), or create a new release in the GitHub UI. +3. Create a new tag on the `main` branch with the new version number (e.g. `1.0.0`), or create a new release in the GitHub UI. diff --git a/crates/djc-core/Cargo.toml b/crates/djc-core/Cargo.toml new file mode 100644 index 0000000..dea3d94 --- /dev/null +++ b/crates/djc-core/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "djc-core" +version = "1.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "djc_core" +crate-type = ["cdylib"] + +[dependencies] +djc-html-transformer = { path = "../djc-html-transformer" } +pyo3 = { workspace = true } +quick-xml = { workspace = true } diff --git a/crates/djc-core/src/lib.rs b/crates/djc-core/src/lib.rs new file mode 100644 index 0000000..111c825 --- /dev/null +++ b/crates/djc-core/src/lib.rs @@ -0,0 +1,9 @@ +use djc_html_transformer::set_html_attributes; +use pyo3::prelude::*; + +/// A Python module implemented in Rust for high-performance transformations. 
+#[pymodule] +fn djc_core(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(set_html_attributes, m)?)?; + Ok(()) +} diff --git a/crates/djc-html-transformer/Cargo.toml b/crates/djc-html-transformer/Cargo.toml new file mode 100644 index 0000000..d47e672 --- /dev/null +++ b/crates/djc-html-transformer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "djc-html-transformer" +version = "1.0.3" +edition = "2021" + +[dependencies] +pyo3 = { workspace = true } +quick-xml = { workspace = true } diff --git a/src/html_parser.rs b/crates/djc-html-transformer/src/lib.rs similarity index 100% rename from src/html_parser.rs rename to crates/djc-html-transformer/src/lib.rs diff --git a/djc_core_html_parser/__init__.py b/djc_core/__init__.py similarity index 56% rename from djc_core_html_parser/__init__.py rename to djc_core/__init__.py index 0b84cc2..68add3b 100644 --- a/djc_core_html_parser/__init__.py +++ b/djc_core/__init__.py @@ -2,8 +2,8 @@ # This file is what maturin auto-generates. But it seems maturin omits it when we have a __init__.pyi file. # So we have to manually include it here. 
-from .djc_core_html_parser import * +from .djc_core import * -__doc__ = djc_core_html_parser.__doc__ -if hasattr(djc_core_html_parser, "__all__"): - __all__ = djc_core_html_parser.__all__ +__doc__ = djc_core.__doc__ +if hasattr(djc_core, "__all__"): + __all__ = djc_core.__all__ diff --git a/djc_core_html_parser/__init__.pyi b/djc_core/__init__.pyi similarity index 100% rename from djc_core_html_parser/__init__.pyi rename to djc_core/__init__.pyi diff --git a/djc_core_html_parser/py.typed b/djc_core/py.typed similarity index 100% rename from djc_core_html_parser/py.typed rename to djc_core/py.typed diff --git a/pyproject.toml b/pyproject.toml index 5e4bb6d..690a073 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,8 +3,8 @@ requires = ["maturin>=1.8,<2.0"] build-backend = "maturin" [project] -name = "djc_core_html_parser" -version = "1.0.3" +name = "djc_core" +version = "1.1.0" requires-python = ">=3.8, <4.0" description = "HTML parser used by django-components written in Rust." keywords = ["django", "components", "html"] @@ -31,17 +31,19 @@ license = {text = "MIT"} # See https://docs.pypi.org/project_metadata/#icons [project.urls] -Homepage = "https://github.com/django-components/djc-core-html-parser/" -Changelog = "https://github.com/django-components/djc-core-html-parser/blob/main/CHANGELOG.md" -Issues = "https://github.com/django-components/djc-core-html-parser/issues" +Homepage = "https://github.com/django-components/djc-core/" +Changelog = "https://github.com/django-components/djc-core/blob/main/CHANGELOG.md" +Issues = "https://github.com/django-components/djc-core/issues" Donate = "https://github.com/sponsors/EmilStenstrom" [tool.maturin] +# This is the crate that will be exposed to Python +manifest-path = "crates/djc-core/Cargo.toml" features = ["pyo3/extension-module"] include = [ - "djc_core_html_parser/__init__.py", - "djc_core_html_parser/__init__.pyi", - "djc_core_html_parser/py.typed", + "djc_core/__init__.py", + "djc_core/__init__.pyi", + 
"djc_core/py.typed", ] [tool.black] @@ -67,7 +69,7 @@ profile = "black" line_length = 119 multi_line_output = 3 include_trailing_comma = "True" -known_first_party = "djc_core_html_parser" +known_first_party = "djc_core" [tool.flake8] ignore = ['E302', 'W503'] @@ -92,7 +94,7 @@ exclude = [ ] [[tool.mypy.overrides]] -module = "djc_core_html_parser.*" +module = "djc_core.*" disallow_untyped_defs = true diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 4d591d1..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -use pyo3::prelude::*; - -mod html_parser; - -/// A Python module implemented in Rust for high-performance HTML transformation. -#[pymodule] -fn djc_core_html_parser(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(html_parser::set_html_attributes, m)?)?; - Ok(()) -} diff --git a/tests/benchmark.py b/tests/benchmark.py index 1cf02b1..9691812 100644 --- a/tests/benchmark.py +++ b/tests/benchmark.py @@ -1,7 +1,7 @@ from statistics import mean, stdev import time -from djc_core_html_parser import set_html_attributes +from djc_core import set_html_attributes def generate_large_html(num_elements: int = 1000) -> str: diff --git a/tests/test_html_parser.py b/tests/test_html_transformer.py similarity index 99% rename from tests/test_html_parser.py rename to tests/test_html_transformer.py index 57cf3b1..8b93df6 100644 --- a/tests/test_html_parser.py +++ b/tests/test_html_transformer.py @@ -1,7 +1,7 @@ # This same set of tests is also found in django-components, to ensure that # this implementation can be replaced with the django-components' pure-python implementation -from djc_core_html_parser import set_html_attributes +from djc_core import set_html_attributes from typing import Dict, List