Article 1
-Some text with bold and emphasis
-
- diff --git a/Cargo.toml b/Cargo.toml index f054fd4..4f41e57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,11 @@ members = [ resolver = "2" [workspace.dependencies] -pyo3 = { version = "0.27.0", features = ["extension-module"] } +pyo3 = { version = "0.27.1", features = ["extension-module"] } quick-xml = "0.38.3" # https://ohadravid.github.io/posts/2023-03-rusty-python [profile.release] -debug = true # Debug symbols for profiler. -lto = true # Link-time optimization. -codegen-units = 1 # Slower compilation but faster code. +debug = true # Debug symbols for profiler. +lto = true # Link-time optimization. +codegen-units = 1 # Slower compilation but faster code. diff --git a/README.md b/README.md index b8ebc79..52bbb5c 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,24 @@ print(captured) # } ``` +## Architecture + +This project uses a multi-crate Rust workspace structure to maintain clean separation of concerns: + +### Crate structure + +- **`djc-html-transformer`**: Pure Rust library for HTML transformation +- **`djc-template-parser`**: Pure Rust library for Django template parsing +- **`djc-core`**: Python bindings that combine all other libraries + +### Design philosophy + +To make sense of the code, the Python API and Rust logic are defined separately: + +1. Each crate (AKA Rust package) has `lib.rs` (which is like Python's `__init__.py`). These files do not define the main logic, but only the public API of the crate, i.e. the API that is to be used by other crates. +2. The `djc-core` crate imports other crates +3. And it is only this `djc-core` where we define the Python API using PyO3. + ## Development 1. 
Setup python env diff --git a/crates/djc-core/Cargo.toml b/crates/djc-core/Cargo.toml index dea3d94..c8ed537 100644 --- a/crates/djc-core/Cargo.toml +++ b/crates/djc-core/Cargo.toml @@ -1,5 +1,6 @@ [package] name = "djc-core" +description = "Singular Python API for Rust code used by django-components" version = "1.1.0" edition = "2021" diff --git a/crates/djc-core/src/lib.rs b/crates/djc-core/src/lib.rs index 111c825..dcc7135 100644 --- a/crates/djc-core/src/lib.rs +++ b/crates/djc-core/src/lib.rs @@ -1,9 +1,76 @@ -use djc_html_transformer::set_html_attributes; +use djc_html_transformer::{ + set_html_attributes as set_html_attributes_rust, HtmlTransformerConfig, +}; +use pyo3::exceptions::{PyValueError}; use pyo3::prelude::*; +use pyo3::types::{PyDict, PyTuple}; -/// A Python module implemented in Rust for high-performance transformations. +/// Singular Python API that brings together all the other Rust crates. #[pymodule] fn djc_core(m: &Bound<'_, PyModule>) -> PyResult<()> { + // HTML transformer m.add_function(wrap_pyfunction!(set_html_attributes, m)?)?; Ok(()) } + +/// Transform HTML by adding attributes to the elements. +/// +/// Args: +/// html (str): The HTML string to transform. Can be a fragment or full document. +/// root_attributes (List[str]): List of attribute names to add to root elements only. +/// all_attributes (List[str]): List of attribute names to add to all elements. +/// check_end_names (bool, optional): Whether to validate matching of end tags. Defaults to false. +/// watch_on_attribute (str, optional): If set, captures which attributes were added to elements with this attribute. +/// +/// Returns: +/// Tuple[str, Dict[str, List[str]]]: A tuple containing: +/// - The transformed HTML string +/// - A dictionary mapping captured attribute values to lists of attributes that were added +/// to those elements. Only returned if watch_on_attribute is set, otherwise empty dict. +/// +/// Example: +/// >>> html = '
Hello
Hello
Hello
Some text with bold and emphasis
-
- Hello
Text", // Non-matching end tag - ]; - - for input in lenient_cases { - assert!(transform(&config, input).is_ok()); - } - - // Test with check_end_names = true (strict mode) - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-v-123".to_string()], - true, // Check end names - None, - ); - - // These should fail with check_end_names = true - for input in lenient_cases { - assert!(transform(&config, input).is_err()); - } - - // But well-formed HTML should still work - let valid_input = "
Hello
Regular element
- Nested element -
-
"#));
-
- // Verify attribute capturing
- assert_eq!(captured.len(), 3);
- assert!(captured.iter().any(|(id, attrs)| id == "123"
- && attrs.contains(&"data-root".to_string())
- && attrs.contains(&"data-v-123".to_string())));
- assert!(captured
- .iter()
- .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string())));
- assert!(captured
- .iter()
- .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string())));
- }
+ transform(config, html)
}
diff --git a/crates/djc-html-transformer/src/transformer.rs b/crates/djc-html-transformer/src/transformer.rs
new file mode 100644
index 0000000..52c4fc3
--- /dev/null
+++ b/crates/djc-html-transformer/src/transformer.rs
@@ -0,0 +1,406 @@
+use quick_xml::events::{BytesStart, Event};
+use quick_xml::reader::Reader;
+use quick_xml::writer::Writer;
+use std::collections::HashSet;
+use std::io::Cursor;
+
+// List of HTML5 void elements. These can be written as `Hello
Some text with bold and emphasis
+
+ Hello
Text", // Non-matching end tag + ]; + + for input in lenient_cases { + assert!(transform(&config, input).is_ok()); + } + + // Test with check_end_names = true (strict mode) + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-v-123".to_string()], + true, // Check end names + None, + ); + + // These should fail with check_end_names = true + for input in lenient_cases { + assert!(transform(&config, input).is_err()); + } + + // But well-formed HTML should still work + let valid_input = "
Hello
Regular element
+ Nested element +
+
"#));
+
+ // Verify attribute capturing
+ assert_eq!(captured.len(), 3);
+ assert!(captured.iter().any(|(id, attrs)| id == "123"
+ && attrs.contains(&"data-root".to_string())
+ && attrs.contains(&"data-v-123".to_string())));
+ assert!(captured
+ .iter()
+ .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string())));
+ assert!(captured
+ .iter()
+ .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string())));
+ }
+}
diff --git a/djc_core/__init__.py b/djc_core/__init__.py
index 68add3b..2dfb9ee 100644
--- a/djc_core/__init__.py
+++ b/djc_core/__init__.py
@@ -1,6 +1,6 @@
-# DO NOT MODIFY, ONLY UPDATE THE MODULE NAME WHEN NEEDED!
# This file is what maturin auto-generates. But it seems maturin omits it when we have a __init__.pyi file.
# So we have to manually include it here.
+# The following block of code is what maturin would have generated.
from .djc_core import *
diff --git a/djc_core/__init__.pyi b/djc_core/__init__.pyi
index eeeff7c..1774962 100644
--- a/djc_core/__init__.pyi
+++ b/djc_core/__init__.pyi
@@ -1,34 +1 @@
-from typing import List, Dict, Optional
-
-def set_html_attributes(
- html: str,
- root_attributes: List[str],
- all_attributes: List[str],
- check_end_names: Optional[bool] = None,
- watch_on_attribute: Optional[str] = None,
-) -> tuple[str, Dict[str, List[str]]]:
- """
- Transform HTML by adding attributes to root and all elements.
-
- Args:
- html (str): The HTML string to transform. Can be a fragment or full document.
- root_attributes (List[str]): List of attribute names to add to root elements only.
- all_attributes (List[str]): List of attribute names to add to all elements.
- check_end_names (Optional[bool]): Whether to validate matching of end tags. Defaults to None.
- watch_on_attribute (Optional[str]): If set, captures which attributes were added to elements with this attribute.
-
- Returns:
- A tuple containing:
- - The transformed HTML string
- - A dictionary mapping captured attribute values to lists of attributes that were added
- to those elements. Only returned if watch_on_attribute is set, otherwise empty dict.
-
- Example:
- >>> html = 'Hello
Hello
Hello
Hello