From 732a7a7543012dd5022fc821895d58afb27b7fd7 Mon Sep 17 00:00:00 2001 From: Juro Oravec Date: Thu, 23 Oct 2025 09:35:13 +0200 Subject: [PATCH 1/3] refactor: move python public API to djc-core crate --- Cargo.toml | 8 +- crates/djc-core/Cargo.toml | 1 + crates/djc-core/src/lib.rs | 71 ++- crates/djc-html-transformer/Cargo.toml | 2 +- crates/djc-html-transformer/src/lib.rs | 472 +----------------- .../djc-html-transformer/src/transformer.rs | 406 +++++++++++++++ djc_core/__init__.py | 2 +- djc_core/__init__.pyi | 35 +- djc_core/djc_html_transformer.pyi | 36 ++ 9 files changed, 532 insertions(+), 501 deletions(-) create mode 100644 crates/djc-html-transformer/src/transformer.rs create mode 100644 djc_core/djc_html_transformer.pyi diff --git a/Cargo.toml b/Cargo.toml index f054fd4..4f41e57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,11 @@ members = [ resolver = "2" [workspace.dependencies] -pyo3 = { version = "0.27.0", features = ["extension-module"] } +pyo3 = { version = "0.27.1", features = ["extension-module"] } quick-xml = "0.38.3" # https://ohadravid.github.io/posts/2023-03-rusty-python [profile.release] -debug = true # Debug symbols for profiler. -lto = true # Link-time optimization. -codegen-units = 1 # Slower compilation but faster code. +debug = true # Debug symbols for profiler. +lto = true # Link-time optimization. +codegen-units = 1 # Slower compilation but faster code. 
diff --git a/crates/djc-core/Cargo.toml b/crates/djc-core/Cargo.toml index dea3d94..c8ed537 100644 --- a/crates/djc-core/Cargo.toml +++ b/crates/djc-core/Cargo.toml @@ -1,5 +1,6 @@ [package] name = "djc-core" +description = "Singular Python API for Rust code used by django-components" version = "1.1.0" edition = "2021" diff --git a/crates/djc-core/src/lib.rs b/crates/djc-core/src/lib.rs index 111c825..dcc7135 100644 --- a/crates/djc-core/src/lib.rs +++ b/crates/djc-core/src/lib.rs @@ -1,9 +1,76 @@ -use djc_html_transformer::set_html_attributes; +use djc_html_transformer::{ + set_html_attributes as set_html_attributes_rust, HtmlTransformerConfig, +}; +use pyo3::exceptions::{PyValueError}; use pyo3::prelude::*; +use pyo3::types::{PyDict, PyTuple}; -/// A Python module implemented in Rust for high-performance transformations. +/// Singular Python API that brings together all the other Rust crates. #[pymodule] fn djc_core(m: &Bound<'_, PyModule>) -> PyResult<()> { + // HTML transformer m.add_function(wrap_pyfunction!(set_html_attributes, m)?)?; Ok(()) } + +/// Transform HTML by adding attributes to the elements. +/// +/// Args: +/// html (str): The HTML string to transform. Can be a fragment or full document. +/// root_attributes (List[str]): List of attribute names to add to root elements only. +/// all_attributes (List[str]): List of attribute names to add to all elements. +/// check_end_names (bool, optional): Whether to validate matching of end tags. Defaults to false. +/// watch_on_attribute (str, optional): If set, captures which attributes were added to elements with this attribute. +/// +/// Returns: +/// Tuple[str, Dict[str, List[str]]]: A tuple containing: +/// - The transformed HTML string +/// - A dictionary mapping captured attribute values to lists of attributes that were added +/// to those elements. Only returned if watch_on_attribute is set, otherwise empty dict. +/// +/// Example: +/// >>> html = '

Hello

' +/// >>> html, captured = set_html_attributes(html, ['data-root-id'], ['data-v-123'], watch_on_attribute='data-id') +/// >>> print(captured) +/// {'123': ['data-root-id', 'data-v-123']} +/// +/// Raises: +/// ValueError: If the HTML is malformed or cannot be parsed. +#[pyfunction] +#[pyo3(signature = (html, root_attributes, all_attributes, check_end_names=None, watch_on_attribute=None))] +#[pyo3( + text_signature = "(html, root_attributes, all_attributes, *, check_end_names=False, watch_on_attribute=None)" +)] +pub fn set_html_attributes( + py: Python, + html: &str, + root_attributes: Vec, + all_attributes: Vec, + check_end_names: Option, + watch_on_attribute: Option, +) -> PyResult> { + let config = HtmlTransformerConfig::new( + root_attributes, + all_attributes, + check_end_names.unwrap_or(false), + watch_on_attribute, + ); + + match set_html_attributes_rust(html, &config) { + Ok((html, captured)) => { + // Convert captured attributes to a Python dictionary + let captured_dict = PyDict::new(py); + for (id, attrs) in captured { + captured_dict.set_item(id, attrs)?; + } + + // Convert items to Bound for the tuple + use pyo3::types::PyString; + let html_obj = PyString::new(py, &html).as_any().clone(); + let dict_obj = captured_dict.as_any().clone(); + let result = PyTuple::new(py, vec![html_obj, dict_obj])?; + Ok(result.into_any().unbind()) + } + Err(e) => Err(PyValueError::new_err(e.to_string())), + } +} diff --git a/crates/djc-html-transformer/Cargo.toml b/crates/djc-html-transformer/Cargo.toml index d47e672..e86db0d 100644 --- a/crates/djc-html-transformer/Cargo.toml +++ b/crates/djc-html-transformer/Cargo.toml @@ -1,8 +1,8 @@ [package] name = "djc-html-transformer" +description = "Apply attributes to HTML in a single pass" version = "1.0.3" edition = "2021" [dependencies] -pyo3 = { workspace = true } quick-xml = { workspace = true } diff --git a/crates/djc-html-transformer/src/lib.rs b/crates/djc-html-transformer/src/lib.rs index 844e002..fe97ee3 100644 --- 
a/crates/djc-html-transformer/src/lib.rs +++ b/crates/djc-html-transformer/src/lib.rs @@ -1,471 +1,25 @@ -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use pyo3::types::{PyDict, PyTuple}; -use quick_xml::events::{BytesStart, Event}; -use quick_xml::reader::Reader; -use quick_xml::writer::Writer; -use std::collections::HashSet; -use std::io::Cursor; +use transformer::{transform}; -// List of HTML5 void elements. These can be written as `` or ``, -//e.g. `
`, ``, ``, etc. -const VOID_ELEMENTS: [&str; 14] = [ - "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", - "track", "wbr", -]; +pub mod transformer; + +// Re-export the types that users need +pub use transformer::{HtmlTransformerConfig}; /// Transform HTML by adding attributes to the elements. /// +/// This is the pure Rust version that takes a configuration object. +/// /// Args: -/// html (str): The HTML string to transform. Can be a fragment or full document. -/// root_attributes (List[str]): List of attribute names to add to root elements only. -/// all_attributes (List[str]): List of attribute names to add to all elements. -/// check_end_names (bool, optional): Whether to validate matching of end tags. Defaults to false. -/// watch_on_attribute (str, optional): If set, captures which attributes were added to elements with this attribute. +/// html: The HTML string to transform. Can be a fragment or full document. +/// config: The HTML transformer configuration. /// /// Returns: -/// Tuple[str, Dict[str, List[str]]]: A tuple containing: -/// - The transformed HTML string -/// - A dictionary mapping captured attribute values to lists of attributes that were added -/// to those elements. Only returned if watch_on_attribute is set, otherwise empty dict. -/// -/// Example: -/// >>> html = '

Hello

' -/// >>> html, captured = set_html_attributes(html, ['data-root-id'], ['data-v-123'], watch_on_attribute='data-id') -/// >>> print(captured) -/// {'123': ['data-root-id', 'data-v-123']} -/// -/// Raises: -/// ValueError: If the HTML is malformed or cannot be parsed. -#[pyfunction] -#[pyo3(signature = (html, root_attributes, all_attributes, check_end_names=None, watch_on_attribute=None))] -#[pyo3( - text_signature = "(html, root_attributes, all_attributes, *, check_end_names=False, watch_on_attribute=None)" -)] +/// A Result containing either: +/// - Ok((html, captured)): A tuple with the transformed HTML and captured attributes +/// - Err(error): An error if the HTML is malformed or cannot be parsed. pub fn set_html_attributes( - py: Python, html: &str, - root_attributes: Vec, - all_attributes: Vec, - check_end_names: Option, - watch_on_attribute: Option, -) -> PyResult> { - let config = HtmlTransformerConfig::new( - root_attributes, - all_attributes, - check_end_names.unwrap_or(false), - watch_on_attribute, - ); - - match transform(&config, html) { - Ok((html, captured)) => { - // Convert captured attributes to a Python dictionary - let captured_dict = PyDict::new(py); - for (id, attrs) in captured { - captured_dict.set_item(id, attrs)?; - } - - // Convert items to Bound for the tuple - use pyo3::types::PyString; - let html_obj = PyString::new(py, &html).as_any().clone(); - let dict_obj = captured_dict.as_any().clone(); - let result = PyTuple::new(py, vec![html_obj, dict_obj])?; - Ok(result.into_any().unbind()) - } - Err(e) => Err(PyValueError::new_err(e.to_string())), - } -} - -/// Configuration for HTML transformation -pub struct HtmlTransformerConfig { - root_attributes: Vec, - all_attributes: Vec, - void_elements: HashSet, - check_end_names: bool, - watch_on_attribute: Option, -} - -impl HtmlTransformerConfig { - pub fn new( - root_attributes: Vec, - all_attributes: Vec, - check_end_names: bool, - watch_on_attribute: Option, - ) -> Self { - let void_elements 
= VOID_ELEMENTS.iter().map(|&s| s.to_string()).collect(); - - HtmlTransformerConfig { - root_attributes, - all_attributes, - void_elements, - check_end_names, - watch_on_attribute, - } - } -} - -/// Add attributes to a HTML start tag (e.g. `
`) based on the configuration -fn add_attributes( - config: &HtmlTransformerConfig, - element: &mut BytesStart, - is_root: bool, - captured_attributes: &mut Vec<(String, Vec)>, -) { - let mut added_attrs = Vec::new(); - - // Add root attributes if this is a root element - if is_root { - for attr in &config.root_attributes { - element.push_attribute((attr.as_str(), "")); - added_attrs.push(attr.clone()); - } - } - - // Add attributes that should be applied to all elements - for attr in &config.all_attributes { - element.push_attribute((attr.as_str(), "")); - added_attrs.push(attr.clone()); - } - - // If we're watching for a specific attribute, check if this element has it - if let Some(watch_attr) = &config.watch_on_attribute { - if let Some(attr_value) = element - .attributes() - .find(|a| { - if let Ok(attr) = a { - String::from_utf8_lossy(attr.key.as_ref()) == *watch_attr - } else { - false - } - }) - .and_then(|a| a.ok()) - .map(|a| String::from_utf8_lossy(a.value.as_ref()).into_owned()) - { - captured_attributes.push((attr_value, added_attrs)); - } - } -} - -/// Main entrypoint. Transform HTML by adding attributes to the elements. -pub fn transform( config: &HtmlTransformerConfig, - html: &str, ) -> Result<(String, Vec<(String, Vec)>), Box> { - let mut reader = Reader::from_str(html); - let reader_config = reader.config_mut(); - reader_config.check_end_names = config.check_end_names; - // Allow bare & in HTML content (e.g. 
"Hello & Welcome" instead of requiring "Hello & Welcome") - // This is needed for compatibility with HTML5 which is more lenient than strict XML - reader_config.allow_dangling_amp = true; - - // We transform the HTML by reading it and writing it simultaneously - let mut writer = Writer::new(Cursor::new(Vec::new())); - let mut captured_attributes = Vec::new(); - - // Track the nesting depth of elements to identify root elements (depth == 0) - let mut depth: i32 = 0; - - // Read the HTML event by event - loop { - match reader.read_event() { - // Start tag - Ok(Event::Start(e)) => { - let tag_name = String::from_utf8_lossy(e.name().as_ref()) - .to_string() - .to_lowercase(); - let mut elem = e.into_owned(); - add_attributes(config, &mut elem, depth == 0, &mut captured_attributes); - - // For void elements, write as Empty event - if config.void_elements.contains(&tag_name) { - writer.write_event(Event::Empty(elem))?; - } else { - writer.write_event(Event::Start(elem))?; - depth += 1; - } - } - - // End tag - Ok(Event::End(e)) => { - let tag_name = String::from_utf8_lossy(e.name().as_ref()) - .to_string() - .to_lowercase(); - - // Skip end tags for void elements - if !config.void_elements.contains(&tag_name) { - writer.write_event(Event::End(e))?; - depth -= 1; - } - } - - // Empty element (AKA void or self-closing tag, e.g. `
`) - Ok(Event::Empty(e)) => { - let mut elem = e.into_owned(); - add_attributes(config, &mut elem, depth == 0, &mut captured_attributes); - writer.write_event(Event::Empty(elem))?; - } - - // End of file - Ok(Event::Eof) => break, - // Other events (e.g. comments, processing instructions, etc.) - Ok(e) => writer.write_event(e)?, - Err(e) => return Err(Box::new(e)), - } - } - - // Convert the transformed HTML to a string - let result = String::from_utf8(writer.into_inner().into_inner())?; - Ok((result, captured_attributes)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic_transformation() { - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-all".to_string()], - false, - None, - ); - - let input = "

Hello

"; - let (result, _) = transform(&config, input).unwrap(); - - assert!(result.contains("data-root")); - assert!(result.contains("data-all")); - } - - #[test] - fn test_multiple_roots() { - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-all".to_string()], - false, - None, - ); - - let input = "
First
Second"; - let (result, _) = transform(&config, input).unwrap(); - - // Both root elements should have data-root - assert_eq!(result.matches("data-root").count(), 2); - // All elements should have data-all - assert_eq!(result.matches("data-all").count(), 2); - } - - #[test] - fn test_complex_html() { - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-all".to_string(), "data-v-123".to_string()], - false, - None, - ); - - let input = r#" -
-
-

Hello & Welcome

- -
-
-
-

Article 1

-

Some text with bold and emphasis

- Test Image -
-
-
-
-

© 2024

-
- "#; - - let (result, _) = transform(&config, input).unwrap(); - - // Check root elements have root attributes - assert!(result.contains( - r#"
"# - )); - assert!(result.contains(r#"
"#)); - - // Check nested elements have all_attributes but not root_attributes - assert!(result.contains(r#"

"#)); - assert!(result.contains(r#"

")); - } - - #[test] - fn test_html_head_with_meta() { - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-v-123".to_string()], - false, - None, - ); - - let input = r#" - - - Test Page - - - "#; - - let (result, _) = transform(&config, input).unwrap(); - - // Check that it parsed successfully - assert!(result.contains(r#"Test Page"#)); - assert!(result.contains(r#"")); - assert!(!result.contains("")); - assert!(result.contains("/>")); - } - - #[test] - fn test_config_check_end_names() { - // Test with check_end_names = false (lenient mode) - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-v-123".to_string()], - false, // Don't check end names - None, - ); - - // These should parse successfully with check_end_names = false - let lenient_cases = [ - "

Hello

", // Mismatched nesting - "
Text", // Wrong closing tag - "

Text", // Non-matching end tag - ]; - - for input in lenient_cases { - assert!(transform(&config, input).is_ok()); - } - - // Test with check_end_names = true (strict mode) - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-v-123".to_string()], - true, // Check end names - None, - ); - - // These should fail with check_end_names = true - for input in lenient_cases { - assert!(transform(&config, input).is_err()); - } - - // But well-formed HTML should still work - let valid_input = "

Hello

"; - assert!(transform(&config, valid_input).is_ok()); - } - - #[test] - fn test_watch_attribute() { - let config = HtmlTransformerConfig::new( - vec!["data-root".to_string()], - vec!["data-v-123".to_string()], - false, - Some("data-id".to_string()), - ); - - let input = r#" -
-

Regular element

- Nested element - -
"#; - - let (result, captured) = transform(&config, input).unwrap(); - - println!("result: {}", result); - println!("captured: {:?}", captured); - - // Verify HTML transformation worked - assert!(result.contains(r#"
"#)); - assert!(result.contains(r#""#)); - assert!(result.contains(r#""#)); - - // Verify attribute capturing - assert_eq!(captured.len(), 3); - assert!(captured.iter().any(|(id, attrs)| id == "123" - && attrs.contains(&"data-root".to_string()) - && attrs.contains(&"data-v-123".to_string()))); - assert!(captured - .iter() - .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string()))); - assert!(captured - .iter() - .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string()))); - } + transform(config, html) } diff --git a/crates/djc-html-transformer/src/transformer.rs b/crates/djc-html-transformer/src/transformer.rs new file mode 100644 index 0000000..52c4fc3 --- /dev/null +++ b/crates/djc-html-transformer/src/transformer.rs @@ -0,0 +1,406 @@ +use quick_xml::events::{BytesStart, Event}; +use quick_xml::reader::Reader; +use quick_xml::writer::Writer; +use std::collections::HashSet; +use std::io::Cursor; + +// List of HTML5 void elements. These can be written as `` or ``, +//e.g. `
`, ``, ``, etc. +const VOID_ELEMENTS: [&str; 14] = [ + "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", + "track", "wbr", +]; + +/// Configuration for HTML transformation +pub struct HtmlTransformerConfig { + root_attributes: Vec, + all_attributes: Vec, + void_elements: HashSet, + check_end_names: bool, + watch_on_attribute: Option, +} + +impl HtmlTransformerConfig { + pub fn new( + root_attributes: Vec, + all_attributes: Vec, + check_end_names: bool, + watch_on_attribute: Option, + ) -> Self { + let void_elements = VOID_ELEMENTS.iter().map(|&s| s.to_string()).collect(); + + HtmlTransformerConfig { + root_attributes, + all_attributes, + void_elements, + check_end_names, + watch_on_attribute, + } + } +} + +/// Add attributes to a HTML start tag (e.g. `
`) based on the configuration +fn add_attributes( + config: &HtmlTransformerConfig, + element: &mut BytesStart, + is_root: bool, + captured_attributes: &mut Vec<(String, Vec)>, +) { + let mut added_attrs = Vec::new(); + + // Add root attributes if this is a root element + if is_root { + for attr in &config.root_attributes { + element.push_attribute((attr.as_str(), "")); + added_attrs.push(attr.clone()); + } + } + + // Add attributes that should be applied to all elements + for attr in &config.all_attributes { + element.push_attribute((attr.as_str(), "")); + added_attrs.push(attr.clone()); + } + + // If we're watching for a specific attribute, check if this element has it + if let Some(watch_attr) = &config.watch_on_attribute { + if let Some(attr_value) = element + .attributes() + .find(|a| { + if let Ok(attr) = a { + String::from_utf8_lossy(attr.key.as_ref()) == *watch_attr + } else { + false + } + }) + .and_then(|a| a.ok()) + .map(|a| String::from_utf8_lossy(a.value.as_ref()).into_owned()) + { + captured_attributes.push((attr_value, added_attrs)); + } + } +} + +/// Main entrypoint. Transform HTML by adding attributes to the elements. +pub fn transform( + config: &HtmlTransformerConfig, + html: &str, +) -> Result<(String, Vec<(String, Vec)>), Box> { + let mut reader = Reader::from_str(html); + let reader_config = reader.config_mut(); + reader_config.check_end_names = config.check_end_names; + // Allow bare & in HTML content (e.g. 
"Hello & Welcome" instead of requiring "Hello & Welcome") + // This is needed for compatibility with HTML5 which is more lenient than strict XML + reader_config.allow_dangling_amp = true; + + // We transform the HTML by reading it and writing it simultaneously + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut captured_attributes = Vec::new(); + + // Track the nesting depth of elements to identify root elements (depth == 0) + let mut depth: i32 = 0; + + // Read the HTML event by event + loop { + match reader.read_event() { + // Start tag + Ok(Event::Start(e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()) + .to_string() + .to_lowercase(); + let mut elem = e.into_owned(); + add_attributes(config, &mut elem, depth == 0, &mut captured_attributes); + + // For void elements, write as Empty event + if config.void_elements.contains(&tag_name) { + writer.write_event(Event::Empty(elem))?; + } else { + writer.write_event(Event::Start(elem))?; + depth += 1; + } + } + + // End tag + Ok(Event::End(e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()) + .to_string() + .to_lowercase(); + + // Skip end tags for void elements + if !config.void_elements.contains(&tag_name) { + writer.write_event(Event::End(e))?; + depth -= 1; + } + } + + // Empty element (AKA void or self-closing tag, e.g. `
`) + Ok(Event::Empty(e)) => { + let mut elem = e.into_owned(); + add_attributes(config, &mut elem, depth == 0, &mut captured_attributes); + writer.write_event(Event::Empty(elem))?; + } + + // End of file + Ok(Event::Eof) => break, + // Other events (e.g. comments, processing instructions, etc.) + Ok(e) => writer.write_event(e)?, + Err(e) => return Err(Box::new(e)), + } + } + + // Convert the transformed HTML to a string + let result = String::from_utf8(writer.into_inner().into_inner())?; + Ok((result, captured_attributes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_transformation() { + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-all".to_string()], + false, + None, + ); + + let input = "

Hello

"; + let (result, _) = transform(&config, input).unwrap(); + + assert!(result.contains("data-root")); + assert!(result.contains("data-all")); + } + + #[test] + fn test_multiple_roots() { + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-all".to_string()], + false, + None, + ); + + let input = "
First
Second"; + let (result, _) = transform(&config, input).unwrap(); + + // Both root elements should have data-root + assert_eq!(result.matches("data-root").count(), 2); + // All elements should have data-all + assert_eq!(result.matches("data-all").count(), 2); + } + + #[test] + fn test_complex_html() { + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-all".to_string(), "data-v-123".to_string()], + false, + None, + ); + + let input = r#" +
+
+

Hello & Welcome

+ +
+
+
+

Article 1

+

Some text with bold and emphasis

+ Test Image +
+
+
+
+

© 2024

+
+ "#; + + let (result, _) = transform(&config, input).unwrap(); + + // Check root elements have root attributes + assert!(result.contains( + r#"
"# + )); + assert!(result.contains(r#"
"#)); + + // Check nested elements have all_attributes but not root_attributes + assert!(result.contains(r#"

"#)); + assert!(result.contains(r#"

")); + } + + #[test] + fn test_html_head_with_meta() { + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-v-123".to_string()], + false, + None, + ); + + let input = r#" + + + Test Page + + + "#; + + let (result, _) = transform(&config, input).unwrap(); + + // Check that it parsed successfully + assert!(result.contains(r#"Test Page"#)); + assert!(result.contains(r#"")); + assert!(!result.contains("")); + assert!(result.contains("/>")); + } + + #[test] + fn test_config_check_end_names() { + // Test with check_end_names = false (lenient mode) + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-v-123".to_string()], + false, // Don't check end names + None, + ); + + // These should parse successfully with check_end_names = false + let lenient_cases = [ + "

Hello

", // Mismatched nesting + "
Text", // Wrong closing tag + "

Text", // Non-matching end tag + ]; + + for input in lenient_cases { + assert!(transform(&config, input).is_ok()); + } + + // Test with check_end_names = true (strict mode) + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-v-123".to_string()], + true, // Check end names + None, + ); + + // These should fail with check_end_names = true + for input in lenient_cases { + assert!(transform(&config, input).is_err()); + } + + // But well-formed HTML should still work + let valid_input = "

Hello

"; + assert!(transform(&config, valid_input).is_ok()); + } + + #[test] + fn test_watch_attribute() { + let config = HtmlTransformerConfig::new( + vec!["data-root".to_string()], + vec!["data-v-123".to_string()], + false, + Some("data-id".to_string()), + ); + + let input = r#" +
+

Regular element

+ Nested element + +
"#; + + let (result, captured) = transform(&config, input).unwrap(); + + println!("result: {}", result); + println!("captured: {:?}", captured); + + // Verify HTML transformation worked + assert!(result.contains(r#"
"#)); + assert!(result.contains(r#""#)); + assert!(result.contains(r#""#)); + + // Verify attribute capturing + assert_eq!(captured.len(), 3); + assert!(captured.iter().any(|(id, attrs)| id == "123" + && attrs.contains(&"data-root".to_string()) + && attrs.contains(&"data-v-123".to_string()))); + assert!(captured + .iter() + .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string()))); + assert!(captured + .iter() + .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string()))); + } +} diff --git a/djc_core/__init__.py b/djc_core/__init__.py index 68add3b..2dfb9ee 100644 --- a/djc_core/__init__.py +++ b/djc_core/__init__.py @@ -1,6 +1,6 @@ -# DO NOT MODIFY, ONLY UPDATE THE MODULE NAME WHEN NEEDED! # This file is what maturin auto-generates. But it seems maturin omits it when we have a __init__.pyi file. # So we have to manually include it here. +# Following block of code is what maturin would've generated from .djc_core import * diff --git a/djc_core/__init__.pyi b/djc_core/__init__.pyi index eeeff7c..1774962 100644 --- a/djc_core/__init__.pyi +++ b/djc_core/__init__.pyi @@ -1,34 +1 @@ -from typing import List, Dict, Optional - -def set_html_attributes( - html: str, - root_attributes: List[str], - all_attributes: List[str], - check_end_names: Optional[bool] = None, - watch_on_attribute: Optional[str] = None, -) -> tuple[str, Dict[str, List[str]]]: - """ - Transform HTML by adding attributes to root and all elements. - - Args: - html (str): The HTML string to transform. Can be a fragment or full document. - root_attributes (List[str]): List of attribute names to add to root elements only. - all_attributes (List[str]): List of attribute names to add to all elements. - check_end_names (Optional[bool]): Whether to validate matching of end tags. Defaults to None. - watch_on_attribute (Optional[str]): If set, captures which attributes were added to elements with this attribute. 
- - Returns: - A tuple containing: - - The transformed HTML string - - A dictionary mapping captured attribute values to lists of attributes that were added - to those elements. Only returned if watch_on_attribute is set, otherwise empty dict. - - Example: - >>> html = '

Hello

' - >>> set_html_attributes(html, ['data-root-id'], ['data-v-123']) - '

Hello

' - - Raises: - ValueError: If the HTML is malformed or cannot be parsed. - """ - ... +from djc_core.djc_html_transformer import * diff --git a/djc_core/djc_html_transformer.pyi b/djc_core/djc_html_transformer.pyi new file mode 100644 index 0000000..a8e1f47 --- /dev/null +++ b/djc_core/djc_html_transformer.pyi @@ -0,0 +1,36 @@ +from typing import List, Dict, Optional + +def set_html_attributes( + html: str, + root_attributes: List[str], + all_attributes: List[str], + check_end_names: Optional[bool] = None, + watch_on_attribute: Optional[str] = None, +) -> tuple[str, Dict[str, List[str]]]: + """ + Transform HTML by adding attributes to root and all elements. + + Args: + html (str): The HTML string to transform. Can be a fragment or full document. + root_attributes (List[str]): List of attribute names to add to root elements only. + all_attributes (List[str]): List of attribute names to add to all elements. + check_end_names (Optional[bool]): Whether to validate matching of end tags. Defaults to None. + watch_on_attribute (Optional[str]): If set, captures which attributes were added to elements with this attribute. + + Returns: + A tuple containing: + - The transformed HTML string + - A dictionary mapping captured attribute values to lists of attributes that were added + to those elements. Only returned if watch_on_attribute is set, otherwise empty dict. + + Example: + >>> html = '

Hello

' + >>> set_html_attributes(html, ['data-root-id'], ['data-v-123']) + '

Hello

' + + Raises: + ValueError: If the HTML is malformed or cannot be parsed. + """ + ... + +__all__ = ["set_html_attributes"] From e3021c13500d0f0b8d4ff99a697319d20bb28b03 Mon Sep 17 00:00:00 2001 From: Juro Oravec Date: Thu, 23 Oct 2025 09:52:06 +0200 Subject: [PATCH 2/3] docs: update README --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index b8ebc79..dd1110d 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,24 @@ print(captured) # } ``` +## Architecture + +This project uses a multi-crate Rust workspace structure to maintain clean separation of concerns: + +### Crate Structure + +- **`djc-html-transformer`**: Pure Rust library for HTML transformation +- **`djc-template-parser`**: Pure Rust library for Django template parsing +- **`djc-core`**: Python bindings that combine all other libraries + +### Design Philosophy + +To make sense of the code and keep it clean, the Python API and Rust logic are defined separately: + +1. Each crate (AKA Rust package) has `lib.rs` (which is like Python's `__init__.py`). These files do not define the main logic, but only the public API of the crate. So the API that's to be used by other crates. +2. The `djc-core` crate imports other crates +3. And it is only this `djc-core` where we define the Python API using PyO3. + ## Development 1. 
Setup python env From 3a166422a5068a059b0668417bb60b1c147c9dc7 Mon Sep 17 00:00:00 2001 From: Juro Oravec Date: Thu, 23 Oct 2025 09:53:33 +0200 Subject: [PATCH 3/3] docs: minor fixes --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index dd1110d..52bbb5c 100644 --- a/README.md +++ b/README.md @@ -76,15 +76,15 @@ print(captured) This project uses a multi-crate Rust workspace structure to maintain clean separation of concerns: -### Crate Structure +### Crate structure - **`djc-html-transformer`**: Pure Rust library for HTML transformation - **`djc-template-parser`**: Pure Rust library for Django template parsing -- **`djc-core`**: Python bindings that combine all other libraries +- **`djc-core`**: Python bindings crate that combines all other libraries -### Design Philosophy +### Design philosophy -To make sense of the code and keep it clean, the Python API and Rust logic are defined separately: +To make sense of the code, the Python API and Rust logic are defined separately: 1. Each crate (AKA Rust package) has `lib.rs` (which is like Python's `__init__.py`). These files do not define the main logic, but only the public API of the crate. So the API that's to be used by other crates. 2. The `djc-core` crate imports other crates