From 732a7a7543012dd5022fc821895d58afb27b7fd7 Mon Sep 17 00:00:00 2001
From: Juro Oravec <juraj.oravec.josefson@gmail.com>
Date: Thu, 23 Oct 2025 09:35:13 +0200
Subject: [PATCH 1/3] refactor: move python public API to djc-core crate

---
 Cargo.toml                                    |   8 +-
 crates/djc-core/Cargo.toml                    |   1 +
 crates/djc-core/src/lib.rs                    |  71 ++-
 crates/djc-html-transformer/Cargo.toml        |   2 +-
 crates/djc-html-transformer/src/lib.rs        | 472 +-----------------
 .../djc-html-transformer/src/transformer.rs   | 406 +++++++++++++++
 djc_core/__init__.py                          |   2 +-
 djc_core/__init__.pyi                         |  35 +-
 djc_core/djc_html_transformer.pyi             |  36 ++
 9 files changed, 532 insertions(+), 501 deletions(-)
 create mode 100644 crates/djc-html-transformer/src/transformer.rs
 create mode 100644 djc_core/djc_html_transformer.pyi
diff --git a/Cargo.toml b/Cargo.toml
index f054fd4..4f41e57 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,11 +6,11 @@ members = [
 resolver = "2"
 
 [workspace.dependencies]
-pyo3 = { version = "0.27.0", features = ["extension-module"] }
+pyo3 = { version = "0.27.1", features = ["extension-module"] }
 quick-xml = "0.38.3"
 
 # https://ohadravid.github.io/posts/2023-03-rusty-python
 [profile.release]
-debug = true       # Debug symbols for profiler.
-lto = true         # Link-time optimization.
-codegen-units = 1  # Slower compilation but faster code. 
+debug = true      # Debug symbols for profiler.
+lto = true        # Link-time optimization.
+codegen-units = 1 # Slower compilation but faster code.
diff --git a/crates/djc-core/Cargo.toml b/crates/djc-core/Cargo.toml
index dea3d94..c8ed537 100644
--- a/crates/djc-core/Cargo.toml
+++ b/crates/djc-core/Cargo.toml
@@ -1,5 +1,6 @@
 [package]
 name = "djc-core"
+description = "Singular Python API for Rust code used by django-components"
 version = "1.1.0"
 edition = "2021"
 
diff --git a/crates/djc-core/src/lib.rs b/crates/djc-core/src/lib.rs
index 111c825..dcc7135 100644
--- a/crates/djc-core/src/lib.rs
+++ b/crates/djc-core/src/lib.rs
@@ -1,9 +1,76 @@
-use djc_html_transformer::set_html_attributes;
+use djc_html_transformer::{
+    set_html_attributes as set_html_attributes_rust, HtmlTransformerConfig,
+};
+use pyo3::exceptions::{PyValueError};
 use pyo3::prelude::*;
+use pyo3::types::{PyDict, PyTuple};
 
-/// A Python module implemented in Rust for high-performance transformations.
+/// Singular Python API that brings together all the other Rust crates.
 #[pymodule]
 fn djc_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    // HTML transformer
     m.add_function(wrap_pyfunction!(set_html_attributes, m)?)?;
     Ok(())
 }
+
+/// Transform HTML by adding attributes to the elements.
+///
+/// Args:
+///     html (str): The HTML string to transform. Can be a fragment or full document.
+///     root_attributes (List[str]): List of attribute names to add to root elements only.
+///     all_attributes (List[str]): List of attribute names to add to all elements.
+///     check_end_names (bool, optional): Whether to validate matching of end tags. Defaults to False.
+///     watch_on_attribute (str, optional): If set, captures which attributes were added to elements with this attribute.
+///
+/// Returns:
+///     Tuple[str, Dict[str, List[str]]]: A tuple containing:
+///         - The transformed HTML string
+///         - A dictionary mapping captured attribute values to lists of attributes that were added
+///           to those elements. Only returned if watch_on_attribute is set, otherwise empty dict.
+///
+/// Example:
+///     >>> html = '<div data-id="123"><p>Hello</p></div>'
+///     >>> html, captured = set_html_attributes(html, ['data-root-id'], ['data-v-123'], watch_on_attribute='data-id')
+///     >>> print(captured)
+///     {'123': ['data-root-id', 'data-v-123']}
+///
+/// Raises:
+///     ValueError: If the HTML is malformed or cannot be parsed.
+#[pyfunction]
+#[pyo3(signature = (html, root_attributes, all_attributes, check_end_names=None, watch_on_attribute=None))]
+#[pyo3(
+    text_signature = "(html, root_attributes, all_attributes, *, check_end_names=False, watch_on_attribute=None)"
+)]
+pub fn set_html_attributes(
+    py: Python,
+    html: &str,
+    root_attributes: Vec<String>,
+    all_attributes: Vec<String>,
+    check_end_names: Option<bool>,
+    watch_on_attribute: Option<String>,
+) -> PyResult<Py<PyAny>> {
+    let config = HtmlTransformerConfig::new(
+        root_attributes,
+        all_attributes,
+        check_end_names.unwrap_or(false),
+        watch_on_attribute,
+    );
+
+    match set_html_attributes_rust(html, &config) {
+        Ok((html, captured)) => {
+            // Convert captured attributes to a Python dictionary
+            let captured_dict = PyDict::new(py);
+            for (id, attrs) in captured {
+                captured_dict.set_item(id, attrs)?;
+            }
+
+            // Convert items to Bound<PyAny> for the tuple
+            use pyo3::types::PyString;
+            let html_obj = PyString::new(py, &html).as_any().clone();
+            let dict_obj = captured_dict.as_any().clone();
+            let result = PyTuple::new(py, vec![html_obj, dict_obj])?;
+            Ok(result.into_any().unbind())
+        }
+        Err(e) => Err(PyValueError::new_err(e.to_string())),
+    }
+}
diff --git a/crates/djc-html-transformer/Cargo.toml b/crates/djc-html-transformer/Cargo.toml
index d47e672..e86db0d 100644
--- a/crates/djc-html-transformer/Cargo.toml
+++ b/crates/djc-html-transformer/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
 name = "djc-html-transformer"
+description = "Apply attributes to HTML in a single pass"
 version = "1.0.3"
 edition = "2021"
 
 [dependencies]
-pyo3 = { workspace = true }
 quick-xml = { workspace = true }
diff --git a/crates/djc-html-transformer/src/lib.rs b/crates/djc-html-transformer/src/lib.rs
index 844e002..fe97ee3 100644
--- a/crates/djc-html-transformer/src/lib.rs
+++ b/crates/djc-html-transformer/src/lib.rs
@@ -1,471 +1,25 @@
-use pyo3::exceptions::PyValueError;
-use pyo3::prelude::*;
-use pyo3::types::{PyDict, PyTuple};
-use quick_xml::events::{BytesStart, Event};
-use quick_xml::reader::Reader;
-use quick_xml::writer::Writer;
-use std::collections::HashSet;
-use std::io::Cursor;
+use transformer::{transform};
 
-// List of HTML5 void elements. These can be written as `<tag>` or `<tag />`,
-//e.g. `<br />`, `<link />`, `<img />`, etc.
-const VOID_ELEMENTS: [&str; 14] = [
-    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source",
-    "track", "wbr",
-];
+pub mod transformer;
+
+// Re-export the types that users need
+pub use transformer::{HtmlTransformerConfig};
 
 /// Transform HTML by adding attributes to the elements.
 ///
+/// This is the pure Rust version that takes a configuration object.
+///
 /// Args:
-///     html (str): The HTML string to transform. Can be a fragment or full document.
-///     root_attributes (List[str]): List of attribute names to add to root elements only.
-///     all_attributes (List[str]): List of attribute names to add to all elements.
-///     check_end_names (bool, optional): Whether to validate matching of end tags. Defaults to false.
-///     watch_on_attribute (str, optional): If set, captures which attributes were added to elements with this attribute.
+///     html: The HTML string to transform. Can be a fragment or full document.
+///     config: The HTML transformer configuration.
 ///
 /// Returns:
-///     Tuple[str, Dict[str, List[str]]]: A tuple containing:
-///         - The transformed HTML string
-///         - A dictionary mapping captured attribute values to lists of attributes that were added
-///           to those elements. Only returned if watch_on_attribute is set, otherwise empty dict.
-///
-/// Example:
-///     >>> html = '<div data-id="123"><p>Hello</p></div>'
-///     >>> html, captured = set_html_attributes(html, ['data-root-id'], ['data-v-123'], watch_on_attribute='data-id')
-///     >>> print(captured)
-///     {'123': ['data-root-id', 'data-v-123']}
-///
-/// Raises:
-///     ValueError: If the HTML is malformed or cannot be parsed.
-#[pyfunction]
-#[pyo3(signature = (html, root_attributes, all_attributes, check_end_names=None, watch_on_attribute=None))]
-#[pyo3(
-    text_signature = "(html, root_attributes, all_attributes, *, check_end_names=False, watch_on_attribute=None)"
-)]
+///     A Result containing either:
+///     - Ok((html, captured)): A tuple with the transformed HTML and captured attributes
+///     - Err(error): An error if the HTML is malformed or cannot be parsed.
 pub fn set_html_attributes(
-    py: Python,
     html: &str,
-    root_attributes: Vec<String>,
-    all_attributes: Vec<String>,
-    check_end_names: Option<bool>,
-    watch_on_attribute: Option<String>,
-) -> PyResult<Py<PyAny>> {
-    let config = HtmlTransformerConfig::new(
-        root_attributes,
-        all_attributes,
-        check_end_names.unwrap_or(false),
-        watch_on_attribute,
-    );
-
-    match transform(&config, html) {
-        Ok((html, captured)) => {
-            // Convert captured attributes to a Python dictionary
-            let captured_dict = PyDict::new(py);
-            for (id, attrs) in captured {
-                captured_dict.set_item(id, attrs)?;
-            }
-
-            // Convert items to Bound<PyAny> for the tuple
-            use pyo3::types::PyString;
-            let html_obj = PyString::new(py, &html).as_any().clone();
-            let dict_obj = captured_dict.as_any().clone();
-            let result = PyTuple::new(py, vec![html_obj, dict_obj])?;
-            Ok(result.into_any().unbind())
-        }
-        Err(e) => Err(PyValueError::new_err(e.to_string())),
-    }
-}
-
-/// Configuration for HTML transformation
-pub struct HtmlTransformerConfig {
-    root_attributes: Vec<String>,
-    all_attributes: Vec<String>,
-    void_elements: HashSet<String>,
-    check_end_names: bool,
-    watch_on_attribute: Option<String>,
-}
-
-impl HtmlTransformerConfig {
-    pub fn new(
-        root_attributes: Vec<String>,
-        all_attributes: Vec<String>,
-        check_end_names: bool,
-        watch_on_attribute: Option<String>,
-    ) -> Self {
-        let void_elements = VOID_ELEMENTS.iter().map(|&s| s.to_string()).collect();
-
-        HtmlTransformerConfig {
-            root_attributes,
-            all_attributes,
-            void_elements,
-            check_end_names,
-            watch_on_attribute,
-        }
-    }
-}
-
-/// Add attributes to a HTML start tag (e.g. `<div>`) based on the configuration
-fn add_attributes(
-    config: &HtmlTransformerConfig,
-    element: &mut BytesStart,
-    is_root: bool,
-    captured_attributes: &mut Vec<(String, Vec<String>)>,
-) {
-    let mut added_attrs = Vec::new();
-
-    // Add root attributes if this is a root element
-    if is_root {
-        for attr in &config.root_attributes {
-            element.push_attribute((attr.as_str(), ""));
-            added_attrs.push(attr.clone());
-        }
-    }
-
-    // Add attributes that should be applied to all elements
-    for attr in &config.all_attributes {
-        element.push_attribute((attr.as_str(), ""));
-        added_attrs.push(attr.clone());
-    }
-
-    // If we're watching for a specific attribute, check if this element has it
-    if let Some(watch_attr) = &config.watch_on_attribute {
-        if let Some(attr_value) = element
-            .attributes()
-            .find(|a| {
-                if let Ok(attr) = a {
-                    String::from_utf8_lossy(attr.key.as_ref()) == *watch_attr
-                } else {
-                    false
-                }
-            })
-            .and_then(|a| a.ok())
-            .map(|a| String::from_utf8_lossy(a.value.as_ref()).into_owned())
-        {
-            captured_attributes.push((attr_value, added_attrs));
-        }
-    }
-}
-
-/// Main entrypoint. Transform HTML by adding attributes to the elements.
-pub fn transform(
     config: &HtmlTransformerConfig,
-    html: &str,
 ) -> Result<(String, Vec<(String, Vec<String>)>), Box<dyn std::error::Error>> {
-    let mut reader = Reader::from_str(html);
-    let reader_config = reader.config_mut();
-    reader_config.check_end_names = config.check_end_names;
-    // Allow bare & in HTML content (e.g. "Hello & Welcome" instead of requiring "Hello &amp; Welcome")
-    // This is needed for compatibility with HTML5 which is more lenient than strict XML
-    reader_config.allow_dangling_amp = true;
-
-    // We transform the HTML by reading it and writing it simultaneously
-    let mut writer = Writer::new(Cursor::new(Vec::new()));
-    let mut captured_attributes = Vec::new();
-
-    // Track the nesting depth of elements to identify root elements (depth == 0)
-    let mut depth: i32 = 0;
-
-    // Read the HTML event by event
-    loop {
-        match reader.read_event() {
-            // Start tag
-            Ok(Event::Start(e)) => {
-                let tag_name = String::from_utf8_lossy(e.name().as_ref())
-                    .to_string()
-                    .to_lowercase();
-                let mut elem = e.into_owned();
-                add_attributes(config, &mut elem, depth == 0, &mut captured_attributes);
-
-                // For void elements, write as Empty event
-                if config.void_elements.contains(&tag_name) {
-                    writer.write_event(Event::Empty(elem))?;
-                } else {
-                    writer.write_event(Event::Start(elem))?;
-                    depth += 1;
-                }
-            }
-
-            // End tag
-            Ok(Event::End(e)) => {
-                let tag_name = String::from_utf8_lossy(e.name().as_ref())
-                    .to_string()
-                    .to_lowercase();
-
-                // Skip end tags for void elements
-                if !config.void_elements.contains(&tag_name) {
-                    writer.write_event(Event::End(e))?;
-                    depth -= 1;
-                }
-            }
-
-            // Empty element (AKA void or self-closing tag, e.g. `<br />`)
-            Ok(Event::Empty(e)) => {
-                let mut elem = e.into_owned();
-                add_attributes(config, &mut elem, depth == 0, &mut captured_attributes);
-                writer.write_event(Event::Empty(elem))?;
-            }
-
-            // End of file
-            Ok(Event::Eof) => break,
-            // Other events (e.g. comments, processing instructions, etc.)
-            Ok(e) => writer.write_event(e)?,
-            Err(e) => return Err(Box::new(e)),
-        }
-    }
-
-    // Convert the transformed HTML to a string
-    let result = String::from_utf8(writer.into_inner().into_inner())?;
-    Ok((result, captured_attributes))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_basic_transformation() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-all".to_string()],
-            false,
-            None,
-        );
-
-        let input = "<div><p>Hello</p></div>";
-        let (result, _) = transform(&config, input).unwrap();
-
-        assert!(result.contains("data-root"));
-        assert!(result.contains("data-all"));
-    }
-
-    #[test]
-    fn test_multiple_roots() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-all".to_string()],
-            false,
-            None,
-        );
-
-        let input = "<div>First</div><span>Second</span>";
-        let (result, _) = transform(&config, input).unwrap();
-
-        // Both root elements should have data-root
-        assert_eq!(result.matches("data-root").count(), 2);
-        // All elements should have data-all
-        assert_eq!(result.matches("data-all").count(), 2);
-    }
-
-    #[test]
-    fn test_complex_html() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-all".to_string(), "data-v-123".to_string()],
-            false,
-            None,
-        );
-
-        let input = r#"
-            <div class="container" id="main">
-                <header class="flex">
-                    <h1 title="Main Title">Hello & Welcome</h1>
-                    <nav data-existing="true">
-                        <a href="/home">Home</a>
-                        <a href="/about" class="active">About</a>
-                    </nav>
-                </header>
-                <main>
-                    <article data-existing="true">
-                        <h2>Article 1</h2>
-                        <p>Some text with <strong>bold</strong> and <em>emphasis</em></p>
-                        <img src="test.jpg" alt="Test Image"/>
-                    </article>
-                </main>
-            </div>
-            <footer id="footer">
-                <p>&copy; 2024</p>
-            </footer>
-        "#;
-
-        let (result, _) = transform(&config, input).unwrap();
-
-        // Check root elements have root attributes
-        assert!(result.contains(
-            r#"<div class="container" id="main" data-root="" data-all="" data-v-123="">"#
-        ));
-        assert!(result.contains(r#"<footer id="footer" data-root="" data-all="" data-v-123="">"#));
-
-        // Check nested elements have all_attributes but not root_attributes
-        assert!(result.contains(r#"<h1 title="Main Title" data-all="" data-v-123="">"#));
-        assert!(result.contains(r#"<nav data-existing="true" data-all="" data-v-123="">"#));
-        assert!(
-            result.contains(r#"<img src="test.jpg" alt="Test Image" data-all="" data-v-123=""/>"#)
-        );
-
-        // Verify we didn't mess up the content or structure
-        assert!(result.contains("Hello & Welcome"));
-        assert!(result.contains("&copy; 2024"));
-        assert!(result.contains(r#"<strong data-all="" data-v-123="">bold</strong>"#));
-    }
-
-    #[test]
-    fn test_void_elements() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-v-123".to_string()],
-            false,
-            None,
-        );
-
-        // Test various formats of void elements
-        let test_cases = [
-            (
-                "<meta charset=\"utf-8\">",
-                "<meta charset=\"utf-8\" data-root=\"\" data-v-123=\"\"/>"
-            ),
-            (
-                "<meta charset=\"utf-8\"/>",
-                "<meta charset=\"utf-8\" data-root=\"\" data-v-123=\"\"/>"
-            ),
-            (
-                "<div><br><hr></div>",
-                "<div data-root=\"\" data-v-123=\"\"><br data-v-123=\"\"/><hr data-v-123=\"\"/></div>"
-            ),
-            (
-                "<img src=\"test.jpg\" alt=\"Test\">",
-                "<img src=\"test.jpg\" alt=\"Test\" data-root=\"\" data-v-123=\"\"/>"
-            ),
-        ];
-
-        for (input, expected) in test_cases {
-            let (result, _) = transform(&config, input).unwrap();
-            assert_eq!(result, expected);
-        }
-
-        // Test multiple void elements in a complex structure
-        let input = r#"<div>
-            <link rel="stylesheet" href="style.css">
-            <img src="test.jpg">
-            <p>Text with<br>break</p>
-        </div>"#;
-
-        let (result, _) = transform(&config, input).unwrap();
-
-        // Verify void elements have attributes but no closing tags
-        assert!(result.contains(r#"<link rel="stylesheet" href="style.css" data-v-123=""/>"#));
-        assert!(result.contains(r#"<img src="test.jpg" data-v-123=""/>"#));
-        assert!(result.contains(r#"<br data-v-123=""/>"#));
-
-        // Verify non-void elements still have proper closing tags
-        assert!(result.contains("</p>"));
-        assert!(result.contains("</div>"));
-    }
-
-    #[test]
-    fn test_html_head_with_meta() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-v-123".to_string()],
-            false,
-            None,
-        );
-
-        let input = r#"
-            <head>
-                <meta charset="utf-8">
-                <title>Test Page</title>
-                <link rel="stylesheet" href="style.css">
-                <meta name="description" content="Test">
-            </head>"#;
-
-        let (result, _) = transform(&config, input).unwrap();
-
-        // Check that it parsed successfully
-        assert!(result.contains(r#"<meta charset="utf-8""#));
-        assert!(result.contains(r#"<title data-v-123="">Test Page</title>"#));
-        assert!(result.contains(r#"<link rel="stylesheet" href="style.css""#));
-
-        // Verify void elements are properly handled
-        assert!(!result.contains("</meta>"));
-        assert!(!result.contains("</link>"));
-        assert!(result.contains("/>"));
-    }
-
-    #[test]
-    fn test_config_check_end_names() {
-        // Test with check_end_names = false (lenient mode)
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-v-123".to_string()],
-            false, // Don't check end names
-            None,
-        );
-
-        // These should parse successfully with check_end_names = false
-        let lenient_cases = [
-            "<div><p>Hello</div></p>", // Mismatched nesting
-            "<div>Text</span>",        // Wrong closing tag
-            "<p>Text</wrong>",         // Non-matching end tag
-        ];
-
-        for input in lenient_cases {
-            assert!(transform(&config, input).is_ok());
-        }
-
-        // Test with check_end_names = true (strict mode)
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-v-123".to_string()],
-            true, // Check end names
-            None,
-        );
-
-        // These should fail with check_end_names = true
-        for input in lenient_cases {
-            assert!(transform(&config, input).is_err());
-        }
-
-        // But well-formed HTML should still work
-        let valid_input = "<div><p>Hello</p></div>";
-        assert!(transform(&config, valid_input).is_ok());
-    }
-
-    #[test]
-    fn test_watch_attribute() {
-        let config = HtmlTransformerConfig::new(
-            vec!["data-root".to_string()],
-            vec!["data-v-123".to_string()],
-            false,
-            Some("data-id".to_string()),
-        );
-
-        let input = r#"
-            <div data-id="123">
-                <p>Regular element</p>
-                <span data-id="456">Nested element</span>
-                <img data-id="789" src="test.jpg"/>
-            </div>"#;
-
-        let (result, captured) = transform(&config, input).unwrap();
-
-        println!("result: {}", result);
-        println!("captured: {:?}", captured);
-
-        // Verify HTML transformation worked
-        assert!(result.contains(r#"<div data-id="123" data-root="" data-v-123="">"#));
-        assert!(result.contains(r#"<span data-id="456" data-v-123="">"#));
-        assert!(result.contains(r#"<img data-id="789" src="test.jpg" data-v-123=""/>"#));
-
-        // Verify attribute capturing
-        assert_eq!(captured.len(), 3);
-        assert!(captured.iter().any(|(id, attrs)| id == "123"
-            && attrs.contains(&"data-root".to_string())
-            && attrs.contains(&"data-v-123".to_string())));
-        assert!(captured
-            .iter()
-            .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string())));
-        assert!(captured
-            .iter()
-            .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string())));
-    }
+    transform(config, html)
 }
diff --git a/crates/djc-html-transformer/src/transformer.rs b/crates/djc-html-transformer/src/transformer.rs
new file mode 100644
index 0000000..52c4fc3
--- /dev/null
+++ b/crates/djc-html-transformer/src/transformer.rs
@@ -0,0 +1,406 @@
+use quick_xml::events::{BytesStart, Event};
+use quick_xml::reader::Reader;
+use quick_xml::writer::Writer;
+use std::collections::HashSet;
+use std::io::Cursor;
+
+// List of HTML5 void elements. These can be written as `<tag>` or `<tag />`,
+// e.g. `<br />`, `<link />`, `<img />`, etc.
+const VOID_ELEMENTS: [&str; 14] = [
+    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source",
+    "track", "wbr",
+];
+
+/// Configuration for HTML transformation
+pub struct HtmlTransformerConfig {
+    root_attributes: Vec<String>,
+    all_attributes: Vec<String>,
+    void_elements: HashSet<String>,
+    check_end_names: bool,
+    watch_on_attribute: Option<String>,
+}
+
+impl HtmlTransformerConfig {
+    pub fn new(
+        root_attributes: Vec<String>,
+        all_attributes: Vec<String>,
+        check_end_names: bool,
+        watch_on_attribute: Option<String>,
+    ) -> Self {
+        let void_elements = VOID_ELEMENTS.iter().map(|&s| s.to_string()).collect();
+
+        HtmlTransformerConfig {
+            root_attributes,
+            all_attributes,
+            void_elements,
+            check_end_names,
+            watch_on_attribute,
+        }
+    }
+}
+
+/// Add attributes to an HTML start tag (e.g. `<div>`) based on the configuration
+fn add_attributes(
+    config: &HtmlTransformerConfig,
+    element: &mut BytesStart,
+    is_root: bool,
+    captured_attributes: &mut Vec<(String, Vec<String>)>,
+) {
+    let mut added_attrs = Vec::new();
+
+    // Add root attributes if this is a root element
+    if is_root {
+        for attr in &config.root_attributes {
+            element.push_attribute((attr.as_str(), ""));
+            added_attrs.push(attr.clone());
+        }
+    }
+
+    // Add attributes that should be applied to all elements
+    for attr in &config.all_attributes {
+        element.push_attribute((attr.as_str(), ""));
+        added_attrs.push(attr.clone());
+    }
+
+    // If we're watching for a specific attribute, check if this element has it
+    if let Some(watch_attr) = &config.watch_on_attribute {
+        if let Some(attr_value) = element
+            .attributes()
+            .find(|a| {
+                if let Ok(attr) = a {
+                    String::from_utf8_lossy(attr.key.as_ref()) == *watch_attr
+                } else {
+                    false
+                }
+            })
+            .and_then(|a| a.ok())
+            .map(|a| String::from_utf8_lossy(a.value.as_ref()).into_owned())
+        {
+            captured_attributes.push((attr_value, added_attrs));
+        }
+    }
+}
+
+/// Main entrypoint. Transform HTML by adding attributes to the elements.
+pub fn transform(
+    config: &HtmlTransformerConfig,
+    html: &str,
+) -> Result<(String, Vec<(String, Vec<String>)>), Box<dyn std::error::Error>> {
+    let mut reader = Reader::from_str(html);
+    let reader_config = reader.config_mut();
+    reader_config.check_end_names = config.check_end_names;
+    // Allow bare & in HTML content (e.g. "Hello & Welcome" instead of requiring "Hello &amp; Welcome")
+    // This is needed for compatibility with HTML5 which is more lenient than strict XML
+    reader_config.allow_dangling_amp = true;
+
+    // We transform the HTML by reading it and writing it simultaneously
+    let mut writer = Writer::new(Cursor::new(Vec::new()));
+    let mut captured_attributes = Vec::new();
+
+    // Track the nesting depth of elements to identify root elements (depth == 0)
+    let mut depth: i32 = 0;
+
+    // Read the HTML event by event
+    loop {
+        match reader.read_event() {
+            // Start tag
+            Ok(Event::Start(e)) => {
+                let tag_name = String::from_utf8_lossy(e.name().as_ref())
+                    .to_string()
+                    .to_lowercase();
+                let mut elem = e.into_owned();
+                add_attributes(config, &mut elem, depth == 0, &mut captured_attributes);
+
+                // For void elements, write as Empty event
+                if config.void_elements.contains(&tag_name) {
+                    writer.write_event(Event::Empty(elem))?;
+                } else {
+                    writer.write_event(Event::Start(elem))?;
+                    depth += 1;
+                }
+            }
+
+            // End tag
+            Ok(Event::End(e)) => {
+                let tag_name = String::from_utf8_lossy(e.name().as_ref())
+                    .to_string()
+                    .to_lowercase();
+
+                // Skip end tags for void elements
+                if !config.void_elements.contains(&tag_name) {
+                    writer.write_event(Event::End(e))?;
+                    depth -= 1;
+                }
+            }
+
+            // Empty element (AKA void or self-closing tag, e.g. `<br />`)
+            Ok(Event::Empty(e)) => {
+                let mut elem = e.into_owned();
+                add_attributes(config, &mut elem, depth == 0, &mut captured_attributes);
+                writer.write_event(Event::Empty(elem))?;
+            }
+
+            // End of file
+            Ok(Event::Eof) => break,
+            // Other events (e.g. comments, processing instructions, etc.)
+            Ok(e) => writer.write_event(e)?,
+            Err(e) => return Err(Box::new(e)),
+        }
+    }
+
+    // Convert the transformed HTML to a string
+    let result = String::from_utf8(writer.into_inner().into_inner())?;
+    Ok((result, captured_attributes))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_basic_transformation() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-all".to_string()],
+            false,
+            None,
+        );
+
+        let input = "<div><p>Hello</p></div>";
+        let (result, _) = transform(&config, input).unwrap();
+
+        assert!(result.contains("data-root"));
+        assert!(result.contains("data-all"));
+    }
+
+    #[test]
+    fn test_multiple_roots() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-all".to_string()],
+            false,
+            None,
+        );
+
+        let input = "<div>First</div><span>Second</span>";
+        let (result, _) = transform(&config, input).unwrap();
+
+        // Both root elements should have data-root
+        assert_eq!(result.matches("data-root").count(), 2);
+        // All elements should have data-all
+        assert_eq!(result.matches("data-all").count(), 2);
+    }
+
+    #[test]
+    fn test_complex_html() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-all".to_string(), "data-v-123".to_string()],
+            false,
+            None,
+        );
+
+        let input = r#"
+            <div class="container" id="main">
+                <header class="flex">
+                    <h1 title="Main Title">Hello & Welcome</h1>
+                    <nav data-existing="true">
+                        <a href="/home">Home</a>
+                        <a href="/about" class="active">About</a>
+                    </nav>
+                </header>
+                <main>
+                    <article data-existing="true">
+                        <h2>Article 1</h2>
+                        <p>Some text with <strong>bold</strong> and <em>emphasis</em></p>
+                        <img src="test.jpg" alt="Test Image"/>
+                    </article>
+                </main>
+            </div>
+            <footer id="footer">
+                <p>&copy; 2024</p>
+            </footer>
+        "#;
+
+        let (result, _) = transform(&config, input).unwrap();
+
+        // Check root elements have root attributes
+        assert!(result.contains(
+            r#"<div class="container" id="main" data-root="" data-all="" data-v-123="">"#
+        ));
+        assert!(result.contains(r#"<footer id="footer" data-root="" data-all="" data-v-123="">"#));
+
+        // Check nested elements have all_attributes but not root_attributes
+        assert!(result.contains(r#"<h1 title="Main Title" data-all="" data-v-123="">"#));
+        assert!(result.contains(r#"<nav data-existing="true" data-all="" data-v-123="">"#));
+        assert!(
+            result.contains(r#"<img src="test.jpg" alt="Test Image" data-all="" data-v-123=""/>"#)
+        );
+
+        // Verify we didn't mess up the content or structure
+        assert!(result.contains("Hello & Welcome"));
+        assert!(result.contains("&copy; 2024"));
+        assert!(result.contains(r#"<strong data-all="" data-v-123="">bold</strong>"#));
+    }
+
+    #[test]
+    fn test_void_elements() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-v-123".to_string()],
+            false,
+            None,
+        );
+
+        // Test various formats of void elements
+        let test_cases = [
+            (
+                "<meta charset=\"utf-8\">",
+                "<meta charset=\"utf-8\" data-root=\"\" data-v-123=\"\"/>"
+            ),
+            (
+                "<meta charset=\"utf-8\"/>",
+                "<meta charset=\"utf-8\" data-root=\"\" data-v-123=\"\"/>"
+            ),
+            (
+                "<div><br><hr></div>",
+                "<div data-root=\"\" data-v-123=\"\"><br data-v-123=\"\"/><hr data-v-123=\"\"/></div>"
+            ),
+            (
+                "<img src=\"test.jpg\" alt=\"Test\">",
+                "<img src=\"test.jpg\" alt=\"Test\" data-root=\"\" data-v-123=\"\"/>"
+            ),
+        ];
+
+        for (input, expected) in test_cases {
+            let (result, _) = transform(&config, input).unwrap();
+            assert_eq!(result, expected);
+        }
+
+        // Test multiple void elements in a complex structure
+        let input = r#"<div>
+            <link rel="stylesheet" href="style.css">
+            <img src="test.jpg">
+            <p>Text with<br>break</p>
+        </div>"#;
+
+        let (result, _) = transform(&config, input).unwrap();
+
+        // Verify void elements have attributes but no closing tags
+        assert!(result.contains(r#"<link rel="stylesheet" href="style.css" data-v-123=""/>"#));
+        assert!(result.contains(r#"<img src="test.jpg" data-v-123=""/>"#));
+        assert!(result.contains(r#"<br data-v-123=""/>"#));
+
+        // Verify non-void elements still have proper closing tags
+        assert!(result.contains("</p>"));
+        assert!(result.contains("</div>"));
+    }
+
+    #[test]
+    fn test_html_head_with_meta() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-v-123".to_string()],
+            false,
+            None,
+        );
+
+        let input = r#"
+            <head>
+                <meta charset="utf-8">
+                <title>Test Page</title>
+                <link rel="stylesheet" href="style.css">
+                <meta name="description" content="Test">
+            </head>"#;
+
+        let (result, _) = transform(&config, input).unwrap();
+
+        // Check that it parsed successfully
+        assert!(result.contains(r#"<meta charset="utf-8""#));
+        assert!(result.contains(r#"<title data-v-123="">Test Page</title>"#));
+        assert!(result.contains(r#"<link rel="stylesheet" href="style.css""#));
+
+        // Verify void elements are properly handled
+        assert!(!result.contains("</meta>"));
+        assert!(!result.contains("</link>"));
+        assert!(result.contains("/>"));
+    }
+
+    #[test]
+    fn test_config_check_end_names() {
+        // Test with check_end_names = false (lenient mode)
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-v-123".to_string()],
+            false, // Don't check end names
+            None,
+        );
+
+        // These should parse successfully with check_end_names = false
+        let lenient_cases = [
+            "<div><p>Hello</div></p>", // Mismatched nesting
+            "<div>Text</span>",        // Wrong closing tag
+            "<p>Text</wrong>",         // Non-matching end tag
+        ];
+
+        for input in lenient_cases {
+            assert!(transform(&config, input).is_ok());
+        }
+
+        // Test with check_end_names = true (strict mode)
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-v-123".to_string()],
+            true, // Check end names
+            None,
+        );
+
+        // These should fail with check_end_names = true
+        for input in lenient_cases {
+            assert!(transform(&config, input).is_err());
+        }
+
+        // But well-formed HTML should still work
+        let valid_input = "<div><p>Hello</p></div>";
+        assert!(transform(&config, valid_input).is_ok());
+    }
+
+    #[test]
+    fn test_watch_attribute() {
+        let config = HtmlTransformerConfig::new(
+            vec!["data-root".to_string()],
+            vec!["data-v-123".to_string()],
+            false,
+            Some("data-id".to_string()),
+        );
+
+        let input = r#"
+            <div data-id="123">
+                <p>Regular element</p>
+                <span data-id="456">Nested element</span>
+                <img data-id="789" src="test.jpg"/>
+            </div>"#;
+
+        let (result, captured) = transform(&config, input).unwrap();
+
+        println!("result: {}", result);
+        println!("captured: {:?}", captured);
+
+        // Verify HTML transformation worked
+        assert!(result.contains(r#"<div data-id="123" data-root="" data-v-123="">"#));
+        assert!(result.contains(r#"<span data-id="456" data-v-123="">"#));
+        assert!(result.contains(r#"<img data-id="789" src="test.jpg" data-v-123=""/>"#));
+
+        // Verify attribute capturing
+        assert_eq!(captured.len(), 3);
+        assert!(captured.iter().any(|(id, attrs)| id == "123"
+            && attrs.contains(&"data-root".to_string())
+            && attrs.contains(&"data-v-123".to_string())));
+        assert!(captured
+            .iter()
+            .any(|(id, attrs)| id == "456" && attrs.contains(&"data-v-123".to_string())));
+        assert!(captured
+            .iter()
+            .any(|(id, attrs)| id == "789" && attrs.contains(&"data-v-123".to_string())));
+    }
+}
diff --git a/djc_core/__init__.py b/djc_core/__init__.py
index 68add3b..2dfb9ee 100644
--- a/djc_core/__init__.py
+++ b/djc_core/__init__.py
@@ -1,6 +1,6 @@
-# DO NOT MODIFY, ONLY UPDATE THE MODULE NAME WHEN NEEDED!
 # This file is what maturin auto-generates. But it seems maturin omits it when we have a __init__.pyi file.
 # So we have to manually include it here.
+# The following block of code is what maturin would have generated.
 
 from .djc_core import *
 
diff --git a/djc_core/__init__.pyi b/djc_core/__init__.pyi
index eeeff7c..1774962 100644
--- a/djc_core/__init__.pyi
+++ b/djc_core/__init__.pyi
@@ -1,34 +1 @@
-from typing import List, Dict, Optional
-
-def set_html_attributes(
-    html: str,
-    root_attributes: List[str],
-    all_attributes: List[str],
-    check_end_names: Optional[bool] = None,
-    watch_on_attribute: Optional[str] = None,
-) -> tuple[str, Dict[str, List[str]]]:
-    """
-    Transform HTML by adding attributes to root and all elements.
-
-    Args:
-        html (str): The HTML string to transform. Can be a fragment or full document.
-        root_attributes (List[str]): List of attribute names to add to root elements only.
-        all_attributes (List[str]): List of attribute names to add to all elements.
-        check_end_names (Optional[bool]): Whether to validate matching of end tags. Defaults to None.
-        watch_on_attribute (Optional[str]): If set, captures which attributes were added to elements with this attribute.
-
-    Returns:
-        A tuple containing:
-            - The transformed HTML string
-            - A dictionary mapping captured attribute values to lists of attributes that were added
-              to those elements. Only returned if watch_on_attribute is set, otherwise empty dict.
-
-    Example:
-        >>> html = '<div><p>Hello</p></div>'
-        >>> set_html_attributes(html, ['data-root-id'], ['data-v-123'])
-        '<div data-root-id="" data-v-123=""><p data-v-123="">Hello</p></div>'
-
-    Raises:
-        ValueError: If the HTML is malformed or cannot be parsed.
-    """
-    ...
+from djc_core.djc_html_transformer import *
diff --git a/djc_core/djc_html_transformer.pyi b/djc_core/djc_html_transformer.pyi
new file mode 100644
index 0000000..a8e1f47
--- /dev/null
+++ b/djc_core/djc_html_transformer.pyi
@@ -0,0 +1,36 @@
+from typing import List, Dict, Optional
+
+def set_html_attributes(
+    html: str,
+    root_attributes: List[str],
+    all_attributes: List[str],
+    check_end_names: Optional[bool] = None,
+    watch_on_attribute: Optional[str] = None,
+) -> tuple[str, Dict[str, List[str]]]:
+    """
+    Transform HTML by adding attributes to root and all elements.
+
+    Args:
+        html (str): The HTML string to transform. Can be a fragment or full document.
+        root_attributes (List[str]): List of attribute names to add to root elements only.
+        all_attributes (List[str]): List of attribute names to add to all elements.
+        check_end_names (Optional[bool]): Whether to validate matching of end tags. Defaults to None.
+        watch_on_attribute (Optional[str]): If set, captures which attributes were added to elements with this attribute.
+
+    Returns:
+        A tuple containing:
+            - The transformed HTML string
+            - A dictionary mapping captured attribute values to lists of attributes that were added
+              to those elements. Populated only if watch_on_attribute is set; otherwise an empty dict.
+
+    Example:
+        >>> html = '<div><p>Hello</p></div>'
+        >>> set_html_attributes(html, ['data-root-id'], ['data-v-123'])
+        ('<div data-root-id="" data-v-123=""><p data-v-123="">Hello</p></div>', {})
+
+    Raises:
+        ValueError: If the HTML is malformed or cannot be parsed.
+    """
+    ...
+
+__all__ = ["set_html_attributes"]

From e3021c13500d0f0b8d4ff99a697319d20bb28b03 Mon Sep 17 00:00:00 2001
From: Juro Oravec <juraj.oravec.josefson@gmail.com>
Date: Thu, 23 Oct 2025 09:52:06 +0200
Subject: [PATCH 2/3] docs: update README

---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/README.md b/README.md
index b8ebc79..dd1110d 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,24 @@ print(captured)
 # }
 ```
 
+## Architecture
+
+This project uses a multi-crate Rust workspace structure to maintain clean separation of concerns:
+
+### Crate Structure
+
+- **`djc-html-transformer`**: Pure Rust library for HTML transformation
+- **`djc-template-parser`**: Pure Rust library for Django template parsing
+- **`djc-core`**: Python bindings that combine all other libraries
+
+### Design Philosophy
+
+To make sense of the code and keep it clean, the Python API and Rust logic are defined separately:
+
+1. Each crate (AKA Rust package) has `lib.rs` (which is like Python's `__init__.py`). These files do not define the main logic, but only the public API of the crate. So the API that's to be used by other crates.
+2. The `djc-core` crate imports other crates
+3. The Python API is defined only in `djc-core`, using PyO3.
+
 ## Development
 
 1. Setup python env

From 3a166422a5068a059b0668417bb60b1c147c9dc7 Mon Sep 17 00:00:00 2001
From: Juro Oravec <juraj.oravec.josefson@gmail.com>
Date: Thu, 23 Oct 2025 09:53:33 +0200
Subject: [PATCH 3/3] docs: minor fixes

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index dd1110d..52bbb5c 100644
--- a/README.md
+++ b/README.md
@@ -76,15 +76,15 @@ print(captured)
 
 This project uses a multi-crate Rust workspace structure to maintain clean separation of concerns:
 
-### Crate Structure
+### Crate structure
 
 - **`djc-html-transformer`**: Pure Rust library for HTML transformation
 - **`djc-template-parser`**: Pure Rust library for Django template parsing
-- **`djc-core`**: Python bindings that combine all other libraries
+- **`djc-core`**: Python bindings crate that combines all other libraries
 
-### Design Philosophy
+### Design philosophy
 
-To make sense of the code and keep it clean, the Python API and Rust logic are defined separately:
+To make sense of the code, the Python API and Rust logic are defined separately:
 
 1. Each crate (AKA Rust package) has `lib.rs` (which is like Python's `__init__.py`). These files do not define the main logic, but only the public API of the crate. So the API that's to be used by other crates.
 2. The `djc-core` crate imports other crates