Research the Tree-sitter Grammar + +Before starting, gather information: + +- **Grammar repository**: Find the official tree-sitter grammar repository (e.g., `https://github.com/tree-sitter-grammars/tree-sitter-LANGUAGE`) +- **Package name**: Identify the exact crate name (e.g., `tree-sitter-zig`) +- **Version**: Determine the version to use (check crates.io or user specification) +- **Node types**: Fetch the `queries/highlights.scm` file from the repository to understand node structure + +**Key files to examine in the grammar repository:** +- `queries/highlights.scm` - Shows what node types exist +- `src/node-types.json` - Complete node type definitions (if available) +- Example code in the repository's tests + +### 2. Add Workspace Dependency + +Edit `Cargo.toml` (workspace root): + +```toml +[workspace.dependencies] +# ... existing dependencies ... +tree-sitter-LANGUAGE = "VERSION" +``` + +**Example:** +```toml +tree-sitter-zig = "1.1.2" +``` + +### 3. Add Crate Dependency + +Edit `crates/codebook/Cargo.toml`: + +```toml +[dependencies] +# ... existing dependencies ... +tree-sitter-LANGUAGE.workspace = true +``` + +Add in alphabetical order with other tree-sitter dependencies. + +### 4. Update Language Type Enum + +Edit `crates/codebook/src/queries.rs`: + +Add variant to `LanguageType` enum in **alphabetical order**: + +```rust +#[derive(Debug, Clone, PartialEq, Copy)] +pub enum LanguageType { + Bash, + C, + // ... other languages ... + YourLanguage, // Add here + Zig, +} +``` + +### 5. 
Add Language Setting

In `crates/codebook/src/queries.rs`, add entry to `LANGUAGE_SETTINGS` array:

```rust
LanguageSetting {
    type_: LanguageType::YourLanguage,
    ids: &["language_id"], // LSP language identifier
    dictionary_ids: &["language_id"], // Dictionary lookup
    query: include_str!("queries/yourlanguage.scm"),
    extensions: &["ext1", "ext2"], // File extensions
},
```

**Important notes:**
- `ids`: Language identifiers from [VSCode language identifiers](https://code.visualstudio.com/docs/languages/identifiers)
- `extensions`: Common file extensions without the dot
- Place in the array (order doesn't matter functionally but keep consistent)

### 6. Add Language Function Match Arm

In `crates/codebook/src/queries.rs`, update the `language()` method in `impl LanguageSetting`:

```rust
pub fn language(&self) -> Option<Language> {
    match self.type_ {
        // ... existing matches ...
        LanguageType::YourLanguage => Some(tree_sitter_language::LANGUAGE.into()),
        // OR if the crate has a function:
        LanguageType::YourLanguage => Some(tree_sitter_language::language().into()),
    }
}
```

**Note:** Check the tree-sitter crate's API. Most expose either:
- `LANGUAGE` constant (older style)
- `language()` function (newer style)
- `LANGUAGE_TYPENAME` for multi-language crates (e.g., `LANGUAGE_PHP`, `LANGUAGE_TYPESCRIPT`)

### 7. 
Create Tree-sitter Query File + +Create `crates/codebook/src/queries/yourlanguage.scm` + +**Query file structure:** +```scheme +; Comments - capture all comment types +(line_comment) @comment +(block_comment) @comment +(doc_comment) @comment + +; Identifiers - capture DEFINITIONS only, not usages +(function_declaration + name: (identifier) @identifier) + +(variable_declaration + (identifier) @identifier) + +(parameter + (identifier) @identifier) + +; Struct/Type definitions +(struct_declaration + name: (type_identifier) @identifier) + +(field_declaration + name: (field_identifier) @identifier) + +; String literals - capture string content +(string_content) @string +(string) @string +``` + +**Critical guidelines:** +- Focus on **definitions**, not references/usages +- Capture user-defined names, not keywords +- Include comments (all types) +- Include string literals +- Test the query thoroughly - invalid queries will fail compilation + +**How to discover node types:** +1. Visit the grammar's GitHub repository +2. Check `queries/highlights.scm` for existing patterns +3. Use [Tree-sitter Playground](https://tree-sitter.github.io/tree-sitter/playground.html) to test +4. Copy sample code, paste into playground with your grammar +5. Inspect the AST structure to identify node types + +**Common node type patterns by language:** +- Identifiers: `identifier`, `IDENTIFIER`, `name` +- Strings: `string`, `string_content`, `string_literal` +- Comments: `comment`, `line_comment`, `block_comment`, `doc_comment` +- Functions: `function_declaration`, `function_definition`, `FnProto` +- Variables: `variable_declaration`, `var_decl`, `VarDecl` + +### 8. 
Create Example File + +Create `examples/example.LANGUAGE` with intentional spelling errors: + +**Requirements:** +- Must contain at least one spelling error (for integration tests) +- Include various language constructs: functions, variables, comments, strings +- Use realistic code patterns +- Include misspellings in: identifiers, strings, comments + +**Example structure:** +```language +// Comment with speling error +const myVarible = "Hello Wolrd"; + +function processDatta(inputt) { + const resullt = inputt + 1; + return resullt; +} +``` + +### 9. Create Test File + +Create `crates/codebook/tests/test_yourlanguage.rs`: + +**Template:** +```rust +use codebook::{ + parser::{TextRange, WordLocation}, + queries::LanguageType, +}; + +mod utils; + +#[test] +fn test_yourlanguage_location() { + utils::init_logging(); + let sample_text = r#" +// Your sample code with misspellings +const speling = "error"; +"#; + + let expected = vec![ + WordLocation::new( + "speling".to_string(), + vec![TextRange { + start_byte: 6, // Calculate exact byte positions + end_byte: 13, + }], + ), + // Add more expected misspellings + ]; + + let not_expected = ["const", "std"]; // Keywords that should NOT be flagged + + let processor = utils::get_processor(); + let misspelled = processor + .spell_check(sample_text, Some(LanguageType::YourLanguage), None) + .to_vec(); + + println!("Misspelled words: {misspelled:?}"); + + for e in &expected { + println!("Expecting: {e:?}"); + let miss = misspelled.iter().find(|r| r.word == e.word).unwrap(); + assert_eq!(miss.locations, e.locations); + } + + for result in misspelled { + assert!(!not_expected.contains(&result.word.as_str())); + } +} +``` + +**Test requirements:** +- Include multiple types of misspellings +- Verify byte positions are exact +- Ensure keywords are NOT captured +- Test comments, strings, and identifiers separately + +### 10. Run Tests + +Execute in order: + +```bash +# 1. 
Verify query is valid +cargo test -p codebook queries::tests::test_all_queries_are_valid + +# 2. Run language-specific test +cargo test -p codebook test_yourlanguage + +# 3. Run all tests +cargo test -p codebook +``` + +## Common Issues and Solutions + +### Issue: Invalid query error with node type + +**Error:** `QueryError { message: "NodeTypeName", kind: NodeType }` + +**Solution:** +- The node type doesn't exist in the grammar +- Check the grammar's `queries/highlights.scm` for correct node names +- Node types are case-sensitive +- Use tree-sitter playground to verify AST structure + +### Issue: Capturing too many or too few occurrences + +**Problem:** Test fails because word appears more times than expected + +**Solution:** +- Refine query to capture only definitions, not usages +- Use field names in captures: `name: (identifier)` instead of just `(identifier)` +- Check if you're capturing both definition and reference + +### Issue: Keywords being captured + +**Problem:** Language keywords appear in misspelled words + +**Solution:** +- Don't capture `(keyword)` nodes +- Be specific in queries - use parent node context +- Only capture user-defined names + +### Issue: Wrong language() function syntax + +**Error:** Compilation error in `language()` match arm + +**Solution:** +- Check the tree-sitter crate documentation +- Try: `LANGUAGE.into()`, `language().into()`, or `LANGUAGE_VARIANT.into()` +- Look at the crate's lib.rs for the public API + +## Testing Checklist + +Before considering the implementation complete: + +- [ ] Query file compiles without errors +- [ ] `test_all_queries_are_valid` passes +- [ ] Language-specific test passes +- [ ] Example file exists with intentional errors +- [ ] All expected misspellings are caught +- [ ] No keywords are captured +- [ ] Byte positions in tests are accurate +- [ ] Comments are captured +- [ ] String literals are captured +- [ ] Identifier definitions are captured + +## File Modification Summary + +Files that MUST be 
modified: + +1. `Cargo.toml` - Add workspace dependency +2. `crates/codebook/Cargo.toml` - Add crate dependency +3. `crates/codebook/src/queries.rs` - Add enum variant, setting, and language match +4. `crates/codebook/src/queries/LANGUAGE.scm` - Create query file +5. `examples/example.LANGUAGE` - Create example file +6. `crates/codebook/tests/test_LANGUAGE.rs` - Create test file + +## Query File Best Practices + +1. **Start simple**: Begin with basic captures (comments, simple identifiers) +2. **Test incrementally**: Add one capture type at a time +3. **Use field names**: `name: (identifier)` is better than `(identifier)` +4. **Check highlights.scm**: The language's highlight query is your best reference +5. **Avoid ambiguity**: Be specific about what context you're capturing +6. **Comment your queries**: Explain what each section captures + +## Example: Real Implementation Reference + +For a complete reference implementation, examine existing languages: +- Simple: `queries/go.scm`, `tests/test_go.rs` +- Complex: `queries/rust.scm`, `tests/test_rust.rs` +- With strings: `queries/python.scm`, `tests/test_python.rs` + +## Byte Position Calculation + +Tests require exact byte positions. To calculate: + +1. Copy your sample text exactly as in the test +2. Count UTF-8 bytes from start of string to word start +3. Count UTF-8 bytes from start of string to word end +4. Remember: Most ASCII characters are 1 byte, but check UTF-8 encoding + +**Pro tip**: Print actual results first, then use those byte positions in your test expectations. + +## Final Verification + +Run the full test suite: +```bash +cargo test -p codebook +``` + +All tests should pass. If not, review error messages and adjust queries or test expectations. 
\ No newline at end of file From 83a5b538eb09e8ebeff6979425d44c79dc44422d Mon Sep 17 00:00:00 2001 From: Bo Lopker Date: Tue, 14 Oct 2025 22:44:49 -0700 Subject: [PATCH 2/2] Add LaTeX support --- Cargo.lock | 11 ++ README.md | 2 + crates/codebook/Cargo.toml | 1 + crates/codebook/src/parser.rs | 26 ++- crates/codebook/src/queries.rs | 9 + crates/codebook/src/queries/latex.scm | 5 + crates/codebook/tests/test_elixir.rs | 5 +- crates/codebook/tests/test_go.rs | 6 +- crates/codebook/tests/test_latex.rs | 234 ++++++++++++++++++++++++++ crates/codebook/tests/test_php.rs | 5 +- examples/example.tex | 111 +++++++----- journal/add_language_support.md | 18 +- word_lists/latex.txt | 2 + 13 files changed, 375 insertions(+), 60 deletions(-) create mode 100644 crates/codebook/src/queries/latex.scm create mode 100644 crates/codebook/tests/test_latex.rs create mode 100644 word_lists/latex.txt diff --git a/Cargo.lock b/Cargo.lock index 0328364..81f08b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -414,6 +414,7 @@ dependencies = [ name = "codebook" version = "0.3.13" dependencies = [ + "codebook-tree-sitter-latex", "codebook_config", "codebook_downloader", "env_logger", @@ -472,6 +473,16 @@ dependencies = [ "tower-lsp", ] +[[package]] +name = "codebook-tree-sitter-latex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b1ee613183ae5dd1f992921539d573f9b0c38a7cefbde8e97092c3824ba2fb1" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "codebook_config" version = "0.3.13" diff --git a/README.md b/README.md index f08216b..035d1cc 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Codebook is in active development. As better dictionaries are added, words that | Haskell | ⚠️ | | Java | ✅ | | JavaScript | ✅ | +| LaTeX | ⚠️ | | Lua | ✅ | | Markdown | ✅ | | PHP | ⚠️ | @@ -140,6 +141,7 @@ Codebook is in active development. 
As better dictionaries are added, words that | Rust | ✅ | | TOML | ✅ | | TypeScript | ✅ | +| Zig | ✅ | ✅ = Good to go. ⚠️ = Supported, but needs more testing. Help us improve! diff --git a/crates/codebook/Cargo.toml b/crates/codebook/Cargo.toml index 7cee939..550b4a5 100644 --- a/crates/codebook/Cargo.toml +++ b/crates/codebook/Cargo.toml @@ -39,6 +39,7 @@ tree-sitter-haskell.workspace = true tree-sitter-html.workspace = true tree-sitter-java.workspace = true tree-sitter-javascript.workspace = true +codebook-tree-sitter-latex.workspace = true tree-sitter-lua.workspace = true tree-sitter-php.workspace = true tree-sitter-python.workspace = true diff --git a/crates/codebook/src/parser.rs b/crates/codebook/src/parser.rs index 031197c..4afa164 100644 --- a/crates/codebook/src/parser.rs +++ b/crates/codebook/src/parser.rs @@ -2,12 +2,12 @@ use crate::splitter::{self}; use crate::queries::{LanguageType, get_language_setting}; use regex::Regex; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use streaming_iterator::StreamingIterator; use tree_sitter::{Parser, Query, QueryCursor}; use unicode_segmentation::UnicodeSegmentation; -#[derive(Debug, Clone, Copy, PartialEq, Ord, Eq, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, Ord, Eq, PartialOrd, Hash)] pub struct TextRange { /// Start position in utf-8 byte offset pub start_byte: usize, @@ -195,7 +195,7 @@ fn find_locations_code( let query = Query::new(&language, language_setting.query).unwrap(); let mut cursor = QueryCursor::new(); - let mut word_locations: HashMap<String, Vec<TextRange>> = HashMap::new(); + let mut word_locations: HashMap<String, HashSet<TextRange>> = HashMap::new(); let provider = text.as_bytes(); let mut matches_query = cursor.matches(&query, root_node, provider); @@ -217,13 +217,18 @@ fn find_locations_code( end_byte: range.end_byte + node_start_byte, }; if let Some(existing_result) = word_locations.get_mut(&word_pos.word) { + let added = existing_result.insert(location); #[cfg(debug_assertions)] if 
existing_result.contains(&location) { - panic!("Two of the same locations found. Make a better query.") + if !added { + let word = word_pos.word.clone(); + panic!( + "Two of the same locations found. Make a better query. Word: {word}, Location: {location:?}" + ) } - existing_result.push(location); } else { - word_locations.insert(word_pos.word.clone(), vec![location]); + let mut set = HashSet::new(); + set.insert(location); + word_locations.insert(word_pos.word.clone(), set); } } } @@ -235,7 +240,12 @@ fn find_locations_code( .keys() .map(|word| WordLocation { word: word.clone(), - locations: word_locations.get(word).cloned().unwrap_or_default(), + locations: word_locations + .get(word) + .cloned() + .unwrap_or_default() + .into_iter() + .collect(), }) .collect() } diff --git a/crates/codebook/src/queries.rs b/crates/codebook/src/queries.rs index e4ecd6a..8d29392 100644 --- a/crates/codebook/src/queries.rs +++ b/crates/codebook/src/queries.rs @@ -14,6 +14,7 @@ pub enum LanguageType { HTML, Java, Javascript, + Latex, Lua, Php, Python, @@ -106,6 +107,13 @@ pub static LANGUAGE_SETTINGS: &[LanguageSetting] = &[ query: include_str!("queries/javascript.scm"), extensions: &["js", "jsx"], }, + LanguageSetting { + type_: LanguageType::Latex, + ids: &["latex"], + dictionary_ids: &["latex"], + query: include_str!("queries/latex.scm"), + extensions: &["tex", "latex", "ltx"], + }, LanguageSetting { type_: LanguageType::Typescript, ids: &["typescript", "typescriptreact"], @@ -216,6 +224,7 @@ impl LanguageSetting { LanguageType::HTML => Some(tree_sitter_html::LANGUAGE.into()), LanguageType::Java => Some(tree_sitter_java::LANGUAGE.into()), LanguageType::Javascript => Some(tree_sitter_javascript::LANGUAGE.into()), + LanguageType::Latex => Some(codebook_tree_sitter_latex::language()), LanguageType::Lua => Some(tree_sitter_lua::LANGUAGE.into()), LanguageType::Php => Some(tree_sitter_php::LANGUAGE_PHP.into()), LanguageType::Python => Some(tree_sitter_python::LANGUAGE.into()), diff 
--git a/crates/codebook/src/queries/latex.scm b/crates/codebook/src/queries/latex.scm new file mode 100644 index 0000000..1cc2017 --- /dev/null +++ b/crates/codebook/src/queries/latex.scm @@ -0,0 +1,5 @@ +; Comments - capture LaTeX comments (lines starting with %) +(line_comment) @comment + +; Text content in the document +(text) @string diff --git a/crates/codebook/tests/test_elixir.rs b/crates/codebook/tests/test_elixir.rs index ad003dd..0ee2d86 100644 --- a/crates/codebook/tests/test_elixir.rs +++ b/crates/codebook/tests/test_elixir.rs @@ -163,7 +163,10 @@ fn test_elixir_module() { println!("Expecting {}", expect.word); let result = misspelled.iter().find(|r| r.word == expect.word).unwrap(); assert_eq!(result.word, expect.word); - assert_eq!(result.locations, expect.locations); + assert!(result.locations.len() == expect.locations.len()); + for location in result.locations.iter() { + assert!(expect.locations.contains(location)) + } } } diff --git a/crates/codebook/tests/test_go.rs b/crates/codebook/tests/test_go.rs index 0815aba..db51c08 100644 --- a/crates/codebook/tests/test_go.rs +++ b/crates/codebook/tests/test_go.rs @@ -245,7 +245,11 @@ fn test_go_location() { for e in &expected { println!("Expecting: {e:?}"); let miss = misspelled.iter().find(|r| r.word == e.word).unwrap(); - assert_eq!(miss.locations, e.locations); + // assert_eq!(miss.locations, e.locations); + assert!(miss.locations.len() == e.locations.len()); + for location in &miss.locations { + assert!(e.locations.contains(location)); + } } for result in misspelled { assert!(!not_expected.contains(&result.word.as_str())); diff --git a/crates/codebook/tests/test_latex.rs b/crates/codebook/tests/test_latex.rs new file mode 100644 index 0000000..1601ae1 --- /dev/null +++ b/crates/codebook/tests/test_latex.rs @@ -0,0 +1,234 @@ +use codebook::{ + parser::{TextRange, WordLocation}, + queries::LanguageType, +}; + +mod utils; + +#[test] +fn test_latex_comments() { + utils::init_logging(); + let sample_text = 
r#" +% This is a coment with a typo +% Another commnet with wrng spelling +\documentclass{article} + "#; + let expected = vec![ + WordLocation::new( + "coment".to_string(), + vec![TextRange { + start_byte: 13, + end_byte: 19, + }], + ), + WordLocation::new( + "commnet".to_string(), + vec![TextRange { + start_byte: 42, + end_byte: 49, + }], + ), + WordLocation::new( + "wrng".to_string(), + vec![TextRange { + start_byte: 55, + end_byte: 59, + }], + ), + ]; + let not_expected = vec!["documentclass", "article"]; + let processor = utils::get_processor(); + let misspelled = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + println!("Misspelled words: {misspelled:?}"); + for e in &expected { + println!("Expecting: {e:?}"); + let miss = misspelled + .iter() + .find(|r| r.word == e.word) + .unwrap_or_else(|| panic!("Word '{}' not found in misspelled list", e.word)); + assert_eq!(miss.locations, e.locations); + } + for word in not_expected { + assert!(!misspelled.iter().any(|r| r.word == word)); + } +} + +#[test] +fn test_latex_text_content() { + utils::init_logging(); + let sample_text = r#" +\section{Introducton} + +This is an exampl of text with speling errors. + "#; + let expected = vec!["Introducton", "exampl", "speling"]; + let processor = utils::get_processor(); + let binding = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + let mut misspelled = binding + .iter() + .map(|r| r.word.as_str()) + .collect::<Vec<_>>(); + misspelled.sort(); + println!("Misspelled words: {misspelled:?}"); + assert_eq!(misspelled, expected); +} + +#[test] +fn test_latex_sections_and_text() { + utils::init_logging(); + let sample_text = r#" +\section{Methology} + +The methology section describs the approach. + +\subsection{Bakground} + +In this secion we discuss importnt concepts. 
+ "#; + let expected = vec![ + "Bakground", + "Methology", + "describs", + "importnt", + "methology", + "secion", + ]; + let processor = utils::get_processor(); + let binding = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + let mut misspelled = binding + .iter() + .map(|r| r.word.as_str()) + .collect::<Vec<_>>(); + misspelled.sort(); + println!("Misspelled words: {misspelled:?}"); + assert_eq!(misspelled, expected); +} + +#[test] +fn test_latex_itemize() { + utils::init_logging(); + let sample_text = r#" +\begin{itemize} + \item First itm with algoritm + \item Second itm about formulas +\end{itemize} + "#; + let expected = vec!["algoritm", "itm"]; + let processor = utils::get_processor(); + let binding = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + let mut misspelled = binding + .iter() + .map(|r| r.word.as_str()) + .collect::<Vec<_>>(); + misspelled.sort(); + println!("Misspelled words: {misspelled:?}"); + assert_eq!(misspelled, expected); +} + +#[test] +fn test_latex_mixed_content() { + utils::init_logging(); + let sample_text = r#" +% Comment: calcuate the result +\section{Resuts} + +The resuts show our aproach is efective. + +\begin{equation} + E = mc^2 \label{eq:enrgy} +\end{equation} + +As shown in Equation~\ref{eq:enrgy}, the relatioship is clear. 
+ "#; + let expected = vec![ + "Resuts", + "aproach", + "calcuate", + "efective", + "enrgy", + "relatioship", + "resuts", + ]; + let not_expected = vec!["equation", "label", "ref", "begin", "end", "section"]; + let processor = utils::get_processor(); + let binding = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + let mut misspelled = binding + .iter() + .map(|r| r.word.as_str()) + .collect::<Vec<_>>(); + misspelled.sort(); + println!("Misspelled words: {misspelled:?}"); + assert_eq!(misspelled, expected); + for word in not_expected { + assert!(!misspelled.contains(&word)); + } +} + +#[test] +fn test_latex_comprehensive() { + utils::init_logging(); + let sample_text = r#" +\documentclass{article} + +% This coment has typos: wrng and speling +\title{A Sampel Document} + +\begin{document} + +\section{Introducton} + +This docment demonstrates the spel checker. + +\subsection{Analyss} + +The analyss reveals paterns in the data. + +\end{document} + "#; + let expected = vec![ + "Analyss", + "Introducton", + "Sampel", + "analyss", + "coment", + "docment", + "paterns", + "spel", + "speling", + "wrng", + ]; + let not_expected = vec![ + "documentclass", + "article", + "title", + "begin", + "end", + "document", + "section", + "subsection", + ]; + let processor = utils::get_processor(); + let binding = processor + .spell_check(sample_text, Some(LanguageType::Latex), None) + .to_vec(); + let mut misspelled = binding + .iter() + .map(|r| r.word.as_str()) + .collect::<Vec<_>>(); + misspelled.sort(); + println!("Misspelled words: {misspelled:?}"); + assert_eq!(misspelled, expected); + for word in not_expected { + assert!(!misspelled.contains(&word)); + } +} diff --git a/crates/codebook/tests/test_php.rs b/crates/codebook/tests/test_php.rs index be80ae0..166fcdf 100644 --- a/crates/codebook/tests/test_php.rs +++ b/crates/codebook/tests/test_php.rs @@ -204,7 +204,10 @@ $userDetails = $userr->getUserDeetails(); .iter() .find(|r| r.word == e.word) .expect("Word 
not found"); - assert_eq!(miss.locations, e.locations); + assert!(miss.locations.len() == e.locations.len()); + for location in &miss.locations { + assert!(e.locations.contains(location)); + } } for result in misspelled { diff --git a/examples/example.tex b/examples/example.tex index 07271fc..4abe015 100644 --- a/examples/example.tex +++ b/examples/example.tex @@ -1,46 +1,73 @@ -\documentclass[12pt]{article} -\usepackage{lingmacros} -\usepackage{tree-dvips} +\documentclass{article} +\usepackage{amsmath} +\usepackage{graphicx} + +\title{A Sampel Document with Speling Erors} +\author{John Doe} +\date{\today} + \begin{document} -\section*{Notes for My Paper} - -Don't forget to include examples of topicalization. -They look like this: - -{\small -\enumsentence{Topicalization from sentential subject:\\ -\shortex{7}{a John$_i$ [a & kltukl & [el & - {\bf l-}oltoir & er & ngii$_i$ & a Mary]]} -{ & {\bf R-}clear & {\sc comp} & - {\bf IR}.{\sc 3s}-love & P & him & } -{John, (it's) clear that Mary loves (him).}} -} - -\subsection*{How to handle topicalization} - -I'll just assume a tree structure like (\ex{1}). - -{\small -\enumsentence{Structure of A$'$ Projections:\\ [2ex] -\begin{tabular}[t]{cccc} - & \node{i}{CP}\\ [2ex] - \node{ii}{Spec} & &\node{iii}{C$'$}\\ [2ex] - &\node{iv}{C} & & \node{v}{SAgrP} -\end{tabular} -\nodeconnect{i}{ii} -\nodeconnect{i}{iii} -\nodeconnect{iii}{iv} -\nodeconnect{iii}{v} -} -} - -\subsection*{Mood} - -Mood changes when there is a topic, as well as when -there is WH-movement. \emph{Irrealis} is the mood when -there is a non-subject topic or WH-phrase in Comp. -\emph{Realis} is the mood when there is a subject topic -or WH-phrase. +\maketitle + +% This is a coment with a typo: calcuate +% Another commnet with wrng spellingg + +\section{Introducton} + +This is an exampl of a LaTeX document with intentinal spelling errors. +The purpse of this docment is to test the spel checker functionality. 
+ +\subsection{Bakground} + +In this secion, we will discuss some importnt concepts. +The folowing list contains severl items: + +\begin{itemize} + \item First itm with a typo: algoritm + \item Second itm about mathmatical formulas + \item Third itm discussing resarch methods +\end{itemize} + +\section{Methology} + +The methology section describs the approach used in this reserch. +We will demonstate several techniqes for procesing data. + +\subsection{Data Colection} + +Data was colected from varous sources including: +\begin{enumerate} + \item Primry sources + \item Secondry sources + \item Teriary references +\end{enumerate} + +\section{Resuts} + +The resuts show that our aproach is efective. +We can see this in the folowing equation: + +\begin{equation} + E = mc^2 \label{eq:enrgy} +\end{equation} + +As shown in Equation~\ref{eq:enrgy}, the relatioship is clear. + +\subsection{Analyss} + +The analyss reveals severl interesting paternzs. +These paterns suggest that our hypothsis was corect. + +% Comment: More detals needed here + +\section{Concluson} + +In concluson, this docment demonstrates variuos spelling errors +that should be detectd by the spell checker. The implimentation +should be able to identfy errors in text, coments, and labels. 
+ +\bibliographystyle{plain} +\bibliography{refereces} \end{document} diff --git a/journal/add_language_support.md b/journal/add_language_support.md index 41fd43e..bb799b1 100644 --- a/journal/add_language_support.md +++ b/journal/add_language_support.md @@ -204,7 +204,7 @@ fn test_yourlanguage_location() { // Your sample code with misspellings const speling = "error"; "#; - + let expected = vec![ WordLocation::new( "speling".to_string(), @@ -215,22 +215,22 @@ const speling = "error"; ), // Add more expected misspellings ]; - + let not_expected = ["const", "std"]; // Keywords that should NOT be flagged - + let processor = utils::get_processor(); let misspelled = processor .spell_check(sample_text, Some(LanguageType::YourLanguage), None) .to_vec(); - + println!("Misspelled words: {misspelled:?}"); - + for e in &expected { println!("Expecting: {e:?}"); let miss = misspelled.iter().find(|r| r.word == e.word).unwrap(); assert_eq!(miss.locations, e.locations); } - + for result in misspelled { assert!(!not_expected.contains(&result.word.as_str())); } @@ -260,6 +260,10 @@ cargo test -p codebook ## Common Issues and Solutions +### Issue: CamelCase words are getting split + +- Codebook processing splits words based on common word boundaries in programming like CamelCase and snake_case. Expect that when making tests. + ### Issue: Invalid query error with node type **Error:** `QueryError { message: "NodeTypeName", kind: NodeType }` @@ -357,4 +361,4 @@ Run the full test suite: cargo test -p codebook ``` -All tests should pass. If not, review error messages and adjust queries or test expectations. \ No newline at end of file +All tests should pass. If not, review error messages and adjust queries or test expectations. diff --git a/word_lists/latex.txt b/word_lists/latex.txt new file mode 100644 index 0000000..18ee27f --- /dev/null +++ b/word_lists/latex.txt @@ -0,0 +1,2 @@ +bibliographystyle +emph