Merged
28 commits
0bcf21c
Merge branch 'master' into dev
gitronald Feb 23, 2026
7236545
version [prerelease]: 0.6.10a0
gitronald Feb 23, 2026
34f561c
build(deps): bump tornado from 6.5.4 to 6.5.5
dependabot[bot] Mar 12, 2026
081bcf9
remove unused functions and imports from utils
gitronald Mar 15, 2026
6d6c2de
replace os with pathlib.Path
gitronald Mar 15, 2026
1f55e1f
add ruff formatting, linting, and pre-commit hooks
gitronald Mar 15, 2026
ee6bf2c
apply ruff formatting and lint fixes
gitronald Mar 15, 2026
487209f
replace pandas with polars in scripts
gitronald Mar 15, 2026
6ee78ce
accept Path in utils and save methods, remove str() wrapping
gitronald Mar 15, 2026
353784e
consolidate webutils into utils
gitronald Mar 15, 2026
f92114f
add test coverage reporting
gitronald Mar 15, 2026
f717dd1
add unit tests for utils, locations, models, and feature extractor
gitronald Mar 15, 2026
d59e9cb
Merge branch 'dependabot/uv/tornado-6.5.5' into dev
gitronald Mar 15, 2026
3394b12
parse_serp always returns dict with results and features keys
gitronald Mar 15, 2026
25ddd60
remove BaseResult construction from ads parser
gitronald Mar 15, 2026
40bbe12
convert SERPFeatures from dataclass to pydantic
gitronald Mar 15, 2026
b203653
convert DetailsItem from dataclass to pydantic
gitronald Mar 15, 2026
4cc5091
add ResponseOutput model for search method returns
gitronald Mar 15, 2026
a9a96fe
add ParsedSERP model for parsed output
gitronald Mar 15, 2026
9d6deda
consolidate details field to typed dicts, remove DetailsItem/DetailsList
gitronald Mar 15, 2026
d76cbe7
normalize local_results sub_type for "results for" headers
gitronald Mar 15, 2026
3406b2b
update snapshots for local_results sub_type normalization
gitronald Mar 15, 2026
c20ca90
update demo scripts for ParsedSERP attribute access
gitronald Mar 16, 2026
0eb5c79
auto-detect chrome version by defaulting version_main to None
gitronald Mar 16, 2026
a1c5c7a
add demo-searches script entry point
gitronald Mar 16, 2026
f461061
update readme for v0.7.0 changes
gitronald Mar 16, 2026
a4e7287
fix SearchConfig.method type annotation to SearchMethod only
gitronald Mar 16, 2026
0cb6d14
version [minor]: 0.7.0
gitronald Mar 16, 2026
4 changes: 3 additions & 1 deletion .gitignore
@@ -11,5 +11,7 @@ scripts/ads-no-subtype
*.egg-info
*__pycache__

# Ignore test cache
# Ignore caches
.pytest_cache
.ruff_cache
.coverage
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.5
hooks:
- id: ruff-format
- id: ruff
args: [--fix]
80 changes: 45 additions & 35 deletions README.md
@@ -26,34 +26,20 @@ and position-based specifications.
---
## Recent Updates

### 0.6.9

- Fixed bugs in component parsers (class comparison, assignment operator, set literal)
- Fixed `return` in `finally` block in requests searcher
- Added captcha detection to feature extractor
- Added captcha handling and jittered delay to demo searches
- Dropped pandas from core dependencies
- Cleaned up legacy typing imports
- Removed poetry.toml

### 0.6.8

- Migrated from Poetry to uv for dependency management
- Added Python 3.12-3.14 test matrix in GitHub Actions
- Added `flights` classifier and `standard-4` layout
- Added local service ad parser
- Extracted bottom ads before main column
- Fixed `return` in `finally` block warning in selenium searcher

### 0.6.7

- Added `get_text_by_selectors()` to `webutils` -- centralizes multi-selector fallback pattern across 7 component parsers
- Added `perspectives`, `recent_posts`, and `latest_from` component classifiers
- Added `sub_type` to perspectives parser from header text
- Added CI test workflow on push to dev branch
- Added compressed test fixtures with `condense_fixtures.py` script
- Updated dependency lower bounds for security patches (protobuf, orjson)
- Updated GitHub Actions to checkout v6 and setup-python v6
### 0.7.0 (dev)

- **Breaking:** `details` field is now always `dict | None` with a self-describing `type` key (e.g. `{"type": "menu", "items": [...]}`)
- **Breaking:** `parse_serp()` now always returns a dict with `results` and `features` keys; the `extract_features` parameter has been removed
- Standardized all models on Pydantic BaseModel (removed dataclasses)
- Added `ResponseOutput` and `ParsedSERP` typed models
- Removed `DetailsItem`, `DetailsList` classes
- Normalized `local_results` sub_type for location-specific headers
- Replaced `os` with `pathlib.Path` throughout
- Consolidated `webutils.py` into `utils.py`
- Added ruff formatting, linting, and pre-commit hooks
- Added test coverage reporting (69%)
- Added unit tests for utils, locations, models, and feature extractor
- Replaced pandas with polars in demo scripts
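
The new self-describing `details` contract can be sketched in plain Python; the field names other than `details` and `type` are illustrative stand-ins, not from the library:

```python
# Illustrative only: mimics the 0.7.0 `details` contract, where a result's
# details are either None or a dict carrying a self-describing "type" key.
result = {
    "title": "Example result",
    "details": {"type": "menu", "items": ["Overview", "Reviews", "Photos"]},
}

details = result["details"]
if details is not None and details["type"] == "menu":
    # Dispatch on the "type" key instead of isinstance checks
    # against the removed DetailsItem/DetailsList classes.
    labels = details["items"]
else:
    labels = []
```

Consumers that previously branched on class identity can now branch on the `type` key alone.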

---
## Getting Started
@@ -132,7 +118,7 @@ Example search and parse pipeline (via requests):
import WebSearcher as ws
se = ws.SearchEngine() # 1. Initialize collector
se.search('immigration news') # 2. Conduct a search
se.parse_results() # 3. Parse search results
se.parse_serp() # 3. Parse search results
se.save_serp(append_to='serps.json') # 4. Save HTML and metadata
se.save_results(append_to='results.json') # 5. Save parsed results

@@ -164,14 +150,14 @@ se.search('immigration news')

#### 3. Parse Search Results

The example below is primarily for parsing search results as you collect HTML.
See `ws.parse_serp(html)` for parsing existing HTML data.

```python
se.parse_results()
se.parse_serp()

# Show first result
se.results[0]
se.parsed.results[0]
{'section': 'main',
'cmpt_rank': 0,
'sub_rank': 0,
@@ -288,10 +274,34 @@ To release a new version:
---
## Update Log

`0.7.0`
- Standardize data models on Pydantic, typed details field, remove DetailsItem/DetailsList

`0.6.9`
- Fixed bugs in component parsers (class comparison, assignment operator, set literal)
- Fixed `return` in `finally` block in requests searcher
- Added captcha detection to feature extractor
- Added captcha handling and jittered delay to demo searches
- Dropped pandas from core dependencies
- Cleaned up legacy typing imports
- Removed poetry.toml

`0.6.8`
- Migrated from Poetry to uv for dependency management
- Added Python 3.12-3.14 test matrix in GitHub Actions
- Added `flights` classifier and `standard-4` layout
- Added local service ad parser
- Extracted bottom ads before main column
- Fixed `return` in `finally` block warning in selenium searcher

`0.6.7`
- Add `get_text_by_selectors()` utility, CI test workflow, compressed test fixtures
- Add `perspectives`, `recent_posts`, `latest_from` classifiers and `sub_type` for perspectives
- Update dependency bounds for security patches, GitHub Actions to v6
- Added `get_text_by_selectors()` to `webutils` -- centralizes multi-selector fallback pattern across 7 component parsers
- Added `perspectives`, `recent_posts`, and `latest_from` component classifiers
- Added `sub_type` to perspectives parser from header text
- Added CI test workflow on push to dev branch
- Added compressed test fixtures with `condense_fixtures.py` script
- Updated dependency lower bounds for security patches (protobuf, orjson)
- Updated GitHub Actions to checkout v6 and setup-python v6

`0.6.6`
- Update packages with dependabot alerts (brotli, urllib3)
26 changes: 20 additions & 6 deletions WebSearcher/__init__.py
@@ -1,8 +1,22 @@
__version__ = "0.6.9"
from .searchers import SearchEngine
from .parsers import parse_serp
from .feature_extractor import FeatureExtractor
__version__ = "0.7.0"

from .classifiers import ClassifyFooter, ClassifyMain
from .extractors import Extractor
from .feature_extractor import FeatureExtractor
from .locations import download_locations
from .classifiers import ClassifyMain, ClassifyFooter
from .webutils import load_html, make_soup, load_soup
from .parsers import parse_serp
from .searchers import SearchEngine
from .utils import load_html, load_soup, make_soup

__all__ = [
"ClassifyFooter",
"ClassifyMain",
"Extractor",
"FeatureExtractor",
"download_locations",
"parse_serp",
"SearchEngine",
"load_html",
"load_soup",
"make_soup",
]
11 changes: 9 additions & 2 deletions WebSearcher/classifiers/__init__.py
@@ -1,4 +1,11 @@
from .header_text import ClassifyHeaderText
from .footer import ClassifyFooter
from .header_components import ClassifyHeaderComponent
from .header_text import ClassifyHeaderText
from .main import ClassifyMain
from .footer import ClassifyFooter

__all__ = [
"ClassifyFooter",
"ClassifyHeaderComponent",
"ClassifyHeaderText",
"ClassifyMain",
]
38 changes: 21 additions & 17 deletions WebSearcher/classifiers/footer.py
@@ -1,14 +1,15 @@
import bs4
from .. import webutils

from .. import utils
from .main import ClassifyMain

class ClassifyFooter:

class ClassifyFooter:
@staticmethod
def classify(cmpt: bs4.element.Tag) -> str:
layout_conditions = [
('id' in cmpt.attrs and cmpt.attrs['id'] in {'bres', 'brs'}),
('class' in cmpt.attrs and cmpt.attrs['class'] == ['MjjYud']),
("id" in cmpt.attrs and cmpt.attrs["id"] in {"bres", "brs"}),
("class" in cmpt.attrs and cmpt.attrs["class"] == ["MjjYud"]),
]

# Ordered list of classifiers to try
@@ -26,37 +27,40 @@ def classify(cmpt: bs4.element.Tag) -> str:
# Default unknown, exit on first successful classification
cmpt_type = "unknown"
for classifier in classifier_list:
if cmpt_type != "unknown": break
if cmpt_type != "unknown":
break
cmpt_type = classifier(cmpt)

# Fall back to main classifier
if cmpt_type == 'unknown':
if cmpt_type == "unknown":
cmpt_type = ClassifyMain.classify(cmpt)

return cmpt_type

@staticmethod
def discover_more(cmpt):
conditions = [
cmpt.find("g-scrolling-carousel"),
]
return 'discover_more' if all(conditions) else "unknown"
return "discover_more" if all(conditions) else "unknown"

@staticmethod
def omitted_notice(cmpt):
conditions = [
cmpt.find("p", {"id":"ofr"}),
(webutils.get_text(cmpt, "h2") == "Notices about Filtered Results"),
cmpt.find("p", {"id": "ofr"}),
(utils.get_text(cmpt, "h2") == "Notices about Filtered Results"),
]
return "omitted_notice" if any(conditions) else "unknown"

@staticmethod
def searches_related(cmpt):
known_labels = {'Related',
'Related searches',
'People also search for',
'Related to this search',
'Searches related to'}
h3 = cmpt.find('h3')
known_labels = {
"Related",
"Related searches",
"People also search for",
"Related to this search",
"Searches related to",
}
h3 = cmpt.find("h3")
h3_matches = [h3.text.strip().startswith(text) for text in known_labels] if h3 else []
return 'searches_related' if any(h3_matches) else 'unknown'
return "searches_related" if any(h3_matches) else "unknown"
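
The classify flow above (try classifiers in order, stop at the first non-"unknown" label, then fall back to the main classifier) can be sketched in isolation; the toy classifier and dict-based component below are illustrative stand-ins, not the library's bs4-based code:

```python
def classify(cmpt, classifiers, fallback):
    # Default unknown; exit on first successful classification
    cmpt_type = "unknown"
    for classifier in classifiers:
        if cmpt_type != "unknown":
            break
        cmpt_type = classifier(cmpt)
    # Fall back (as ClassifyFooter falls back to ClassifyMain)
    if cmpt_type == "unknown":
        cmpt_type = fallback(cmpt)
    return cmpt_type

# Toy stand-in for a header-text classifier like searches_related:
def searches_related(c):
    return "searches_related" if c.get("h3", "").startswith("Related") else "unknown"

label = classify({"h3": "Related searches"}, [searches_related], lambda c: "general")
```

The early `break` means later classifiers never run once one has matched, so ordering in `classifier_list` is significant.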
7 changes: 4 additions & 3 deletions WebSearcher/classifiers/header_components.py
@@ -1,15 +1,16 @@
from .. import webutils
import bs4

from .. import utils


class ClassifyHeaderComponent:
"""Classify a component from the header section based on its bs4.element.Tag"""

@staticmethod
def classify(cmpt: bs4.element.Tag) -> str:
"""Classify the component type based on header text"""

cmpt_type = "unknown"
if webutils.check_dict_value(cmpt.attrs, "id", ["taw", "topstuff"]):
if utils.check_dict_value(cmpt.attrs, "id", ["taw", "topstuff"]):
cmpt_type = "notice"
return cmpt_type
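
The `check_dict_value` helper used above is not shown in this diff; a plausible sketch of its semantics (an assumption, not the library's implementation) is:

```python
def check_dict_value(d: dict, key, values) -> bool:
    # Assumed semantics: True if `key` is present in `d` and its value
    # is one of `values`; False otherwise.
    return key in d and d[key] in values

# e.g. the header classifier's notice check:
is_notice = check_dict_value({"id": "taw"}, "id", ["taw", "topstuff"])
```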